def plot_hdi_groups(model_name, param_key, hc_dict, pt_dict, sort,
                    credible_interval=0.94, point_estimate='mean',
                    bins='auto', round_to=2):
    """Plot a parameter's posterior for the control and patient groups."""
    x = [hc_dict, pt_dict]
    leg = [['Control'], ['Patient']]
    colors = ['blue', 'green']
    fig, axes = plt.subplots(2, 1, sharex=True)
    for i in range(2):
        ax = axes[i]
        az.plot_posterior(x[i], ax=ax, kind='hist',
                          credible_interval=credible_interval,
                          point_estimate=point_estimate,
                          bins=bins, round_to=round_to, color=colors[i])
        ax.set_title(param_key if i == 0 else '')
        ax.legend(leg[i])
    # save fig
    save_dir = './figs/' + model_name + '/'
    os.makedirs(save_dir, exist_ok=True)  # also creates ./figs if missing
    save_name = param_key + ('_hdi_sorted.png' if sort else '_hdi.png')
    fig.savefig(save_dir + save_name)
def plot_hdi_diff(model_name, param_key, diff_dict, sort,
                  credible_interval=0.94, point_estimate='mean',
                  bins='auto', round_to=2):
    """Plot the posterior of a parameter's control-patient difference."""
    fig, ax = plt.subplots(1, 1)
    az.plot_posterior(diff_dict, ax=ax, kind='hist',
                      credible_interval=credible_interval,
                      point_estimate=point_estimate,
                      bins=bins, round_to=round_to, color='black')
    ax.set_title(param_key)
    ax.legend(['Control-Patient'])
    # save fig
    save_dir = './figs/' + model_name + '/'
    os.makedirs(save_dir, exist_ok=True)
    save_name = param_key + ('_hdi_diff_sorted.png' if sort else '_hdi_diff.png')
    fig.savefig(save_dir + save_name)
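# Usage sketch for the two helpers above (illustrative only: the sample
# arrays, model name and parameter key are made up, not from the source).
# az.plot_posterior accepts raw 1-D arrays of MCMC draws directly.
import numpy as np

rng = np.random.default_rng(0)
hc_samples = rng.normal(0.0, 1.0, size=4000)   # stand-in control-group draws
pt_samples = rng.normal(0.5, 1.0, size=4000)   # stand-in patient-group draws
plot_hdi_groups('demo_model', 'alpha', hc_samples, pt_samples, sort=False)
plot_hdi_diff('demo_model', 'alpha', hc_samples - pt_samples, sort=False)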
def param_posterior_arviz_plots(inferred, variables):
    az.plot_posterior(inferred, var_names=variables, kind='hist')
    az.plot_pair(inferred, var_names=variables, kind='hexbin',
                 colorbar=True, divergences=True)
def EPC_compare_fitter_to_bayes(RB_model, azs, trace, m_name, rbfit):
    # RB_process, hardware, backend, alpha_ref and pred_epc are module-level
    # globals set by the calling script.
    (EPC_Bayes, EPC_Bayes_err, Bayes_legend,
     Fitter_legend, pred_epc_legend) = get_EPC_and_legends(rbfit, azs)
    with RB_model:
        az.plot_posterior(trace, var_names=['alpha'], round_to=4,
                          transform=alpha_to_EPC, point_estimate=None)
        plt.title("Error per Clifford " + RB_process + " device: " + hardware
                  + ' backend: ' + backend.name() + ' model: ' + m_name,
                  fontsize=12)
        plt.axvline(x=alpha_to_EPC(alpha_ref), color='red')
        if pred_epc > 0.0:
            plt.axvline(x=pred_epc, color='green')
            plt.legend((Bayes_legend, "Higher density interval",
                        Fitter_legend, pred_epc_legend), fontsize=10)
        else:
            plt.legend((Bayes_legend, "Higher density interval",
                        Fitter_legend), fontsize=10)
        plt.show()
def exercise4():
    with pm.Model() as basic_model:
        probabilities = [0.3, 0.7, 0.95]
        likelihood_params = np.array(
            [np.divide(1, 3) * (1 + 2 * prob) for prob in probabilities])
        # uniform prior over the three groups (weights are normalized)
        group = pm.Categorical('group', p=np.array([1, 1, 1]))
        p = pm.Deterministic('p', theano.shared(likelihood_params)[group])
        positive_answers = pm.Binomial('positive_answers', n=num_questions,
                                       p=p, observed=[7])
        trace = pm.sample(4000, progressbar=True)
        az.plot_trace(trace)
        plt.show()
        az.plot_posterior(trace)
        plt.show()
        print(az.summary(trace))
        return trace
def plot_post(data, var_names, lims):
    n = len(var_names)
    fig, axes = plt.subplots(nrows=n, ncols=1, constrained_layout=True)
    for i in range(n):
        ax = axes[i] if n > 1 else axes
        ax.set_xlim(lims[i])
        arviz.plot_posterior(data, var_names=var_names[i], ax=ax,
                             bins=100, kind='hist', credible_interval=0.95)
    plt.show()
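# Minimal usage sketch for plot_post (assumed, not from the source): it
# accepts anything arviz.plot_posterior accepts, plus one (xmin, xmax) pair
# per variable. Here we use arviz's bundled example data.
import arviz

idata = arviz.load_arviz_data('centered_eight')
plot_post(idata, var_names=['mu', 'tau'], lims=[(-5, 15), (0, 12)])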
def plot_posteriors(self, parameter=None):
    if not (self.mcmc_ and self.data_):
        raise AttributeError('Object needs to be fit first.')
    if parameter:
        plot_posteriors(self.mcmc_, parameter)
    else:
        sample_dict = self.mcmc_.extract()
        _, ax = plt.subplots(3, 3, figsize=(21, 15))
        for i, param in enumerate(['mu', 'sigma']):
            self._add_posterior_plot(sample_dict, 'mu', ax[0, i], idx=i)
            ax[0, i].set_title(r'$\mu_{}$'.format(i + 1))
            self._add_posterior_plot(sample_dict, 'sigma', ax[1, i], idx=i)
            ax[1, i].set_title(r'$\sigma_{}$'.format(i + 1))
            _ = az.plot_posterior(  # NOQA
                sample_dict[param][:, 0] - sample_dict[param][:, 1],
                credible_interval=0.95,
                ax=ax[i, 2],
                ref_val=self.ref_val.get(param),
                rope=self.rope.get(param))
        _ = ax[0, 2].set_title(r'$\mu_1 - \mu_2$')
        _ = ax[1, 2].set_title(r'$\sigma_1 - \sigma_2$')
        self._add_posterior_plot(sample_dict, 'nu', ax[2, 0])
        _ = ax[2, 0].set_title(r'$\nu$')
        self._add_posterior_plot(sample_dict, 'log_nu', ax[2, 1])
        _ = ax[2, 1].set_title(r'$\log(\nu)$')
        effect_size = ((sample_dict['mu'][:, 0] - sample_dict['mu'][:, 1])
                       / np.linalg.norm(sample_dict['sigma'], axis=1)
                       * np.sqrt(2))
        _ = az.plot_posterior(  # NOQA
            effect_size, credible_interval=0.95, ax=ax[2, 2])
        _ = ax[2, 2].set_title(r'$(\mu_1-\mu_2)/\sqrt{(\sigma_1+\sigma_2)/2}$')
        plt.tight_layout(pad=4)
        plt.show()
def plot(self,
         type: str = 'dist',
         credible_interval: float = 0.94,
         point_estimate: str = 'mean',
         bins: Union[int, Sequence, str] = 'auto',
         round_to: int = 2,
         **kwargs):
    """General purpose plotting for hbayesdm-py.

    This function plots hyper-parameters.

    Parameters
    ----------
    type
        Current options are: 'dist', 'trace'. Defaults to 'dist'.
    credible_interval
        Credible interval to plot. Defaults to 0.94.
    point_estimate
        Show point estimate on plot. Options are: 'mean', 'median' or 'mode'.
        Defaults to 'mean'.
    bins
        Controls the number of bins. Defaults to 'auto'.
        Accepts the same values (or keywords) as plt.hist() does.
    round_to
        Controls formatting for floating point numbers. Defaults to 2.
    **kwargs
        Passed as-is to plt.hist().
    """
    type_options = ('dist', 'trace')
    if type not in type_options:
        raise RuntimeError('Plot type must be one of ' + repr(type_options))
    if self.model_type == 'single':
        var_names = list(self.parameters_desc)
    else:
        var_names = ['mu_' + p for p in self.parameters_desc]
    if type == 'dist':
        kwargs.setdefault('color', 'black')
        axes = az.plot_posterior(self.fit, kind='hist', var_names=var_names,
                                 credible_interval=credible_interval,
                                 point_estimate=point_estimate,
                                 bins=bins, round_to=round_to, **kwargs)
        for ax, (p, desc) in zip(axes, self.parameters_desc.items()):
            ax.set_title('{} ({})'.format(p, desc))
    elif type == 'trace':
        az.plot_trace(self.fit, var_names=var_names)
    plt.show()
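# Usage sketch for the hbayesdm `plot` method above (hedged: the choice of
# model function and its arguments are illustrative placeholders; any fitted
# hbayesdm model object exposing this method would work the same way).
from hbayesdm.models import gng_m1  # hypothetical choice of model

output = gng_m1(data='example', niter=2000, nwarmup=1000)  # bundled example data
output.plot(type='dist')    # histograms of the hyper-parameter posteriors
output.plot(type='trace')   # MCMC trace plots for the same parameters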
def facetplot_azid_dist(azid, rvs, rvs_hack_extra=0, group='posterior', **kwargs):
    """Control facet positioning of Arviz Krushke style plots, data in azid.

    Pass-through kwargs to az.plot_posterior, e.g. ref_val
    """
    # TODO unpack the compressed rvs from the azid
    j = 3
    m = j
    n = (((len(rvs) + rvs_hack_extra - j) // j)
         + ((len(rvs) + rvs_hack_extra - j) % j))
    f, axs = plt.subplots(n, m, figsize=(4 + m * 3, 2.2 * n))
    _ = az.plot_posterior(azid, group=group, ax=axs, var_names=rvs, **kwargs)
    f.suptitle(f'{group} {rvs}', y=0.96 + n * 0.005)
    f.tight_layout()
def plot_BEST(self, query=None, rope=(-1, 1), **kwargs):
    """e.g. query = 'opsin=="chr2" & delay_length==60'"""
    trace_post_query = (utils.query_posterior(query, self.trace.posterior)
                        if query else self.trace.posterior)
    # TODO querying trace.posterior will have to wait for replacing actual
    # values of index with originals
    # trace_post_query = trace.query()
    treatment = self.window.treatment
    mu = self.trace.posterior['mu_per_condition']
    az.plot_posterior(
        trace_post_query.sel({treatment: mu[treatment].max()})
        - trace_post_query.sel({treatment: mu[treatment].min()}),
        'mu_per_condition',
        rope=rope,
        ref_val=0,
        **kwargs)
def _plot_posterior(self, data, out_file, title_str, var_names=None,
                    out_file1=None):
    """Plot posterior using `arviz.plot_posterior`."""
    _ = az.plot_posterior(data, var_names=var_names)
    fig = plt.gcf()
    fig.suptitle(title_str, fontsize=14, y=1.05)
    self._savefig(fig, out_file)
    if out_file1 is not None:
        self._savefig(fig, out_file1)
def bayesEstimation(_smoothingWindow, _raw):
    # _raw = _raw[:500]  # for debugging: a slice of the signal recorded
    #                    # while the machine was not running
    X = sglProcessing(_raw, _smoothingWindow)  # preprocess the raw signal
    print(np.mean(X))

    n_samples = 1000
    with pm.Model() as model:
        mu = pm.Normal('mu', mu=50, sd=1)
        sigma = pm.HalfNormal('sigma', sd=30)
        estimation = pm.Normal('estimation', mu=mu, sd=sigma, observed=X)
        trace = pm.sample(n_samples)

    print("Showing the plots")
    az.plot_kde(X, rug=True)
    plt.yticks([0], alpha=0)
    plt.show()

    pm.traceplot(trace, legend=True)
    print(az.summary(trace))
    print('-' * 70)
    plt.show()

    az.plot_posterior(trace)
    plt.title("posterior")
    plt.show()

    ppc = pm.sample_posterior_predictive(trace, samples=10, model=model)
    plt.plot(ppc['estimation'].T)
    plt.show()

    az.plot_kde(ppc['estimation'].T)
    az.plot_kde(X, rug=True)
    plt.title("simulated data dist")
    plt.show()
def exercise3():
    with pm.Model() as basic_model:
        # Prior
        theta_1 = pm.Interpolated(
            'theta_1',
            x_points=np.array([0, np.divide(1, np.sqrt(5))]),
            pdf_points=np.array([0, 10 * np.sqrt(5)]))
        # Likelihood
        times = pm.Exponential('times', lam=theta_1,
                               observed=[30, 25, 15, 40, 20])
        map_estimator = pm.find_MAP(vars=[theta_1])['theta_1']
        print("The MAP estimator for the model is {0}".format(map_estimator))
        trace = pm.sample(10000, progressbar=True)
        az.plot_posterior(trace)
        plt.show()
        print(az.summary(trace))
def facetplot_df_dist(df, rvs, rvs_hack_extra=0, **kwargs):
    """Control facet positioning of Arviz Krushke style plots, data in df.

    Pass-through kwargs to az.plot_posterior, e.g. ref_val
    """
    m = 2
    n = (((len(rvs) + rvs_hack_extra) // 2)
         + ((len(rvs) + rvs_hack_extra) % 2))
    sharex = kwargs.get('sharex', False)
    f, axs = plt.subplots(n, m, figsize=(m * 6, 2.2 * n), sharex=sharex)
    ref_val = kwargs.get('ref_val', [None for _ in df.columns])  # one per column
    for i, ft in enumerate(df.columns):
        axarr = az.plot_posterior(df[ft].values, ax=axs.flatten()[i],
                                  ref_val=ref_val[i])
        axarr.set_title(ft)
    title = kwargs.get('title', '')
    f.suptitle(f'{title} {rvs}', y=0.93 + n * 0.005)
    f.tight_layout()
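# Usage sketch for facetplot_df_dist (assumed, not from the source): the
# DataFrame holds one column of posterior draws per parameter.
import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
df_post = pd.DataFrame({'beta0': rng.normal(0.0, 1.0, 4000),
                        'beta1': rng.normal(2.0, 0.5, 4000),
                        'sigma': np.abs(rng.normal(1.0, 0.2, 4000))})
facetplot_df_dist(df_post, rvs=list(df_post.columns), title='demo fit')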
def plot_param_diagnostics(mod, incl_noise_params=False, incl_trend_params=False,
                           incl_smooth_params=False, which='trace', **kwargs):
    """
    Parameters
    ----------
    mod : orbit model object
    which : str, {'density', 'trace', 'pair', 'autocorr', 'posterior', 'forest'}
    incl_noise_params : bool
        whether to plot noise parameters; default False
    incl_trend_params : bool
        whether to plot trend parameters; default False
    incl_smooth_params : bool
        whether to plot smoothing parameters; default False
    **kwargs :
        other parameters passed to arviz functions

    Returns
    -------
    matplotlib axes object
    """
    posterior_samples = get_arviz_plot_dict(
        mod,
        incl_noise_params=incl_noise_params,
        incl_trend_params=incl_trend_params,
        incl_smooth_params=incl_smooth_params)

    if which == "trace":
        axes = az.plot_trace(posterior_samples, **kwargs)
    elif which == "density":
        axes = az.plot_density(posterior_samples, **kwargs)
    elif which == "posterior":
        axes = az.plot_posterior(posterior_samples, **kwargs)
    elif which == "pair":
        axes = az.plot_pair(posterior_samples, **kwargs)
    elif which == "autocorr":
        axes = az.plot_autocorr(posterior_samples, **kwargs)
    elif which == "forest":
        axes = az.plot_forest(posterior_samples, **kwargs)
    else:
        raise Exception(
            "please use one of 'trace', 'density', 'posterior', 'pair', "
            "'autocorr', 'forest' for `which`.")

    return axes
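# Usage sketch (hedged: assumes `dlt_mod` is an orbit model, e.g. a DLT
# model, that has already been fit on a time series; that setup is not
# shown here).
axes = plot_param_diagnostics(dlt_mod, incl_trend_params=True,
                              which='posterior')
plt.show()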
def plot_hdi(x: np.ndarray,
             credible_interval: float = 0.94,
             title: str = None,
             xlabel: str = 'Value',
             ylabel: str = 'Density',
             point_estimate: str = None,
             bins: Union[int, Sequence, str] = 'auto',
             round_to: int = 2,
             **kwargs):
    """Plot highest density interval (HDI).

    This function redirects input to the `arviz.plot_posterior` function.

    Parameters
    ----------
    x
        Array containing MCMC samples.
    credible_interval
        Credible interval to plot. Defaults to 0.94.
    title
        String to set as title of plot.
    xlabel
        String to set as the x-axis label.
    ylabel
        String to set as the y-axis label.
    point_estimate
        Defaults to None. Possible options are 'mean', 'median', 'mode'.
    bins
        Controls the number of bins. Defaults to 'auto'.
        Accepts the same values (or keywords) as plt.hist() does.
    round_to
        Controls formatting for floating point numbers. Defaults to 2.
    **kwargs
        Passed as-is to plt.hist().
    """
    kwargs.setdefault('color', 'black')
    ax = az.plot_posterior(x, kind='hist',
                           credible_interval=credible_interval,
                           point_estimate=point_estimate,
                           bins=bins, round_to=round_to, **kwargs).item()
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    plt.show()
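# Usage sketch for plot_hdi (illustrative; the draws are synthetic, not from
# the source): any 1-D array of MCMC samples works.
import numpy as np

draws = np.random.default_rng(2).normal(0.8, 0.1, size=4000)
plot_hdi(draws, title='posterior of p', point_estimate='mean')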
def mcmc_diagnostic_plots(posterior, sample_stats, it):
    az_trace = az.from_dict(posterior=posterior, sample_stats=sample_stats)

    # Pair plots need 2 parameters or more; disabled for now:
    # if len(az_trace.posterior.data_vars) > 1:
    #     ax = az.plot_pair(az_trace, kind="hexbin", gridsize=30, marginals=True)
    #     fig = ax.ravel()[0].figure
    #     plt.ylim((5000, 30000))
    #     plt.xlim((1e-10, 1e-7))
    #     fig.savefig(f"./results/pair_plot_it{it}.png")
    #     plt.clf()
    #
    #     ax = az.plot_pair(
    #         az_trace,
    #         kind=["scatter", "kde"],
    #         kde_kwargs={"fill_last": False},
    #         point_estimate="mean",
    #         marginals=True,
    #     )
    #     fig = ax.ravel()[0].figure
    #     fig.savefig(f"./results/point_estimate_plot_it{it}.png")
    #     plt.clf()

    ax = az.plot_trace(az_trace, divergences=False)
    fig = ax.ravel()[0].figure
    fig.savefig(f"./results/trace_plot_it{it}.png")
    plt.clf()

    ax = az.plot_posterior(az_trace)
    fig = ax.ravel()[0].figure
    fig.savefig(f"./results/posterior_plot_it{it}.png")
    plt.clf()

    lag = np.minimum(len(list(posterior.values())[0]), 100)
    ax = az.plot_autocorr(az_trace, max_lag=lag)
    fig = ax.ravel()[0].figure
    fig.savefig(f"./results/autocorr_plot_it{it}.png")
    plt.clf()

    ax = az.plot_ess(az_trace, kind="evolution")
    fig = ax.ravel()[0].figure
    fig.savefig(f"./results/ess_evolution_plot_it{it}.png")
    plt.clf()

    plt.close()
def coin_tossing_with_given_prior_and_data(a_prior, b_prior, data):
    # Initialize the model
    with pm.Model() as coin_flipping_model:
        # Prior: theta ~ Beta(a_prior, b_prior)
        theta = pm.Beta('theta', alpha=a_prior, beta=b_prior)
        # Likelihood: Bernoulli with p = theta
        y = pm.Bernoulli('y', p=theta, observed=data)
        # Generate 1000 samples of the posterior
        trace = pm.sample(1000)
    # Plot the traces
    az.plot_trace(trace)
    plt.show()
    # Plot the sampled posterior
    az.plot_posterior(trace)
    plt.show()
    # Print the summary
    print(az.summary(trace))
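# Usage sketch (assumption, not from the source): simulate 100 tosses of a
# biased coin, then update a Beta(2, 2) prior with them.
from scipy import stats

tosses = stats.bernoulli.rvs(p=0.35, size=100, random_state=123)
coin_tossing_with_given_prior_and_data(a_prior=2, b_prior=2, data=tosses)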
# set a Bernoulli likelihood for the data (these two statements run inside
# the enclosing `with pm.Model() as bernoulli_model:` block)
y = pm.Bernoulli("obs", p=theta, observed=data)
# inference step
trace = pm.sample(1000, random_seed=123)

# ------------------------- analyze the posterior ------------------------- #
with bernoulli_model:
    log.info("The trace is as follows: %s", trace["theta"])
    log.info("The dimensions of the trace are: %s", trace["theta"].shape)
    # see the summary of the trace
    log.info("The summary of the trace is: %s", az.summary(trace))
    # plot a visual representation of the trace
    az.plot_trace(trace)
    # plot the posterior with the rope
    az.plot_posterior(trace, credible_interval=0.9, rope=[0.45, 0.55])
    # plot the posterior with a reference value
    az.plot_posterior(trace, credible_interval=0.9, ref_val=0.5)

# ------------------------ loss function analysis ------------------------- #
with bernoulli_model:
    # define a grid of points over which to evaluate the loss functions
    grid = np.linspace(0, 1, 200)
    # absolute-error loss (loss function a)
    loss_func_a = [np.mean(abs(i - trace["theta"])) for i in grid]
    # squared-error loss (loss function b)
    loss_func_b = [np.mean((i - trace["theta"])**2) for i in grid]
    # asymmetric loss function (loss function c)
    loss_func_c = []
    for i in grid:
Teff = pymc3.Uniform('Teff', lower=30000.0, upper=90000.0)
logU = pymc3.Uniform('logU', lower=-4, upper=-1.5)

# Interpolation coordinates
grid_coord = tt.stack([[logU], [Teff], [OH]], axis=-1)

# Loop through the lines
for i in lineRange:
    if idx_analysis_lines[i]:
        # Line flux
        lineInt = gridInterp[lineLabels[i]](grid_coord)
        # Line intensity
        lineFlux = lineInt - cHbeta * lineFlambdas[i]
        # Inference
        Y_emision = pymc3.Normal(lineLabels[i], mu=lineInt,
                                 sd=inputFluxErr[i], observed=inputFlux[i])

displaySimulationData(model)

trace = pymc3.sample(5000, tune=2000, chains=2, cores=1, model=model)
print(trace)
print(pymc3.summary(trace))

az.plot_trace(trace)
plt.show()
az.plot_posterior(trace)
plt.show()
""" Posterior Plot ============== _thumb: .5, .8 """ import matplotlib.pyplot as plt import arviz as az az.style.use("arviz-darkgrid") data = az.load_arviz_data("centered_eight") coords = {"school": ["Choate"]} az.plot_posterior(data, var_names=["mu", "theta"], coords=coords, rope=(-1, 1)) plt.show()
def run(region, folder, load_trace=False, compute_sim=True,
        plot_posterior_dist=True):
    print("started ... " + region)

    if not os.path.exists(region):
        os.makedirs(region)

    # observed data
    (t_obs, datetimes, y_obs, n_pop, shutdown_day, u0, _) = \
        data_fetcher.read_region_data(folder, region)
    y_obs = y_obs.astype(np.float64)
    u0 = u0.astype(np.float64)

    # set eqn
    eqn = Seir()
    eqn.population = n_pop
    eqn.tau = shutdown_day

    # set ode solver
    ti = t_obs[0]
    tf = t_obs[-1]
    m = 2
    n_steps = m * (tf - ti)
    rk = RKSolverSeir(ti, tf, n_steps)
    rk.rk_type = "explicit_euler"
    rk.output_frequency = m
    rk.set_output_storing_flag(True)
    rk.equation = eqn

    du0_dp = np.zeros((eqn.n_components(), eqn.n_parameters()))
    rk.set_initial_condition(u0, du0_dp)
    rk.set_output_gradient_flag(True)

    # sample posterior
    with pm.Model() as model:
        # set prior distributions (earlier attempts kept for reference)
        # beta = pm.Lognormal('beta', mu=math.log(0.4/n_pop), sigma=0.4)
        # sigma = pm.Lognormal('sigma', mu=math.log(0.3), sigma=0.5)
        # gamma = pm.Lognormal('gamma', mu=math.log(0.25), sigma=0.5)
        # kappa = pm.Lognormal('kappa', mu=math.log(0.1), sigma=0.5)
        # beta = pm.Normal('beta', mu=0.4/n_pop, sigma=0.06/n_pop)
        # sigma = pm.Normal('sigma', mu=0.6, sigma=0.1)
        # gamma = pm.Normal('gamma', mu=0.3, sigma=0.07)
        # kappa = pm.Normal('kappa', mu=0.5, sigma=0.1)
        # tint = pm.Lognormal('tint', mu=math.log(30), sigma=1)
        beta = pm.Lognormal('beta', mu=math.log(0.1), sigma=0.5)
        sigma = pm.Lognormal('sigma', mu=math.log(0.05), sigma=0.6)
        gamma = pm.Lognormal('gamma', mu=math.log(0.05), sigma=0.6)
        kappa = pm.Lognormal('kappa', mu=math.log(0.2), sigma=0.3)
        tint = pm.Lognormal('tint', mu=math.log(30), sigma=math.log(10))
        dispersion = pm.Normal('dispersion', mu=30., sigma=10.)
        # set cached_sim object
        cached_sim = CachedSEIRSimulation(rk)

        # set theano model op object (renamed from `model` to avoid
        # shadowing the pm.Model context object)
        model_op = ModelOp(cached_sim)

        # set likelihood distribution
        y_sim = pm.NegativeBinomial(
            'y_sim', mu=model_op((beta, sigma, gamma, kappa, tint)),
            alpha=dispersion, observed=y_obs)

        if not load_trace:
            # sample posterior distribution and save trace
            draws = 1000
            tune = 500
            trace = pm.sample(draws=draws, tune=tune, cores=4, chains=4,
                              nuts_kwargs=dict(target_accept=0.9),
                              init='advi+adapt_diag')  # NUTS sampling
            # save trace
            pm.backends.text.dump(region + os.path.sep, trace)
        else:
            # load trace
            trace = pm.backends.text.load(region + os.path.sep)

        if plot_posterior_dist:
            # plot posterior distributions of all parameters
            data = az.from_pymc3(trace=trace)
            pm.plots.traceplot(data, legend=True)
            plt.savefig(region + os.path.sep + "trace_plot.pdf")
            az.plot_posterior(data, hdi_prob=0.95)
            plt.savefig(region + os.path.sep + "post_dist.pdf")

        if compute_sim:
            # rk.set_output_gradient_flag(False)
            n_predictions = 7
            rk.final_time = rk.final_time + n_predictions
            rk.n_steps = rk.n_steps + m * n_predictions

            y_sims = pm.sample_posterior_predictive(trace)['y_sim'][:, 0, :]
            np.savetxt(region + os.path.sep + "y_sims.csv", y_sims,
                       delimiter=',')
            mean_y = np.mean(y_sims, axis=0)
            upper_y = np.percentile(y_sims, q=97.5, axis=0)
            lower_y = np.percentile(y_sims, q=2.5, axis=0)

            # plots
            dates = [dt.datetime.strptime(date, "%Y-%m-%d").date()
                     for date in datetimes]
            pred_dates = dates + [dates[-1] + dt.timedelta(days=i)
                                  for i in range(1, 1 + n_predictions)]
            np.savetxt(region + os.path.sep + "y_obs.csv", y_obs,
                       delimiter=',')
            pd.DataFrame(pred_dates).to_csv(
                region + os.path.sep + 'dates.csv', header=False, index=False)

            # linear plot
            font_size = 12
            fig, ax = plt.subplots(figsize=(10, 10))
            ax.plot(dates, y_obs, 'x', color='k', label='reported data')
            import matplotlib.dates as mdates
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%d %b'))
            ax.xaxis.set_major_locator(mdates.DayLocator(bymonthday=(1, 15)))
            plt.title(region[0].upper() + region[1:].lower()
                      + "'s daily infections", fontsize=font_size)
            plt.xlabel('Date', fontsize=font_size)
            plt.ylabel('New daily infections', fontsize=font_size)
            ax.tick_params(axis='both', which='major', labelsize=10)

            # plot propagated uncertainty
            plt.plot(pred_dates, mean_y, color='g', lw=2, label='mean')
            plt.fill_between(pred_dates, lower_y, upper_y,
                             color='darkseagreen',
                             label='95% credible interval')
            plt.legend(loc='upper left')
            fig.autofmt_xdate()
            plt.savefig(region + os.path.sep + "linear.pdf")

            # log plot
            plt.yscale('log')
            plt.savefig(region + os.path.sep + "log.pdf")

    print("finished ... " + region)
plt.plot(xs, stats.norm.pdf(xs, mu, std_q), '--', label='Laplace')
post_exact = stats.beta.pdf(xs, h + 1, t + 1)
plt.plot(xs, post_exact, label='exact')
plt.title('Quadratic approximation')
plt.xlabel('θ', fontsize=14)
plt.yticks([])
plt.legend()
plt.savefig('../figures/bb_laplace.pdf')

# HMC
with pm.Model() as hmc_model:
    theta = pm.Beta('theta', 1., 1.)
    y = pm.Binomial('y', n=1, p=theta, observed=data)  # Bernoulli
    trace = pm.sample(1000, random_seed=42)
thetas = trace['theta']
axes = az.plot_posterior(thetas, credible_interval=0.95)
plt.savefig('../figures/bb_hmc.pdf')

az.plot_trace(trace)
plt.savefig('../figures/bb_hmc_trace.pdf', dpi=300)

# ADVI
with pm.Model() as mf_model:
    theta = pm.Beta('theta', 1., 1.)
    y = pm.Binomial('y', n=1, p=theta, observed=data)  # Bernoulli
    mean_field = pm.fit(method='advi')
trace_mf = mean_field.sample(1000)
thetas = trace_mf['theta']
axes = az.plot_posterior(thetas, credible_interval=0.95)
plt.savefig('../figures/bb_mf.pdf')
def conduct_bayesian(observations_file_path, mu_init, beta_init):
    df = pd.read_csv(observations_file_path)

    # Get list of unique damage state values:
    ds_list = df['DS Number'].unique()

    for ds in range(0, len(ds_list)):
        df_sub = df.loc[df['DS Number'] == ds_list[ds]]
        xj = np.array(df_sub['demand'])
        zj = np.array(df_sub['fail'])
        nj = np.array(df_sub['total'])
        mu_ds = mu_init[ds]
        beta_ds = beta_init[ds]

        with pm.Model() as model:
            # Set up the priors:
            mu = pm.Normal('mu', mu_ds, 2.71)
            beta = pm.Normal('beta', beta_ds, 0.03)

            # Define fragility function equation:
            def normal_cdf(mu, beta, xj):
                """Compute the lognormal CDF at the demand values xj."""
                return 0.5 * (1 + tt.erf(
                    (tt.log(xj) - mu) / (beta * tt.sqrt(2))))

            # Define likelihood:
            like = pm.Binomial('like', p=normal_cdf(mu, beta, xj),
                               observed=zj, n=nj)
            for RV in model.basic_RVs:
                print(RV.name, RV.logp(model.test_point))

            # Determine the posterior
            trace = pm.sample(2000, cores=1, return_inferencedata=True)

            # Posterior predictive checks are a great way to validate the
            # model: generate data from the model using parameter draws
            # from the posterior:
            ppc = pm.sample_posterior_predictive(
                trace, var_names=['mu', 'beta', 'like'])

        # Calculate failure probabilities using the samples
        # (`pf` is the fragility helper defined elsewhere in this module):
        im = np.arange(70, 200, 5)
        pf_ppc = []
        for i in range(0, len(ppc['mu'])):
            y = pf(im, ppc['mu'][i], ppc['beta'][i])
            pf_ppc.append(y)

        # Plot the HPD:
        _, ax = plt.subplots()
        az.plot_hdi(im, pf_ppc, fill_kwargs={
            'alpha': 0.2, 'color': 'blue',
            'label': 'bounds of prediction: 94% HPD'})

        # Calculate and plot the mean outcome:
        pf_mean = pf(im, ppc['mu'].mean(), ppc['beta'].mean())
        ax.plot(im, pf_mean, label='mean of prediction', color='r',
                linestyle='dashed')

        # Plot the mean of the simulation-based fragility:
        pf_sim = pf(im, mu_ds, beta_ds)
        ax.plot(im, pf_sim, label='simulation-based', color='k')

        # Plot the observations:
        ax.scatter(xj, zj / nj, color='r', marker='^', label='observations')
        ax.legend()
        plt.show()

        # Compare the prior of each parameter to its updated distribution:
        new_mu_mean, new_mu_std = norm.fit(ppc['mu'])
        plt.hist(ppc['mu'], bins=25, density=True, alpha=0.4, color='b')
        xmin, xmax = plt.xlim()
        x = np.linspace(xmin, xmax, 100)
        p_prior = norm.pdf(x, mu_ds, 2.71)
        p_new = norm.pdf(x, new_mu_mean, new_mu_std)
        plt.plot(x, p_prior, 'k', linewidth=2, label='prior distribution')
        plt.plot(x, p_new, 'r', linewidth=2, label='updated distribution',
                 linestyle='dashed')
        # Note: az.plot_violin(trace, var_names=['mu']) can be helpful for
        # seeing the distribution of parameter values.

        # Plot the posterior distributions of each RV
        fig, ax = plt.subplots()
        az.plot_trace(trace, chain_prop={'color': ['blue', 'red']})
        az.plot_posterior(trace)
        az.plot_forest(trace, var_names=['mu', 'beta'])
        plt.show()
        print(az.summary(trace))
def plot_posterior(self):
    az.plot_posterior(self.idata)
with model:
    trace = pm.sample(cores=1)
    summary = az.summary(trace, var_names=[
        "alpha_mean", "alpha_sigma", "beta_fundraising",
        "alpha", "sigma", "beta_other"])
    az.plot_trace(trace, var_names=[
        "alpha_mean", "alpha_sigma", "beta_fundraising",
        "sigma", "beta_other"])
    az.plot_posterior(trace, var_names=[
        "alpha_mean", "alpha_sigma", "beta_fundraising",
        "sigma", "beta_other"])
    plt.show()

# ---------------------- study the shape of the trace ---------------------- #
with model:
    for name in ["f", "eta", "ls", "sigma", "alpha", "beta_other",
                 "beta_fundraising", "alpha_sigma", "alpha_mean", "mu"]:
        log.info("The shape of trace for %s is: %s", name, trace[name].shape)

# ------------- get the gp process prediction on the existing sample ------- #
def plot_density(self, **kwargs):
    """Plot posterior densities in the style of John K. Kruschke's book."""
    return az.plot_posterior(self.data, **kwargs)
theta_0 = pm.Normal('intercept', mu=0, sigma=2)
theta_1 = pm.Normal('coefx', mu=0, sigma=2)
theta_2 = pm.Normal('coefxSqd', mu=0, sigma=2)
theta = pm.Deterministic('theta', theta_0 + theta_1 * xs + theta_2 * xs**2)
sigma = pm.HalfCauchy('sigma', 100)
y_lik = pm.Normal('y_lik', mu=theta, sigma=sigma, observed=y)
trace_linear = pm.sample(tune=2000, chains=1, cores=1)
pp_samples = pm.sample_posterior_predictive(trace=trace_linear,
                                            random_seed=123)
y_pred = pp_samples['y_lik'].mean(axis=0)

_, axi = plt.subplots(1, 4, figsize=(8, 5))
sns.scatterplot(x, y_obs, ax=axi[0]).set_title("Data")
sns.lineplot(x, y_pred, ax=axi[0])
az.plot_hdi(x, trace_linear['theta'], hdi_prob=0.98, ax=axi[0], color='gray')
# one parameter per panel (each az.plot_posterior call targets one axis)
az.plot_posterior(trace_linear, var_names=['intercept'], ax=axi[1])
az.plot_posterior(trace_linear, var_names=['coefx'], ax=axi[2])
az.plot_posterior(trace_linear, var_names=['coefxSqd'], ax=axi[3])
plt.show()

with linear_Model:
    pm.set_data({'xs': [1, 5.6, 4]})
    y_test = pm.sample_posterior_predictive(trace=trace_linear)

print(y_test['y_lik'].mean(axis=0))
print(1 + 3.2 * 1 + 4 * 1**2)
data = stats.bernoulli.rvs(p=theta_real, size=trials)

with pm.Model() as our_first_model:
    # a priori
    theta = pm.Beta('theta', alpha=1., beta=1.)
    # likelihood
    y = pm.Bernoulli('y', p=theta, observed=data)
    trace = pm.sample(3000, random_seed=123)

# ### Summarizing the posterior
az.plot_trace(trace)
plt.savefig('B11197_02_01.png')

az.summary(trace)

az.plot_posterior(trace)
plt.savefig('B11197_02_02.png', dpi=300)

az.plot_posterior(trace, rope=[0.45, .55])
plt.savefig('B11197_02_03.png', dpi=300)

az.plot_posterior(trace, ref_val=0.5)
plt.savefig('B11197_02_04.png', dpi=300)

grid = np.linspace(0, 1, 200)
theta_pos = trace['theta']
lossf_a = [np.mean(abs(i - theta_pos)) for i in grid]
lossf_b = [np.mean((i - theta_pos)**2) for i in grid]

for lossf, c in zip([lossf_a, lossf_b], ['C0', 'C1']):
    mini = np.argmin(lossf)
""" Posterior Plot ============== _thumb: .5, .8 """ import arviz as az az.style.use('arviz-darkgrid') data = az.load_arviz_data('centered_eight') coords = {'school': ['Choate']} az.plot_posterior(data, var_names=['mu', 'theta'], coords=coords, rope=(-1, 1))