import numpy as np
import arviz as az


def compute_hdp(samples, lims):
    """Stack HPD bounds for the credible masses in ``lims`` into five rows.

    Row layout: [outer lower, inner lower, centre, inner upper, outer upper],
    where the centre is the midpoint of the interval for lims[0].
    """
    ndim = np.ndim(samples)
    if ndim == 1:
        result = np.zeros(5)
    else:
        size = samples.shape
        result = np.zeros((5, size[1]))
    if ndim == 1:
        for i in range(len(lims)):
            kk = az.hpd(samples, credible_interval=lims[i])
            if i == 0:
                result[2] = np.mean(kk)
            elif i == 1:
                result[1] = kk[0]
                result[3] = kk[1]
            elif i == 2:
                result[0] = kk[0]
                result[4] = kk[1]
    else:
        for j in range(size[1]):
            for i in range(len(lims)):
                kk = az.hpd(samples[:, j], credible_interval=lims[i])
                if i == 0:
                    result[2, j] = np.mean(kk)
                elif i == 1:
                    result[1, j] = kk[0]
                    result[3, j] = kk[1]
                elif i == 2:
                    result[0, j] = kk[0]
                    result[4, j] = kk[1]
    return result
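# Usage sketch for compute_hdp (synthetic data; relies on the imports above).
# ``lims`` is expected in increasing order: lims[0] gives the central estimate
# (the midpoint of its narrow interval), lims[1] the inner band (rows 1 and 3),
# and lims[2] the outer band (rows 0 and 4).
draws = np.random.normal(loc=0.0, scale=1.0, size=5000)
summary = compute_hdp(draws, lims=[0.05, 0.68, 0.95])
# summary is [outer_lo, inner_lo, centre, inner_hi, outer_hi]
print(summary)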
def hdi(self, var_name: str, credible_mass: float = 0.95):
    """Calculate the highest posterior density interval (HDI).

    This function calculates a *credible interval* which contains the
    ``credible_mass`` most likely values of the parameter, given the data.
    Also known as an HPD interval.

    Parameters
    ----------
    var_name : str
        Name of variable.
    credible_mass : float
        The HDI will cover credible_mass * 100% of the probability mass.
        Default: 0.95, i.e. a 95% HDI.

    Returns
    -------
    (float, float)
        The endpoints of the HDI.
    """
    check_credible_mass(credible_mass)

    # arviz 0.8 renamed ``hpd`` to ``hdi`` and ``credible_interval`` to ``hdi_prob``.
    az_major, az_minor, *_ = arviz.__version__.split('.')
    if (int(az_major), int(az_minor)) >= (0, 8):
        return tuple(arviz.hdi(self.trace[var_name], hdi_prob=credible_mass))
    else:
        return tuple(arviz.hpd(self.trace[var_name], credible_interval=credible_mass))
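# Hedged sketch of the same compatibility shim for plain arrays, outside the
# class (``hdi_compat`` is a made-up name; the 0.8 cut-over matches the check
# above, where arviz renamed ``hpd`` to ``hdi`` and its keyword to ``hdi_prob``).
import arviz
import numpy as np


def hdi_compat(samples, credible_mass=0.95):
    major, minor, *_ = arviz.__version__.split('.')
    if (int(major), int(minor)) >= (0, 8):
        return tuple(arviz.hdi(np.asarray(samples), hdi_prob=credible_mass))
    return tuple(arviz.hpd(np.asarray(samples), credible_interval=credible_mass))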
def plot_samples(self, samples, titles, num_samples, color='grey',
                 scatters=None, scatter_args={}, legend=True,
                 margined=False, sample_gap=2):
    num_components = samples[0].shape[0]
    subplot_shape = ((num_components + 2) // 3, 3)
    if self.opt.for_report:
        subplot_shape = ((num_components + 1) // 2, 2)
    if num_components <= 1:
        subplot_shape = (1, 1)
    for j in range(num_components):
        ax = plt.subplot(subplot_shape[0], subplot_shape[1], 1 + j)
        plt.title(titles[j])
        if scatters is not None:
            plt.scatter(self.τ[self.common_ind], scatters[j],
                        marker='x', label='Observed', **scatter_args)
        # plt.errorbar([n*10+n for n in range(7)], Y[j], 2*np.sqrt(Y_var[j]), fmt='none', capsize=5)
        for s in range(1, sample_gap * num_samples, sample_gap):
            kwargs = {}
            if s == 1:
                kwargs = {'label': 'Samples'}
            plt.plot(self.τ, samples[-s, j, :], color=color, alpha=0.5, **kwargs)
        if j % subplot_shape[1] == 0:
            plt.ylabel(self.opt.ylabel)
        # HPD:
        bounds = arviz.hpd(samples[-self.opt.num_hpd:, j, :], credible_interval=0.95)
        plt.fill_between(self.τ, bounds[:, 0], bounds[:, 1],
                         color='grey', alpha=0.3, label='95% credible interval')
        plt.xticks(self.t)
        ax.set_xticklabels(self.t)
        if margined:
            plt.ylim(min(samples[-1, j]) - 2, max(samples[-1, j]) + 2)
        # if self.opt.for_report:
        #     plt.ylim(-0.2, max(samples[-1, j]) + 0.2)
        plt.xlabel('Time (h)')
        if legend:
            plt.legend()
    plt.tight_layout()
import numpy as np
import arviz as az
import matplotlib.pyplot as plt


def plot_posterior_mean(trace_mu, x_val, y_val, credible_interval=0.97):
    idx = np.argsort(x_val)
    mu_hpd = az.hpd(trace_mu, credible_interval=credible_interval)
    plt.plot(x_val, y_val, marker='o', linestyle='')
    plt.plot(x_val[idx], trace_mu.mean(axis=0)[idx], linestyle='-')
    plt.fill_between(x_val[idx], mu_hpd[idx, 0], mu_hpd[idx, 1],
                     color='grey', alpha=.3)
    plt.xlabel('rugged')
    plt.ylabel('log gdp')
    return plt
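# Hypothetical demo for plot_posterior_mean (all values below are synthetic,
# standing in for e.g. trace['mu'] from a fit of log-GDP on ruggedness):
# 500 fake posterior draws of a linear mean over 40 x-values.
x_demo = np.linspace(0, 6, 40)
y_demo = 9.2 - 0.2 * x_demo + np.random.normal(0, 0.3, size=40)
mu_draws = (9.2 + np.random.normal(0, 0.1, size=(500, 1))
            - (0.2 + np.random.normal(0, 0.05, size=(500, 1))) * x_demo)
plot_posterior_mean(mu_draws, x_demo, y_demo, credible_interval=0.97).show()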
import arviz as az


def compute_hdi(arr, credible_interval=0.64):
    """Compute highest density intervals for an array of posterior samples.

    The sample dimension must come first.

    :param arr: Array of shape (n_samples, n_genes)
    :param credible_interval: Probability mass the interval should cover
    :return: Array of shape (n_genes, 2) with the lower/upper bounds
    """
    return az.hpd(arr, credible_interval=credible_interval)
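# Example call for compute_hdi on synthetic input: 1000 posterior draws for
# 5 genes yield a (5, 2) array of lower/upper bounds at the default 64% mass.
import numpy as np

samples = np.random.normal(size=(1000, 5))
bounds = compute_hdi(samples)  # shape (5, 2)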
def plot_bar_hpd(self, var_samples, var, labels, true_var=None, width=0.1,
                 titles=None, rotation=0, true_hpds=None):
    hpds = list()
    num = var.shape[0]
    plotnum = var.shape[1] * 100 + 21
    for i in range(num):
        hpds.append(arviz.hpd(var_samples[-self.opt.num_hpd:, i, :],
                              credible_interval=0.95))
    hpds = np.array(hpds)
    hpds = abs(hpds - np.expand_dims(var, 2))
    for k in range(var_samples.shape[2]):
        plt.subplot(plotnum)
        plotnum += 1
        plt.bar(np.arange(num) - width, var[:, k], width=2 * width,
                tick_label=labels, color='chocolate', label=self.opt.model_label)
        plt.errorbar(np.arange(num) - width, var[:, k],
                     hpds[:, k].swapaxes(0, 1),
                     fmt='none', capsize=5, color='black')
        plt.xlim(-1, num)
        plt.xticks(rotation=rotation)
        if titles is not None:
            plt.title(titles[k])
        if true_var is not None:
            plt.bar(np.arange(num) + width, true_var[:, k], width=2 * width,
                    color='slategrey', align='center', label=self.opt.true_label)
        if true_hpds is not None:
            plt.errorbar(np.arange(num) + width, true_var[:, k],
                         true_hpds[:, k].swapaxes(0, 1),
                         fmt='none', capsize=5, color='black')
        plt.legend()
    plt.tight_layout()
    return hpds
import numpy as np
import arviz as az


def hpd(y, credible_interval=0.94):
    y = np.asarray(y)
    y_shape = y.shape
    if 0 in y_shape:
        return (np.array([]), np.array([]))
    hpd_ = az.hpd(y, credible_interval=credible_interval,
                  circular=False, multimodal=False)
    if hpd_.ndim == 1:
        hpd_ = np.expand_dims(hpd_, axis=0)
    return (hpd_[:, 0], hpd_[:, 1])
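# Usage sketch: the wrapper always returns a pair of 1-D arrays, even for a
# single variable, which makes it safe to feed straight into fill_between.
draws = np.random.normal(size=(2000, 50))        # e.g. a curve at 50 x-points
lower, upper = hpd(draws, credible_interval=0.94)
# plt.fill_between(x, lower, upper, alpha=0.3)   # x: the same 50 x-points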
def make_plot(trace):
    plot_training_data()
    # plot logistic curve
    theta = trace['θ'].mean(axis=0)
    idx = np.argsort(x_c)
    plt.plot(x_c[idx], theta[idx], color='C2', lw=3)
    az.plot_hpd(x_c, trace['θ'], color='C2')
    # plot decision boundary
    plt.vlines(trace['bd'].mean(), 0, 1, color='k')
    bd_hpd = az.hpd(trace['bd'])
    plt.fill_betweenx([0, 1], bd_hpd[0], bd_hpd[1], color='k', alpha=0.5)
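# Why 'bd' marks the decision boundary (worked step; bd follows the
# Deterministic definition bd = -α/β used by these logistic models):
# θ = sigmoid(α + β·x) crosses 0.5 exactly where the logit is zero,
# α + β·x = 0, i.e. x = -α/β. A quick numerical check:
from scipy.special import expit  # the logistic function

α, β = 1.5, -3.0
assert abs(expit(α + β * (-α / β)) - 0.5) < 1e-12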
import numpy as np
from scipy.interpolate import griddata
from scipy.signal import savgol_filter
import arviz as az


def hpd_area(x, y, credible_interval=0.94, smooth=True):
    """
    Returns the x-, y-coordinates of the highest posterior density area of
    the predicted values.

    Parameters:
    -----------
    x
        The x-coordinate of the data.
    y
        The predicted values of the y-coordinate of the data.
    credible_interval
        The credible interval for the hpd.

    Returns:
    --------
    A tuple (x, y) of the x-, y-coordinates of the hpd area.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    x_shape = x.shape
    y_shape = y.shape
    if 0 in x_shape or 0 in y_shape:
        return (np.array([]), np.array([]))
    if y_shape[-len(x_shape):] != x_shape:
        msg = "Dimension mismatch for x: {} and y: {}."
        msg += " y-dimensions should be (chain, draw, *x.shape) or"
        msg += " (draw, *x.shape)"
        raise TypeError(msg.format(x_shape, y_shape))
    if len(y_shape[:-len(x_shape)]) > 1:
        new_shape = tuple([-1] + list(x_shape))
        y = y.reshape(new_shape)
    hpd_ = az.hpd(y, credible_interval=credible_interval,
                  circular=False, multimodal=False)
    if smooth:
        x_data = np.linspace(x.min(), x.max(), 200)
        x_data[0] = (x_data[0] + x_data[1]) / 2
        hpd_interp = griddata(x, hpd_, x_data)
        y_data = savgol_filter(hpd_interp, axis=0,
                               window_length=55, polyorder=2)
        return (np.concatenate((x_data, x_data[::-1])),
                np.concatenate((y_data[:, 0], y_data[:, 1][::-1])))
    else:
        return (np.concatenate((x, x[::-1])),
                np.concatenate((hpd_[:, 0], hpd_[:, 1][::-1])))
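# Hedged usage sketch for hpd_area with synthetic data: with x of shape (n,)
# and y of shape (draws, n), the returned closed polygon can be drawn with
# plt.fill (smooth=False keeps the raw interval bounds).
import matplotlib.pyplot as plt

x = np.linspace(0, 1, 60)
y = np.sin(2 * np.pi * x) + np.random.normal(0, 0.2, size=(400, 60))
px, py = hpd_area(x, y, credible_interval=0.94, smooth=False)
plt.fill(px, py, alpha=0.3)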
def hpd_shade(ax, interval, samples):
    """
    Calculate high probability density intervals using arviz and fill
    between the interval bounds.

    ACCEPTS
        ax       [matplotlib axes]
        interval [float] fraction of the hpd localization
        samples  [2d array] posterior samples; columns are time points,
                 rows are samples
    """
    my_hpd = az.hpd(samples, credible_interval=interval)
    ax.fill_between(datadict['t_sim'], my_hpd[:, 0], my_hpd[:, 1],
                    alpha=0.2, color='C1')
import numpy as np
import arviz as az


def hdi(x: np.ndarray, credible_interval: float = 0.94) -> np.ndarray:
    """Calculate highest density interval (HDI).

    This function acts as an alias to the `arviz.hpd` function.

    Parameters
    ----------
    x
        Array containing MCMC samples.
    credible_interval
        Credible interval to compute. Defaults to 0.94.

    Returns
    -------
    np.ndarray
        Array containing the lower and upper value of the computed interval.
    """
    return az.hpd(x, credible_interval=credible_interval)
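# Example: the alias behaves exactly like az.hpd, so a 1-D chain of draws
# comes back as array([lower, upper]) covering 94% of the mass by default.
interval = hdi(np.random.normal(size=4000))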
plt.show()

#%% [markdown]
# ### Code 4.19-.20

#%%
sns.kdeplot(sample_mu)
plt.xlabel('sample_mu')
plt.ylabel('density')
plt.show()

sns.kdeplot(sample_sig)
plt.xlabel('sample_sigma')
plt.ylabel('density')
plt.show()

#%%
print(az.hpd(sample_mu, credible_interval=0.5))
az.hpd(sample_sig, credible_interval=0.5)

#%% [markdown]
# ### Code 4.21

#%%
d3 = np.random.choice(d2.height, size=20, replace=False)

#%% [markdown]
# ### Code 4.22-.23
# We are doing this to show that the posterior is not always Gaussian in
# shape. This is driven less by the mean and more by the variance, which
# tends to have a right tail.

#%%
mu_list = np.linspace(150, 170, num=200)
sigma_list = np.linspace(4, 20, num=200)
post = np.array(np.meshgrid(mu_list, sigma_list)).reshape(2, -1).T
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--sys-dir",
        help="specifies the parent folder for all config sys",
        type=str,
        required=False,
        default="/application/Distance-Based_Data/SupplementaryWebsite/",
    )
    parser.add_argument(
        "--results-dir",
        help="specifies the parent folder for all run results",
        type=str,
        required=False,
        default="/results/",
    )
    parser.add_argument(
        "--conf-steps",
        help="how many steps to check for confidence calibration",
        type=int,
        default=20,
    )
    parser.add_argument(
        "--predictive-samples",
        help="how many samples to draw from the predictive distribution",
        type=int,
        default=4000,
    )
    args = parser.parse_args()
    sys_dir = args.sys_dir
    results_dir = args.results_dir
    conf_steps = args.conf_steps
    n_post_samples = args.predictive_samples
    inference_data = os.path.join(results_dir, "last-inference")
    output = os.path.join(results_dir, "last-evaluation")
    contents = get_result_files(inference_data)
    repos = {}
    idx_keys = None
    obs = []
    for args, hist, tracer in contents:
        t = int(args["t"])
        sys_name = args["sys_name"]
        run_id = get_run_id_from_path(args["folder"])
        attribute = args["attribute"]
        if sys_name not in repos:
            repo = DistBasedRepo(sys_dir, sys_name, attribute)
            repos[sys_name] = repo
        repo = repos[sys_name]
        x_eval = list(repo.all_configs.keys())
        y_eval = list(repo.all_configs.values())
        err_dict = {}
        for it in hist:
            idx = tuple(args.values())
            model = hist[it]["prob-model"]
            trace = hist[it]["trace"]
            fitting_time = tracer.fitting_times[it]
            weighted_errs_per_sample = list(tracer.weighted_errs_per_sample)
            weighted_rel_errs_per_sample = list(
                tracer.weighted_rel_errs_per_sample)
            err_dict[it] = {
                "weighted_errs_per_sample": weighted_errs_per_sample,
                "weighted_rel_errs_per_sample": weighted_rel_errs_per_sample,
            }
            ft_selection_seconds = tracer.prior_spectrum_cost
            t_start = time.time()
            alt_preds = tracer.predict_raw(x_eval)
            with model:
                ppc = pm.sample_posterior_predictive(
                    trace,
                    samples=n_post_samples,
                )
            t_end = time.time()
            pred_time = t_end - t_start
            y_samples = ppc["y_observed"]
            # y_pred_median = np.median(y_samples, axis=0)
            y_pred = np.mean(az.hpd(y_samples, credible_interval=0.01), axis=1)
            correct_in_dict = {}
            conf_mape_dict = {}
            conf_mape_without_zeros = {}
            intervals = np.linspace(0, 1, conf_steps + 1)[1:-1]
            print("computing confidence accuracies")
            for perc in intervals:
                correct_in_dict[perc] = float(
                    Tracer.calc_confidence_err(perc, y_eval, y_samples))
                conf_mape_dict[perc], conf_mape_without_zeros[perc] = \
                    Tracer.calc_confidence_closest_mape(perc, y_eval, y_samples)
                print("Finished", perc)
            # correct_in_095 = float(Tracer.calc_confidence_err(0.95, y_eval, y_samples))
            assert len(y_eval) == len(y_pred)
            eval_mape = score_mape(None, None, y_eval, y_pred)
            results = {
                "args": args,
                "it": it,
                "eval_mape": eval_mape,
                "fitting_time": fitting_time,
                "ft_selection_seconds": ft_selection_seconds,
                "n_post_samples": n_post_samples,
                "pred_time": pred_time,
                "correct_in_conf": correct_in_dict,
                "mape_to_nearest_conf_bound": conf_mape_dict,
            }
            obs.append(results)
        print("storing pickle")
        if not os.path.exists(output):
            os.mkdir(output)
        out_path = os.path.join(output, run_id + ".p")
        with open(out_path, 'wb') as f:
            pickle.dump(obs, f)
        obs_str = pprint.PrettyPrinter().pformat(obs)
        str_out_path = os.path.join(output, run_id + ".txt")
        with open(str_out_path, 'w') as f:
            f.write(obs_str)
        out_path_err_dist = os.path.join(output, "errs-" + run_id + ".p")
        with open(out_path_err_dist, 'wb') as f:
            pickle.dump(err_dict, f)
    print("Index:", idx_keys)
    print("DONE")
                       observed=data["Marriage_std"].values)
    prior_samples = pm.sample_prior_predictive()
    m_5_4_trace = pm.sample()

# %%
mu_m_5_4_mean = m_5_4_trace["mu"].mean(axis=0)
residuals = data["Marriage_std"] - mu_m_5_4_mean

# %%
with m_5_4:
    m_5_4_ppc = pm.sample_posterior_predictive(
        m_5_4_trace, var_names=["mu", "divorce_std"], samples=1000)

mu_mean = m_5_4_ppc["mu"].mean(axis=0)
mu_hpd = az.hpd(m_5_4_ppc["mu"], credible_interval=0.89)
D_sim = m_5_4_ppc["divorce_std"].mean(axis=0)
D_PI = az.hpd(m_5_4_ppc["divorce_std"], credible_interval=0.89)

# %%
fig, ax = plt.subplots(figsize=(6, 6))
plt.errorbar(
    data["Divorce_std"].values,
    m_5_4_ppc["divorce_std"].mean(0),
    yerr=np.abs(m_5_4_ppc["divorce_std"].mean(0) - mu_hpd.T),
    fmt="C0o",
)
ax.scatter(data["Divorce_std"].values, D_sim)
min_x, max_x = data["Divorce_std"].min(), data["Divorce_std"].max()
#     ax=ax,
#     fill_kwargs={"alpha": 0.8, "color": "#a1dab4", "label": "Outcome 94% HPD"},
# )
#
# ax.set_xlabel("Predictor (stdz)")
# ax.set_ylabel("Outcome (stdz)")
# ax.set_title("Posterior predictive checks")
# ax.legend(ncol=2, fontsize=10)
# ax.set_xscale('log')
# ax.set_yscale('log')
# plt.show()
#
# az.plot_posterior(ppc_ms['dem'])

# Find equations for limits of HPD
hpd = pandas.DataFrame(az.hpd(ppc_ms['dem']), columns=['lower', 'upper'])
hpd['lower'] = 10**hpd['lower']
hpd['upper'] = 10**hpd['upper']
ppc_coefs = {}
for name, col in hpd.items():  # iteritems() is deprecated in pandas
    ppc_coefs[name] = LinearRegression(fit_intercept=False).fit(
        ms_df['bar_width'].values.reshape(-1, 1), hpd[name].values).coef_[0]

# Group data by river
ms_group = ms_df.groupby(['river', 'bar'])
group_river = ms_df.groupby('river')
group_bar = bar_df.groupby('river')

# Initialize the visualizer class
vh = Visualizer.Visualizer()
with model_rlg:
    log.info("The summary on the trace is as follows: %s",
             az.summary(trace, var_names=["alpha", "beta", "db", "pie"]))
    az.plot_trace(trace, var_names=["alpha", "beta", "db", "pie"])

# ------------------- plots ------------------------------------------- #
# get the mean of theta
theta_mean = trace["theta"].mean(0)
# get idx
idx = np.argsort(x_c)

# plot the predicted p of the data
plt.figure()
plt.plot(x_c[idx], theta_mean[idx], color="C2", lw=3)
# set a vertical line at the mean of the decision boundary
plt.vlines(trace["db"].mean(), 0, 1, color="k")
# get the hpd of db
db_hpd = az.hpd(trace["db"])
plt.fill_betweenx([0, 1], db_hpd[0], db_hpd[1], color="k", alpha=0.5)
plt.scatter(x_c, np.random.normal(y_0, 0.02), marker='.',
            color=[f'C{x}' for x in y_0])
az.plot_hpd(x_c, trace["theta"], color="C2")
plt.xlabel("petal length")
plt.ylabel("theta", rotation=0)
locs, _ = plt.xticks()
plt.xticks(locs, np.round(locs + x_0.mean(), 1))
plt.show()
    yl = pm.Bernoulli('yl', p=theta, observed=y_0)
    trace_0 = pm.sample(1000)

# In[9]:
varnames = ['alpha', 'beta', 'bd']
az.summary(trace_0, varnames)

# In[10]:
theta = trace_0['theta'].mean(axis=0)
idx = np.argsort(x_c)
plt.plot(x_c[idx], theta[idx], color='C2', lw=3)
plt.vlines(trace_0['bd'].mean(), 0, 1, color='k')
bd_hpd = az.hpd(trace_0['bd'])
plt.fill_betweenx([0, 1], bd_hpd[0], bd_hpd[1], color='k', alpha=0.5)
plt.scatter(x_c, np.random.normal(y_0, 0.02), marker='.',
            color=[f'C{x}' for x in y_0])
az.plot_hpd(x_c, trace_0['theta'], color='C2')
plt.xlabel(x_n)
plt.ylabel('theta', rotation=0)
# use original scale for xticks
locs, _ = plt.xticks()
plt.xticks(locs, np.round(locs + x_0.mean(), 1))
plt.savefig('B11197_04_04.png', dpi=300)
# %%
np.percentile(samples, [10, 90])

# %%
p_grid, posterior = posterior_grid_approx(success=3, tosses=3)
plt.plot(p_grid, posterior)
plt.xlabel("proportion water (p)")
plt.ylabel("Density")

# %%
samples = np.random.choice(p_grid, p=posterior, size=int(1e4), replace=True)
np.percentile(samples, [25, 75])

# %%
az.hpd(samples, credible_interval=0.5)

# %%
p_grid[posterior == max(posterior)]

# %%
stats.mode(samples)[0]

# %%
np.mean(samples), np.median(samples)

# %%
sum(posterior * abs(0.5 - p_grid))

# %%
loss = [sum(posterior * abs(p - p_grid)) for p in p_grid]
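# %%
# Completing the calculation above: the p that minimizes expected absolute
# loss is the posterior median, so the argmin recovers that point estimate.
p_grid[np.argmin(loss)]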
varnames = ['alpha', 'beta', 'sigma']
pm.summary(trace51, varnames=varnames)

#%% [markdown]
# ## Code 5.2

#%%
new_x_values = np.linspace(-3, 3.5, num=30)
shared_x.set_value(new_x_values)
shared_y.set_value(np.repeat(0, repeats=len(new_x_values)))
with m51:
    post_pred = pm.sample_posterior_predictive(trace51, samples=1000, model=m51)

#%%
mu_hpd = az.hpd(trace51['mu'], credible_interval=.89)
post_pred_hpd = az.hpd(post_pred['divorce'], credible_interval=.89)

#%%
idx = sort_vals(d.medianagemarriage_s)
sorted_x_vals = d.medianagemarriage_s[idx]

plt.figure(figsize=(10, 8))
plt.plot(d.medianagemarriage_s.values, d.divorce.values,
         color='blue', marker='.', linestyle='')
plt.plot(sorted_x_vals,
         trace51['alpha'].mean() + np.mean(trace51['beta']) * sorted_x_vals,
         color='black',
trace["a"] + trace["a_actor"][:, actor] + (trace["bp"] + trace["bpC"] * condition) * prosoc_left ) return logistic(logodds) # %% prosoc_left = [0, 1, 0, 1] condition = [0, 0, 1, 1] pred_raw = np.asarray( [p_link(p_l, c_d, 2 - 1, trace_12_4) for p_l, c_d in zip(prosoc_left, condition)] ).T pred_p = pred_raw.mean(axis=0) pred_p_PI = az.hpd(pred_raw, credible_interval=0.89) # %% d_pred = pd.DataFrame( dict(prosoc_left=[0, 1, 0, 1], condition=[0, 0, 1, 1], actor=np.repeat(2 - 1, 4)) ) # %% a_actor_zeros = np.zeros((1000, 7)) # %% def p_link(prosoc_left, condition, actor_sim, trace): Nsim = actor_sim.shape[0] // trace.nchains trace = trace[:Nsim]
# Plot results
_, ax = plt.subplots(figsize=(10, 6))
fp = logistic(pred_samples['f_pred'])
fp_mean = np.mean(fp, 0)
ax.plot(X_new[:, 0], fp_mean)
# plot the data (with some jitter) and the true latent function
ax.scatter(x_1, np.random.normal(y, 0.02),
           marker='.', color=[f'C{x}' for x in y])
az.plot_hpd(X_new[:, 0], fp, color='C2')
db = np.array([find_midpoint(f, X_new[:, 0], 0.5) for f in fp])
db_mean = db.mean()
db_hpd = az.hpd(db)
ax.vlines(db_mean, 0, 1, color='k')
ax.fill_betweenx([0, 1], db_hpd[0], db_hpd[1], color='k', alpha=0.5)
ax.set_xlabel('sepal_length')
ax.set_ylabel('θ', rotation=0)
plt.savefig('../figures/gp_classify_iris1.pdf', dpi=300)

# Change kernel to be sum of SE and linear, to improve tail behavior
with pm.Model() as model_iris2:
    #ℓ = pm.HalfCauchy("ℓ", 1)
    ℓ = pm.Gamma('ℓ', 2, 0.5)
    c = pm.Normal('c', x_1.min())
    τ = pm.HalfNormal('τ', 5)
    cov = (pm.gp.cov.ExpQuad(1, ℓ) + τ * pm.gp.cov.Linear(1, c) +
x_c = x_0 - x_0.mean()

with pm.Model() as model_simple:
    α = pm.Normal('α', mu=0, sd=10)
    β = pm.Normal('β', mu=0, sd=10)
    μ = α + pm.math.dot(x_c, β)
    θ = pm.Deterministic('θ', pm.math.sigmoid(μ))
    bd = pm.Deterministic('bd', -α/β)
    y_1 = pm.Bernoulli('y_1', p=θ, observed=y_simple)
    trace_simple = pm.sample(1000, tune=1000)

#%% this is slow convergence
theta = trace_simple['θ'].mean(axis=0)
idx = np.argsort(x_c)
plt.plot(x_c[idx], theta[idx], color='C2', lw=3)
plt.vlines(trace_simple['bd'].mean(), 0, 1, color='k')
bd_hpd = az.hpd(trace_simple['bd'])
plt.fill_betweenx([0, 1], bd_hpd[0], bd_hpd[1], color='k', alpha=0.5)
plt.scatter(x_c, np.random.normal(y_simple, 0.02), marker='.',
            color=[f'C{x}' for x in y_simple])
az.plot_hpd(x_c, trace_simple['θ'], color='C2')
plt.xlabel(x_n)
plt.ylabel('θ', rotation=0)
locs, _ = plt.xticks()
plt.xticks(locs, np.round(locs + x_0.mean(), 1));

#%%
data['age'] = data['age'] / 10
data['age2'] = np.square(data['age'])

with pm.Model() as logistic_model:
    pm.glm.GLM.from_formula(
        'outcome ~ age + age2 + job + marital + education + default + housing'
        ' + loan + contact + month + day_of_week + duration + campaign + pdays'
        ' + previous + euribor3m',
        data, family=pm.glm.families.Binomial())
    trace = pm.sample(500, tune=500, init='adapt_diag')

az.plot_trace(trace);
# 7.5
m7_3.name = 'm73'
m7_4.name = 'm74'
pm.compare({m7_3: tracem73, m7_4: tracem74})

# 7.6
rugged_seq = np.arange(-1, 8, .25)
mu_Af = np.zeros((len(rugged_seq), tracem74['mu'].shape[0]))
mu_noAf = np.zeros((len(rugged_seq), tracem74['mu'].shape[0]))
for row, seq in enumerate(rugged_seq):
    mu_Af[row, :] = (tracem74['alpha'] + tracem74['beta'] * rugged_seq[row]
                     + tracem74['beta2'] * 1)
    mu_noAf[row, :] = (tracem74['alpha'] + tracem74['beta'] * rugged_seq[row]
                       + tracem74['beta2'] * 0)
hpd_af = az.hpd(mu_Af.T, credible_interval=.97)
hpd_noaf = az.hpd(mu_noAf.T, credible_interval=.97)

plt.plot(da1.rugged, da1.log_gdp, marker='o', linestyle='', color='blue')
plt.plot(rugged_seq, mu_Af.mean(1), color='blue')
plt.fill_between(rugged_seq, hpd_af[:, 0], hpd_af[:, 1], alpha=.4)
plt.plot(da0.rugged, da0.log_gdp, marker='o', linestyle='', color='black')
plt.plot(rugged_seq, mu_noAf.mean(1), color='black')
plt.fill_between(rugged_seq, hpd_noaf[:, 0], hpd_noaf[:, 1],
                 alpha=.2, color='black')
plt.xlabel('Terrain Ruggedness Index')
plt.ylabel('log GDP')

# 7.7
with pm.Model() as m7_5:
    alpha = pm.Normal('alpha', mu=8, sigma=100)
import numpy as np
import arviz as az


def get_hpd(x, ci):
    """Return the HPD interval of ``x`` at mass ``ci``, ignoring NaNs."""
    if len(x) == 0:
        return np.array([np.nan, np.nan])
    return az.hpd(x[~np.isnan(x)], credible_interval=ci)
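# Example with missing values (synthetic): NaNs are dropped before the
# interval is computed, and an empty input yields [nan, nan].
x = np.concatenate([np.random.normal(size=1000), [np.nan] * 10])
get_hpd(x, ci=0.9)          # -> array([lower, upper])
get_hpd(np.array([]), 0.9)  # -> array([nan, nan])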
trace_0 = pm.sample(1000)

varnames = ['α', 'β', 'bd']
az.summary(trace_0, varnames)

theta = trace_0['θ'].mean(axis=0)
idx = np.argsort(x_c)

plt.figure()
# plot logistic curve
plt.plot(x_c[idx], theta[idx], color='C2', lw=3)
az.plot_hpd(x_c, trace_0['θ'], color='C2')
# plot decision boundary
plt.vlines(trace_0['bd'].mean(), 0, 1, color='k')
bd_hpd = az.hpd(trace_0['bd'])
plt.fill_betweenx([0, 1], bd_hpd[0], bd_hpd[1], color='k', alpha=0.5)
# plot jittered data
plt.scatter(x_c, np.random.normal(y_0, 0.02), marker='.',
            color=[f'C{x}' for x in y_0])
plt.xlabel(x_n)
plt.ylabel('p(y=1)', rotation=0)
# use original scale for xticks
locs, _ = plt.xticks()
plt.xticks(locs, np.round(locs + xmean, 1))
#plt.xticks(x_c[idx], np.round(x_0[idx], 1))
#plt.savefig('../figures/logreg_bayes_1d_sat.pdf', dpi=300)