def show_posteriors(self, kde=True):
    if hasattr(self, 'burned_trace'):
        pm.plots.traceplot(trace=self.burned_trace, varnames=["std", "beta", "alpha"])
        pm.plot_posterior(trace=self.burned_trace, varnames=["std", "beta", "alpha"], kde_plot=kde)
        pm.plots.autocorrplot(trace=self.burned_trace, varnames=["std", "beta", "alpha"])
    else:
        print("You must sample from the posteriors first.")
def effect_difference(effect1, effect2, name1, name2, CI=95.0, show=True):
    diff = effect1 - effect2
    label = str(name1) + ' - ' + str(name2)
    plt.figure(figsize=(4, 3))
    plt.locator_params(nbins=4)
    plt.hist(diff, bins=100, label=label)
    plt.legend()
    if show:
        plt.show()
    fig = plt.figure(figsize=(3, 3))
    plt.locator_params(nbins=4)
    ax = fig.gca()
    pm.plot_posterior(effect1 - effect2, varnames=[name1, name2], ref_val=0, color='#87ceeb', ax=ax)
    ax.set_title(label)
    if show:
        plt.show()
    low_p = (100.0 - CI) / 2.0
    high_p = low_p + CI
    print(label, str(CI) + ' CI:', np.percentile(diff, low_p), np.percentile(diff, high_p),
          'Pr > 0:', (diff > 0).mean())
    return fig
def do_differences(recalculate=False): trace_cols = ["duration", "dist_err", "x_err", "y_err", "rms_x", "rms_y"] trace_coeffs = [[(5, 50), 50]] + [[(0, 2), 0.5]]*5 trace_cols = ["path_length", "move_l", "move_r", "move_x", "move_b", "move_f", "move_y"] trace_coeffs = [[(0, 10), 10], [(0, 5), 3], [(0, 5), 3], [(0, 10), 6], [(0, 5), 1.5], [(0, 5), 7], [(0, 10), 9]] if recalculate: traces = analyze_differences(analyses, trace_cols, trace_coeffs) else: traces = {col: load_best_result(col) for col in trace_cols} for col, best_result in traces.items(): trace = best_result.trace with figure(f"mean_std_{col}"): ax = pm.plot_posterior(trace[100:], varnames=[r"group1_mean", r"group2_mean", r"group1_std", "group2_std"], kde_plot=True, color="C0") for a in (1, 3): ax[a].lines[0].set_color("C1") with figure(f"difference_{col}"): pm.plot_posterior(trace[1000:], varnames=["difference of means", "effect size"], ref_val=0, kde_plot=True, color="C2")
def summarize(best_result, kde=True, plot=True):
    trace, model = best_result
    if plot:
        ax = pm.plot_posterior(trace[100:],
                               varnames=[r"group1_mean", r"group2_mean",
                                         r"group1_std", "group2_std", r"ν_minus_one"],
                               kde_plot=kde, color="C0")
        if kde:
            for a in (1, 3):
                ax[a].lines[0].set_color("C1")
        plt.figure()
        pm.plot_posterior(trace[1000:],
                          varnames=["difference of means", "difference of stds", "effect size"],
                          ref_val=0, kde_plot=True, color="C2")
        plt.figure()
        pm.forestplot(trace[1000:], varnames=[v.name for v in model.vars[:2]])
        plt.figure()
        pm.forestplot(trace[1000:], varnames=[v.name for v in model.vars[2:]])
    pm.summary(trace[1000:],
               varnames=["difference of means", "difference of stds", "effect size"])
def main():
    with pm.Model() as model:
        # Use a strong prior: the mean is pulled towards zero rather than towards 1.
        prior = pm.Beta('prior', 0.5, 3)
        output = pm.Binomial('output', n=100, observed=50, p=prior)
        step = pm.Metropolis()
        trace = pm.sample(1000, step=step)
        pm.traceplot(trace)
        pm.plot_posterior(trace, figsize=(5, 5), kde_plot=True, rope=[0.45, 0.55])
        # The ROPE is an interval you define around the value you expect.
        # Check whether the ROPE falls within the HPD: if it does, the expected
        # value is within the HPD, and increasing the sample size may sharpen
        # the estimate of the mean.

        # Gelman-Rubin statistic
        pm.gelman_rubin(trace)
        # forest plot
        pm.forestplot(trace, varnames=['prior'])
        # summary (look at mc_error here: this is the standard error and should be low)
        pm.df_summary(trace)
        # autocorrelation
        pm.autocorrplot(trace)
        # effective sample size
        pm.effective_n(trace)['prior']
def main():
    # Hyperparameters
    n_flips = 125
    n_coins = 10
    n_draws = 5000
    n_init_steps = 10000
    n_burn_in_steps = 1000

    # Create Causal Distribution
    causal_probs = np.random.uniform(size=n_coins)

    # Create Observations
    X = np.array([
        np.random.choice(2, p=[1 - p_, p_], size=n_flips)
        for i, p_ in enumerate(causal_probs)
    ]).T

    # Create Model
    with pm.Model() as model:
        ps = pm.Beta('probs', alpha=1, beta=1, shape=n_coins)
        components = pm.Bernoulli.dist(p=ps, shape=n_coins)
        w = pm.Dirichlet('w', a=np.ones(n_coins))
        mix = pm.Mixture('mix', w=w, comp_dists=components, observed=X)

    # Train Model
    with model:
        trace = pm.sample(n_draws, n_init=n_init_steps, tune=n_burn_in_steps)

    # Display Results
    pm.plot_trace(trace, var_names=['w', 'probs'])
    plt.show()
    pm.plot_posterior(trace, var_names=['w', 'probs'])
    plt.show()
def display_posterior(trace, filename): prj = project_reader(filename) WP_NAMES = np.array(prj[1][:, 0]) WP_NUMBER = prj[1][:, 0].shape[0] PV_names = list() PVpartial_names = list() EV_names = list() COMP_names = list() SPI_names = list() CPI_names = list() Index_names = ["SPI_PROJECT", "CPI_PROJECT", "ETC", "EAC", "TEAC"] RISK_names = list() projectDefinition = prj[1] for x in range(WP_NUMBER): for y in range(2): if (projectDefinition[x][y + 1] != 0): rname = projectDefinition[x][0] + "_Risk_%d" % (y + 1) RISK_names.append(rname) for x in range(WP_NUMBER): PV_names.append("PV_%s" % WP_NAMES[x]) PVpartial_names.append("Partial_PV_%s" % WP_NAMES[x]) EV_names.append("EV_%s" % WP_NAMES[x]) COMP_names.append("COMPLETION_%s" % WP_NAMES[x]) SPI_names.append("SPI_%s" % WP_NAMES[x]) CPI_names.append("CPI_%s" % WP_NAMES[x]) all_names = RISK_names + PV_names + PVpartial_names + EV_names + COMP_names + SPI_names + CPI_names + Index_names print( pm.summary(trace, varnames=all_names, stat_funcs=[trace_mean, trace_sd, trace_quantiles])) pm.plot_posterior(trace, varnames=all_names)
def save_mat_fig(trace_array, gtype="traceplot"):
    """
    Save the traceplot array to an image source.
    :param trace_array:
    :return:
    """
    # fig = Figure()
    if gtype == "traceplot":
        print("ENTER save_traceplot")
        pm.traceplot(trace_array, figsize=(12, 12))
        print("PLT.GCF()")
        fig = plt.gcf()
        print("FIGURE")
        buf = io.BytesIO()
        print("BUFF")
        fig.savefig(buf, format="png")
        data = base64.b64encode(buf.getbuffer()).decode("utf8")
    elif gtype == "posterior":
        print("ENTER plot_posterior")
        pm.plot_posterior(trace_array, figsize=(12, 12))
        fig = plt.gcf()
        buf = io.BytesIO()
        fig.savefig(buf, format="png")
        data = base64.b64encode(buf.getbuffer()).decode("utf8")
    else:
        data = []
    return "data:image/png;base64,{}".format(data)
def plot_traces(traces, names, show=True):
    fig_1 = plt.figure()
    plt.locator_params(nbins=4)
    for (t, n) in zip(traces, names):
        plt.hist(t, bins=100, label=n)
    plt.legend()
    if show:
        plt.show()
    N = len(traces) + 1
    figs = []
    for (t, n, i) in zip(traces, names, range(1, N)):
        fig = plt.figure(figsize=(3, 3))
        plt.locator_params(nbins=4)
        ax = fig.gca()
        # ax = fig_2.add_subplot(1, N, i)
        pm.plot_posterior(t, varnames=[n], color='#87ceeb', ax=ax)
        ax.set_title(n)
        ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        ax.locator_params(axis='x', nbins=4)
        ax.locator_params(axis='y', nbins=1)
        figs.append((n, fig))
    if show:
        plt.show()
    return fig_1, figs
def main(n, observed):
    '''
    Parameters
    ----------
    n : int
        Number of trials.
    observed : int
        Observed number of successes.
    '''
    with pm.Model() as exam_model:
        # weak Beta(0.5, 0.5) prior
        prior = pm.Beta('prior', 0.5, 0.5)

        # Bernoulli trials modelled with a binomial distribution
        obs = pm.Binomial('obs', n=n, p=prior, observed=observed)

        # plot the model design
        pm.model_to_graphviz(exam_model)

        # use Metropolis-Hastings for sampling
        step = pm.Metropolis()

        # draw samples from the posterior
        trace = pm.sample(5000, step)

        # plot the posterior
        pm.plot_posterior(trace)

        # calculate the Gelman-Rubin statistic
        pm.gelman_rubin(trace)
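# A minimal usage sketch for main() above; the trial counts are hypothetical.
if __name__ == '__main__':
    main(n=100, observed=61)  # e.g. 100 exam questions, 61 correct answers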
def fit_model(model):
    start = time.time()
    with model:
        step = pm.NUTS()
        trace = pm.sample(2000, step=step, chains=1, cores=1, tune=1000)
    print(time.time() - start)
    pm.plot_posterior(trace)
    pm.traceplot(trace)
def update_bayesian_modeling(mean_upd, var_upd, alpha_upd, beta_upd, inv_a_upd, inv_b_upd, iv_upd, strategy, stock_price, strike_price, risk_free, time): with pm.Model() as update_model: prior = pm.InverseGamma('bv', inv_a_upd, inv_b_upd) likelihood = pm.InverseGamma('like', inv_a_upd, inv_b_upd, observed=iv_upd) with update_model: # step = pm.Metropolis() v_trace_update = pm.sample(10000, tune=1000) #print(v_trace['bv'][:]) trace_update = v_trace_update['bv'][:] #print(trace) pm.traceplot(v_trace_update) plt.show() pm.autocorrplot(v_trace_update) plt.show() pm.plot_posterior(v_trace_update[100:], color='#87ceeb', point_estimate='mean') plt.show() s = pm.summary(v_trace_update).round(2) print("\n Summary") print(s) a = np.random.choice(trace_update, 10000, replace=True) ar = [] for i in range(9999): t = a[i] / 100 ar.append(t) #print("Bayesian Volatility Values", ar) op = [] for i in range(9999): temp = BS_price(strategy, stock_price, strike_price, risk_free, ar[i], time) op.append(temp) #print("Bayesian Option Prices", op) plt.hist(ar, bins=50) plt.title("Volatility") plt.ylabel("Frequency") plt.show() plt.hist(op, bins=50) plt.title("Option Price") plt.ylabel("Frequency") plt.show() return trace_update
def plot_ADVI_posterior(NNInput, ADVIApprox):
    if (NNInput.PlotShow):
        fig = plt.figure()
        pymc3.plot_posterior(ADVIApprox.sample(NNInput.NApproxSamplesADVI),
                             color='LightSeaGreen')
        plt.show()
        FigPath = NNInput.PathToOutputFldr + '/ADVI_Posterior.png'
        fig.savefig(FigPath)
def plot_posterior(trace, *args, **kwargs):
    """Plot posteriors of random variables.

    The default behaviour is to plot all random variables. Variables can be
    plotted selectively by naming them in the `var_names` argument.
    """
    # convert vector-valued variables into multiple scalars
    trace, vectors = disentangle_trace(trace)
    # resolve variable names
    var_names = resolve_var_names(kwargs.get('var_names'), trace, vectors)
    if var_names is not None:
        kwargs['var_names'] = var_names
    pm.plot_posterior(trace, *args, **kwargs)  # delegate to `pymc3.plot_posterior`
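# Brief usage sketch for the wrapper above, assuming `trace` comes from a fitted
# PyMC3 model containing a vector-valued variable named `beta` (hypothetical name).
plot_posterior(trace)                                 # plot every variable
plot_posterior(trace, var_names=['beta'], ref_val=0)  # only `beta`, with a zero reference line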
def model_pymc(std_prior_lower=100.0, std_prior_upper=1000.0): with pm.Model() as model: group_sa_mean = pm.Normal('SA_mean', prior_1.mean(), sd=prior_1.std()) group_me_mean = pm.Normal('ME_mean', prior_2.mean(), sd=prior_2.std()) # std_prior_lower = 100.0 # std_prior_upper = 1000.0 # changed to more accurately reflect the standard deviations here with model: group_sa_std = pm.Uniform('SA_std', lower=std_prior_lower, upper=std_prior_upper) group_me_std = pm.Uniform('ME_std', lower=std_prior_lower, upper=std_prior_upper) group_sa = pm.Normal('SA_acts', mu=group_sa_mean, sd=group_sa_std, observed=sa_80s) group_me = pm.Normal('ME_acts', mu=group_me_mean, sd=group_me_std, observed=me_00s) diff_of_means = pm.Deterministic('difference of means', group_sa_mean - group_me_mean) diff_of_stds = pm.Deterministic('difference of stds', group_sa_std - group_me_std) effect_size = pm.Deterministic( 'effect size', diff_of_means / np.sqrt( (group_sa_std**2 + group_me_std**2) / 2)) with model: trace = pm.sample(25000, njobs=4) pm.plot_posterior(trace[3000:], varnames=['SA_mean', 'ME_mean', 'SA_std', 'ME_std'], color='#F4953B') pm.plot_posterior( trace[3000:], varnames=['difference of means', 'difference of stds', 'effect size'], ref_val=0, color='#87ceeb') plt.show()
def summary(trace, varnames=None, plot_convergence_stats=False):
    pm.plot_posterior(trace, varnames=varnames)
    plt.show()
    pm.plots.traceplot(trace, varnames=varnames)
    plt.show()
    pm.plots.forestplot(trace, varnames=varnames)
    plt.show()
    if plot_convergence_stats:
        pm.plots.energyplot(trace)
        plt.show()
    print('Gelman-Rubin ',
          max(np.max(gr_values) for gr_values in pm.gelman_rubin(trace).values()))
    return pm.summary(trace, varnames=varnames)
def dice_bias(): y = np.asarray([20, 21, 17, 19, 17, 30]) k = len(y) p = 1/k n = y.sum() with pm.Model() as dice_model: # initializes the Dirichlet distribution with a uniform prior: a = np.ones(k) theta = pm.Dirichlet("theta", a=a) # Since theta[5] will hold the posterior probability # of rolling a 6 we'll compare this to the # reference value p = 1/6 to determine the amount of bias # in the die six_bias = pm.Deterministic("six_bias", theta[k-1] - p) results = pm.Multinomial("results", n=n, p=theta, observed=y) dice_trace = pm.sample(draws=1000) pm.traceplot(dice_trace, combined=True, lines={"theta": p}) axes = pm.plot_posterior(dice_trace, varnames=["theta"], ref_val=np.round(p, 3)) for i, ax in enumerate(axes): ax.set_title(f"{i+1}") six_bias = dice_trace["six_bias"] six_bias_perc = len(six_bias[six_bias>0])/len(six_bias) plt.show() print(f'P(Six is biased) = {six_bias_perc:.2%}')
def excel_posterior(trace, filename): #Need to read the data again to set activity number and names prj = project_reader(filename) WP_NAMES = np.array(prj[1][:, 0]) WP_NUMBER = prj[1][:, 0].shape[0] PV_names = list() PVpartial_names = list() EV_names = list() COMP_names = list() SPI_names = list() CPI_names = list() Index_names = ["SPI_PROJECT", "CPI_PROJECT", "ETC", "EAC", "TEAC"] RISK_names = list() projectDefinition = prj[1] for x in range(WP_NUMBER): for y in range(2): if (projectDefinition[x][y + 1] != 0): rname = projectDefinition[x][0] + "_Risk_%d" % (y + 1) RISK_names.append(rname) for x in range(WP_NUMBER): PV_names.append("PV_%s" % WP_NAMES[x]) PVpartial_names.append("Partial_PV_%s" % WP_NAMES[x]) EV_names.append("EV_%s" % WP_NAMES[x]) COMP_names.append("COMPLETION_%s" % WP_NAMES[x]) SPI_names.append("SPI_%s" % WP_NAMES[x]) CPI_names.append("CPI_%s" % WP_NAMES[x]) all_names = RISK_names + PV_names + PVpartial_names + EV_names + COMP_names + SPI_names + CPI_names + Index_names outputName = filename + "Output.xlsx" traceName = filename + "Trace.xlsx" pm.summary(trace, varnames=all_names, stat_funcs=[trace_mean, trace_sd, trace_quantiles]).to_excel(outputName, sheet_name="Summary") pm.plot_posterior(trace, varnames=all_names) pm.trace_to_dataframe(trace).to_excel(traceName, sheet_name="Trace")
def plot_difference_of_means(trace, **kwargs):
    """
    Plots a difference of means graph

    @param trace a trace object
    @param **kwargs keyword arguments for matplotlib
    @returns a plot axes with the graph plotted
    """
    ps1 = pm.plot_posterior(trace, varnames=['difference of means'],
                            ref_val=0, color='#87ceeb', **kwargs)
    return ps1
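# Short usage sketch, assuming `trace` holds a 'difference of means' deterministic
# as in the BEST-style models elsewhere in this collection.
axes = plot_difference_of_means(trace, figsize=(5, 4))
plt.show()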
def _plot_posterior(self, betaj, save_path=None, axes_size=4, shape=None, credible_interval=0.94, color_bad=True, beta0=None): fig, axes = self._predictor_canvas(self.predictors, axes_size, 1, shape=shape) if beta0 is None: beta0 = betaj[0] betaj = betaj[1:] pm.plot_posterior(beta0.values, point_estimate='mode', ax=axes[0, 0], color=color) axes[0, 0].set_xlabel(r'$\beta_0$ (Intercept)', fontdict=f_dict) axes[0, 0].set_title('', fontdict=f_dict) columns = self.predictors.columns for i, (ax, feature) in enumerate(zip(axes.flatten()[1:], columns)): pm.plot_posterior(betaj[feature].values, point_estimate='mode', credible_interval=credible_interval, ax=ax, color=color) ax.set_title('', fontdict=f_dict) ax.set_xlabel(r'$\beta_{{{}}}$ ({})'.format(i + 1, ' '.join(feature)), fontdict=f_dict) if color_bad and not self.is_credible(self.trace['zbetaj'][:, i], credible_interval): ax.patch.set_facecolor('#FFCCCB') ax.patch.set_visible(True) if save_path is not None: fig.savefig(save_path, transparent=True) return fig
def applyBayesianSampling(self, final_col_list): formula = 'G3 ~ ' + ' + '.join([i for i in final_col_list]) print('formula : ', formula) # Context for the model with pm.Model() as normal_model: # Setting the likelihood as a normal distribution family = pm.glm.families.Normal() # Creating the model using the family, data and formula pm.GLM.from_formula(formula, data=self.X_train, family=family) # Perform Markov Chain Monte Carlo sampling normal_trace = pm.sample(draws=3000, chains=2, tune=1200, cores=-1) print(pm.summary(normal_trace)) pm.traceplot(normal_trace) plt.show() pm.plot_posterior(normal_trace) plt.show() blr_formula = self.bayesianLRFormula(normal_trace) print('blr_formula :', blr_formula) return normal_trace, blr_formula
def cmt_example():
    obs = {
        'n1_Hp_Rp': 519,
        'n1_Hp': 10473,
        'P_Hp': 0.1561185895315763,
        'n_Hp_Rp': 42,
        'n_Hn_Rp': 2,
        'n_Hp_Rn': 687,
        'n_Hn_Rn': 3624
    }
    trace = sample_heuristic_precision(obs, {'draws': 10000, 'tune': 5000})
    pm.plot_posterior(trace, credible_interval=0.94)
    pm.plot_posterior(trace, credible_interval=0.99)
    help(pm.plot_posterior)
    pm.traceplot(trace)
    pm.forestplot(trace)
    q_samples = trace['q']
    np.average(q_samples < 0.03)
def plot_posterior(self, varnames=None, ref_val=None):
    """Generate informative plots from the trace.

    Parameters
    ----------
    varnames : iterable of str or None, optional
        The model variables to generate plots for (default None). If None,
        defaults to all variables.
    ref_val : int or float or None, optional
        The value to use as reference on the plots (default None). Generally
        only relevant for posteriors on differences of means and standard
        deviations. For example, if ref_val = 0, a bar will be placed on the
        posterior plot at a point corresponding to zero difference in
        parameters. If this bar lies within the 95% HPD, then it is likely
        that there is no significant difference between the parameters.
    """
    varnames = varnames or self.model_variables
    pm.plot_posterior(self.trace, varnames=varnames, ref_val=ref_val, color='#8BCAF1')
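# Usage sketch, assuming `fitted` is an instance of this class with `trace` and
# `model_variables` already populated (the instance name is hypothetical).
fitted.plot_posterior()                                             # all model variables
fitted.plot_posterior(varnames=['difference of means'], ref_val=0)  # one variable, zero reference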
def plot_posterior(self, varnames=["estimate"], ref_val=np.log2(3), color="LightSeaGreen",
                   rope=[-.4, .4], xlim=[-.5, 2], suffix=""):
    plt.rcParams.update({'font.size': 15, 'figure.figsize': (7, 5)})
    pm.plot_posterior(self.trace, varnames=varnames, ref_val=ref_val, color=color, rope=rope)
    plt.title("log2FC Posterior for {}".format(self.p.split(";")[0]))
    plt.xlim(xlim)
    plt.ylabel("Probability")
    plt.savefig(os.path.join(self.plot_dir, "posteriors", "eps",
                             "{}{}.eps".format(self.p, suffix)),
                format='eps', dpi=900)
    plt.savefig(os.path.join(self.plot_dir, "posteriors", "png",
                             "{}{}.png".format(self.p, suffix)))
    plt.close()
def getAngelRate(data, n_sample=10000, n_chain=3, ax=None):
    # Organize the data
    data_0 = data.query('campaign != 1')
    data_1 = data.query('campaign == 1')
    d = np.array([[sum(data_0['angel'] == 0), sum(data_0['angel'] == 1), sum(data_0['angel'] == 2)],
                  [sum(data_1['angel'] == 0), sum(data_1['angel'] == 1), sum(data_1['angel'] == 2)]])
    weight = np.array([[1.0, 1.0, 1.0], [1.0, 0.0, 2.0]])

    # Parameter estimation
    with pm.Model() as model:
        alpha = [1., 1., 1.]  # hyper-parameter of the Dirichlet distribution
        pi = pm.Dirichlet('pi', a=np.array(alpha))
        for i in np.arange(d.shape[0]):
            piw = pi * weight[i]
            m = pm.Multinomial('m_%s' % (i), n=np.sum(d[i]), p=piw, observed=d[i])
        trace = pm.sample(n_sample, chains=n_chain)
    np.savetxt('trace_pi.csv', trace['pi'], delimiter=',')

    # Silver
    hpd_l, hpd_u = pm.hpd(trace['pi'][:, 1])
    print('Silver : 95% HPD : {}-{}'.format(hpd_l, hpd_u))
    print('Silver ExpectedValue : {}'.format(trace['pi'][:, 1].mean()))

    # Gold
    hpd_l, hpd_u = pm.hpd(trace['pi'][:, 2])
    print('Gold : 95% HPD : {}-{}'.format(hpd_l, hpd_u))
    print('Gold ExpectedValue : {}'.format(trace['pi'][:, 2].mean()))

    # Save the figure
    if ax is not None:
        pm.plot_posterior(trace['pi'][:, 0], ax=ax[0])
        pm.plot_posterior(trace['pi'][:, 1], ax=ax[1])
        pm.plot_posterior(trace['pi'][:, 2], ax=ax[2])
        ax[0].set_title('Nothing')
        ax[1].set_title('SilverAngel')
        ax[2].set_title('GoldAngel')
    return trace
def run(n=1500):
    if n == 'short':
        n = 50

    print('Model with no censored data (omniscient)')
    with omniscient_model:
        trace = pm.sample(n)
        pm.plot_posterior(trace[-1000:], varnames=['mu', 'sigma'])
        plt.show()

    print('Imputed censored model')
    with imputed_censored_model:
        trace = pm.sample(n)
        pm.plot_posterior(trace[-1000:], varnames=['mu', 'sigma'])
        plt.show()

    print('Unimputed censored model')
    with unimputed_censored_model:
        trace = pm.sample(n)
        pm.plot_posterior(trace[-1000:], varnames=['mu', 'sigma'])
        plt.show()
## Plot KDE and sampled values for each parameter.
#pm.traceplot(trace)

model_idx_sample = trace['model_index']
pM1 = sum(model_idx_sample == 0) / len(model_idx_sample)
pM2 = 1 - pM1

plt.figure(figsize=(15, 15))
plt.subplot2grid((3, 3), (0, 0), colspan=3)
plt.plot(model_idx_sample,
         label='p(DiffMu|D) = {:.3f} ; p(SameMu|D) = {:.3f}'.format(pM1, pM2))
plt.xlabel('Step in Markov Chain')
plt.legend(loc='upper right', framealpha=0.75)

count = 0
position = [(1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)]
for i in range(0, 4):
    mui_sample = trace['mu1'][:, i][model_idx_sample == 0]
    for j in range(i+1, 4):
        muj_sample = trace['mu1'][:, j][model_idx_sample == 0]
        ax = plt.subplot2grid((3, 3), position[count])
        pm.plot_posterior(mui_sample - muj_sample, ref_val=0, ax=ax)
        plt.title(r'$\mu_{} - \mu_{}$'.format(i+1, j+1))
        plt.xlim(-0.3, 0.3)
        count += 1

plt.tight_layout()
plt.savefig('Figure_12.5.png')
plt.show()
## Print summary for each trace
#pm.summary(trace)

## Check for mixing and autocorrelation
#pm.autocorrplot(trace, vars=[mu, tau])

## Plot KDE and sampled values for each parameter.
#pm.traceplot(trace)

## Extract chains
muG_sample = trace['muG']
tauG_sample = trace['tauG']
m_sample = trace['m']
d_sample = trace['d']

# Plot the hyperdistributions:
_, ax = plt.subplots(1, 4, figsize=(20, 5))
pm.plot_posterior(muG_sample, bins=30, ax=ax[0])
ax[0].set_xlabel(r'$\mu_g$', fontsize=16)
pm.plot_posterior(tauG_sample, bins=30, ax=ax[1])
ax[1].set_xlabel(r'$\tau_g$', fontsize=16)
pm.plot_posterior(m_sample, bins=30, ax=ax[2])
ax[2].set_xlabel('m', fontsize=16)
pm.plot_posterior(d_sample, bins=30, ax=ax[3])
ax[3].set_xlabel('d', fontsize=16)
plt.tight_layout()
plt.savefig('Figure_15.9.png')
plt.show()
with model1: theta = pm.Dirichlet("theta", a=alpha, shape=(D, K)) phi = pm.Dirichlet("phi", a=beta, shape=(K, V)) doc = pm.DensityDist('doc', log_lda(theta, phi), observed=data) with model1: inference = pm.ADVI() approx = pm.fit( n=10000, method=inference, callbacks=[pm.callbacks.CheckParametersConvergence(diff='absolute')]) #inference tr1 = approx.sample(draws=1000) pm.plots.traceplot(tr1) pm.plot_posterior(tr1, color='LightSeaGreen') plt.plot(approx.hist) ''' With MCMC ''' with model: theta = pm.Dirichlet("thetas", a=alpha, shape=(D, K)) phi = pm.Dirichlet("phis", a=beta, shape=(K, V)) z = pm.Categorical("zx", p=theta, shape=(W, D)) w = pm.Categorical("wx", p=t.reshape(phi[z.T], (D * W, V)), observed=data.reshape(D * W)) with model: tr = pm.sample(1000, chains=1)
def bayesian_t(df, val_col, grp_col='regulated', sig_fac=2, unif_l=0, unif_u=20, exp_mn=30, plot_trace=False, plot_ppc=False, plot_vars=False, plot_diffs=True, steps=2000, mcmc='metropolis'): """ Simple Bayesian test for differences between two groups. Args: df Dataframe. Must have a column containing values and a categorical 'regulated' column that is [0, 1] to define the two groups val_col Name of the values column grp_col Name of the categorical column defining the groups sig_fac Factor applied to std. dev. of pooled data to define prior std. dev. for group means unif_l Lower bound for uniform prior on std. dev. of group means unif_u Upper bound for uniform prior on std. dev. of group means exp_mn Mean of exponential prior for v in Student-T distribution plot_trace Whether to plot the MCMC traces plot_ppc Whether to perform and plot the Posterior Predictive Check plot_vars Whether to plot posteriors for variables plot_diffs Whether to plot posteriors for differences steps Number of steps to take in MCMC chains mcmc Sampler to use: ['metropolis', 'slice', 'nuts'] Returns: Creates plots showing the distribution of differences in means and variances, plus optional diagnostics. Returns the MCMC trace """ import numpy as np import pymc3 as pm import pandas as pd import seaborn as sn import matplotlib.pyplot as plt # Get overall means and s.d. mean_all = df[val_col].mean() std_all = df[val_col].std() # Group data grpd = df.groupby(grp_col) # Separate groups reg_data = grpd.get_group(1)[val_col].values ureg_data = grpd.get_group(0)[val_col].values # Setup model with pm.Model() as model: # Priors for means of Student-T dists reg_mean = pm.Normal('regulated_mean', mu=mean_all, sd=std_all*sig_fac) ureg_mean = pm.Normal('unregulated_mean', mu=mean_all, sd=std_all*sig_fac) # Priors for std. dev. of Student-T dists reg_std = pm.Uniform('regulated_std', lower=unif_l, upper=unif_u) ureg_std = pm.Uniform('unregulated_std', lower=unif_l, upper=unif_u) # Prior for v of Student-T dists nu = pm.Exponential('v_minus_one', 1./29.) + 1 # Define Student-T dists # PyMC3 uses precision = 1 / (sd^2) to define dists rather than std. dev. reg_lam = reg_std**-2 ureg_lam = ureg_std**-2 reg = pm.StudentT('regulated', nu=nu, mu=reg_mean, lam=reg_lam, observed=reg_data) ureg = pm.StudentT('unregulated', nu=nu, mu=ureg_mean, lam=ureg_lam, observed=ureg_data) # Quantities of interest (difference of means and std. devs.) 
diff_of_means = pm.Deterministic('difference_of_means', reg_mean - ureg_mean) diff_of_stds = pm.Deterministic('difference_of_stds', reg_std - ureg_std) # Run sampler to approximate posterior if mcmc == 'metropolis': trace = pm.sample(steps, step=pm.Metropolis()) elif mcmc == 'slice': trace = pm.sample(steps, step=pm.Slice()) elif mcmc == 'nuts': trace = pm.sample(steps) else: raise ValueError("mcmc must be one of ['metropolis', 'slice', 'nuts']") # Plot results # Raw data fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14,4)) for name, grp in grpd: sn.distplot(grp[val_col].values, ax=axes[name], kde=False) axes[name].set_title('Regulated = %s' % name) # Traces if plot_trace: pm.traceplot(trace) # Posteriors for variables if plot_vars: pm.plot_posterior(trace[1000:], varnames=['regulated_mean', 'unregulated_mean', 'regulated_std', 'unregulated_std'], alpha=0.3) # Posteriors for differences if plot_diffs: pm.plot_posterior(trace[1000:], varnames=['difference_of_means', 'difference_of_stds'], ref_val=0, alpha=0.3) # Posterior predictive check if plot_ppc: ppc = pm.sample_ppc(trace, samples=500, model=model, size=100) fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14,4)) sn.distplot([n.mean() for n in ppc['unregulated']], ax=axes[0]) axes[0].axvline(ureg_data.mean(), c='k') axes[0].set(title='Posterior predictive of the mean (unregulated)', xlabel='Mean', ylabel='Frequency') sn.distplot([n.mean() for n in ppc['regulated']], ax=axes[1]) axes[1].axvline(reg_data.mean(), c='k') axes[1].set(title='Posterior predictive of the mean (regulated)', xlabel='Mean', ylabel='Frequency') return trace
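# Minimal usage sketch for bayesian_t() above; the dataframe and its columns are
# made-up example data, not taken from the original analysis.
import numpy as np
import pandas as pd

rng = np.random.RandomState(42)
example_df = pd.DataFrame({
    'value': np.concatenate([rng.normal(10, 2, 50), rng.normal(12, 2, 50)]),
    'regulated': np.repeat([0, 1], 50),
})
trace = bayesian_t(example_df, val_col='value', grp_col='regulated',
                   steps=2000, mcmc='metropolis')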
# Plot the trajectory of the last 500 sampled values.
plt.plot(theta1_sample[-500:], theta2_sample[-500:], marker='o', color='skyblue')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel(r'$\theta1$')
plt.ylabel(r'$\theta2$')

# Display means in plot.
plt.plot(0, label='M = %.3f, %.3f' % (np.mean(theta1_sample), np.mean(theta2_sample)), alpha=0.0)
plt.legend(loc='upper left')
plt.savefig('Figure_8.6.png')

# Plot a histogram of the posterior differences of theta values.
theta_diff = theta1_sample - theta2_sample
pm.plot_posterior(theta_diff, ref_val=0.0, bins=30, color='skyblue')
plt.xlabel(r'$\theta_1 - \theta_2$')
plt.savefig('Figure_8.8.png')

# For Exercise 8.5:
# Posterior prediction. For each step in the chain, use the posterior thetas
# to flip the coins.
chain_len = len(theta1_sample)
# Create matrix to hold results of simulated flips:
y_pred = np.zeros((2, chain_len))
for step_idx in range(chain_len):  # step through the chain
    # flip the first coin:
    p_head1 = theta1_sample[step_idx]
    y_pred[0, step_idx] = np.random.choice([0, 1], p=[1 - p_head1, p_head1])
    # flip the second coin:
    p_head2 = theta2_sample[step_idx]
plt.subplot(1, 2, 1) thin_idx = 50 plt.plot(z1[::thin_idx], z0[::thin_idx], 'b.', alpha=0.7) plt.ylabel('Standardized Intercept') plt.xlabel('Standardized Slope') plt.subplot(1, 2, 2) plt.plot(b1[::thin_idx], b0[::thin_idx], 'b.', alpha=0.7) plt.ylabel('Intercept (ht when wt=0)') plt.xlabel('Slope (pounds per inch)') plt.tight_layout() plt.savefig('Figure_16.4.png') # Display the posterior of the b1: plt.figure(figsize=(8, 5)) ax = plt.subplot(1, 2, 1) pm.plot_posterior(z1, ref_val=0.0, bins=30, ax=ax) ax.set_xlabel('Standardized slope') ax = plt.subplot(1, 2, 2) pm.plot_posterior(b1, ref_val=0.0, bins=30, ax=ax) ax.set_xlabel('Slope (pounds per inch)') plt.tight_layout() plt.savefig('Figure_16.5.png') # Display data with believable regression lines and posterior predictions. plt.figure() # Plot data values: x_rang = np.max(x) - np.min(x) y_rang = np.max(y) - np.min(y) lim_mult = 0.25 x_lim = [np.min(x)-lim_mult*x_rang, np.max(x)+lim_mult*x_rang] y_lim = [np.min(y)-lim_mult*y_rang, np.max(y)+lim_mult*y_rang]
# (continuation of the posterior-prediction call; its opening lies outside this excerpt)
                                    scale=np.repeat([sigma[chain_idx]], [len(x_post_pred)]))

for x_idx in range(len(x_post_pred)):
    y_HDI_lim[x_idx] = hpd(y_post_pred[x_idx])

# Display believable beta0 and b1 values
plt.figure()
thin_idx = 5
plt.plot(b1[::thin_idx], b0[::thin_idx], '.')
plt.ylabel("Intercept")
plt.xlabel("Slope")
plt.savefig('Figure_16.x0.png')

# Display the posterior of b1:
ax = pm.plot_posterior(b1, ref_val=0.0, bins=30)
ax.set_xlabel(r'Slope ($\Delta$ tar / $\Delta$ weight)')
plt.title('Mean tdf = %.2f' % tdf_m)
plt.savefig('Figure_16.8b.png')

# Display data with believable regression lines and posterior predictions.
plt.figure()
plt.plot(x, y, 'k.')
plt.title('Data with credible regression lines')
plt.xlabel('weight')
plt.ylabel('tar')
plt.xlim(x_lim)
plt.ylim(y_lim)
# Superimpose a smattering of believable regression lines:
for i in range(0, len(b0), 5):
    plt.plot(x, b0[i] + b1[i]*x, c='k', alpha=0.05)
#pm.summary(trace)

## Check for mixing and autocorrelation
#pm.autocorrplot(trace, vars=[mu, tau])

## Plot KDE and sampled values for each parameter.
#pm.traceplot(trace)

mu_sample = trace['mu']
sigma_sample = trace['sd']

plt.figure(figsize=(10, 6))
ax = plt.subplot(1, 2, 1)
pm.plot_posterior(mu_sample, bins=30, ax=ax)
ax.set_xlabel('mu')
ax.set_title('Posterior')
ax.set_xlim(98, 102)

plt.subplot(1, 2, 2)
mu_mean = np.mean(mu_sample)
sigma_mean = np.mean(sigma_sample)
plt.scatter(mu_sample, sigma_sample, c='gray')
plt.plot(mu_mean, sigma_mean, 'C1*',
         label=r'$\mu$ = %.1f, $\sigma$ = %.1f' % (mu_mean, sigma_mean))
plt.xlabel('mu')
plt.ylabel('sigma')
plt.title('Posterior')
            n_accepted += 1
    else:
        # reject the proposed jump, stay at current position
        trajectory[t+1] = current_position
        # increment the rejected counter, just to monitor performance
        if t > burn_in:
            n_rejected += 1

# Extract the post-burn_in portion of the trajectory.
accepted_traj = trajectory[burn_in:]
# End of Metropolis algorithm.

# Display the posterior.
ROPE = np.array([0.76, 0.8])
pm.plot_posterior(accepted_traj, ref_val=0.9, rope=ROPE)
plt.xlabel('theta')

# Display rejected/accepted ratio in the plot.
mean_traj = np.mean(accepted_traj)
std_traj = np.std(accepted_traj)
plt.plot(0, label=r'$N_{pro}=%s$ $\frac{N_{acc}}{N_{pro}} = %.3f$'
         % (len(accepted_traj), (n_accepted/len(accepted_traj))), alpha=0)

# Evidence for model, p(D).
# Compute a, b parameters for a beta distribution that has the same mean
# and stdev as the sample from the posterior. This is a useful choice
# when the likelihood function is Bernoulli.
a = mean_traj * ((mean_traj*(1 - mean_traj)/std_traj**2) - 1)
b = (1 - mean_traj) * ((mean_traj*(1 - mean_traj)/std_traj**2) - 1)
if n_predictors >= 6:  # don't display if too many predictors
    n_predictors = 6
columns = ['Sigma y', 'Intercept']
[columns.append('Slope_%s' % i) for i in predictor_names[:n_predictors]]
traces = np.array([sigma_samp, b0_samp, b_samp[:, 0], b_samp[:, 1]]).T
df = pd.DataFrame(traces, columns=columns)
g = sns.PairGrid(df)
g.map(plt.scatter)
plt.savefig('Figure_17.Xa.png')

## Display the posterior:
plt.figure(figsize=(16, 4))
ax = plt.subplot(1, n_predictors+2, 1)
pm.plot_posterior(sigma_samp, ax=ax)
ax.set_xlabel(r'$\sigma y$')
ax = plt.subplot(1, n_predictors+2, 2)
pm.plot_posterior(b0_samp, ax=ax)
ax.set_xlabel('Intercept')
for i in range(0, n_predictors):
    ax = plt.subplot(1, n_predictors+2, 3+i)
    pm.plot_posterior(b_samp[:, i], ref_val=0, ax=ax)
    ax.set_xlabel('Slope_%s' % predictor_names[i])
plt.tight_layout()
plt.savefig('Figure_17.Xb.png')

# Posterior prediction:
# Define matrix for recording posterior predicted y values for each xPostPred.
# One row per xPostPred value, with each row holding random predicted y values.
## Plot KDE and sampled values for each parameter. #pm.traceplot(trace) pm.traceplot(trace) # Create arrays with the posterior sample mu1_sample = trace['mu'][:,0] mu2_sample = trace['mu'][:,1] mu3_sample = trace['mu'][:,2] mu4_sample = trace['mu'][:,3] # Plot differences among filtrations experiments fig, ax = plt.subplots(1, 3, figsize=(15, 6)) pm.plot_posterior(mu1_sample-mu2_sample, ax=ax[0], color='skyblue') ax[0].set_xlabel(r'$\mu1-\mu2$') # Plot differences among condensation experiments pm.plot_posterior(mu3_sample-mu4_sample, ax=ax[1], color='skyblue') ax[1].set_xlabel(r'$\mu3-\mu4$') # Plot differences between filtration and condensation experiments a = (mu1_sample+mu2_sample)/2 - (mu3_sample+mu4_sample)/2 pm.plot_posterior(a, ax=ax[2], color='skyblue') ax[2].set_xlabel(r'$(\mu1+\mu2)/2 - (\mu3+\mu4)/2$') plt.tight_layout() plt.savefig('Figure_9.16.png') plt.show()
def posterior_plot(self, **kwargs):
    return pm.plot_posterior(self.posterior_, **kwargs)
# define priors
prior_v1 = pm.Beta('prior_v1', alpha=2, beta=2)
prior_v2 = pm.Beta('prior_v2', alpha=2, beta=2)

# define likelihood
like_v1 = pm.Binomial('like_v1', n=n, p=prior_v1, observed=obs_v1)
like_v2 = pm.Binomial('like_v2', n=n, p=prior_v2, observed=obs_v2)

# define metrics
pm.Deterministic('difference', prior_v2 - prior_v1)
pm.Deterministic('relation', (prior_v2 / prior_v1) - 1)

# inference
trace = pm.sample(draws=50000, step=pm.Metropolis(), start=pm.find_MAP(), progressbar=True)

_ = pm.plot_posterior(trace[1000:], varnames=['difference', 'relation'],
                      ref_val=0, color='#87ceeb')

# NOTE
# n        - Number of training examples.
# i        - ith training example in a data set.
# y(i)     - Ground truth label for ith training example.
# y_hat(i) - Prediction for ith training example.

## Compute RMSE
import numpy as np

y_hat = np.array([0.000, 0.166, 0.333])
y_true = np.array([0.000, 0.254, 0.998])

def rmse(predictions, targets):
    differences = predictions - targets
    differences_squared = differences ** 2
    mean_of_differences_squared = differences_squared.mean()
    # root of the mean squared difference
    return np.sqrt(mean_of_differences_squared)
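# Quick check of the rmse() helper on the arrays defined above.
print(rmse(y_hat, y_true))  # approximately 0.387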
y = pm.Bernoulli('y', p=theta, observed=y) # Generate a MCMC chain trace = pm.sample(1000) # create an array with the posterior sample theta_sample = trace['theta'] fig, ax = plt.subplots(1, 2) ax[0].plot(theta_sample[:500], np.arange(500), marker='o', color='skyblue') ax[0].set_xlim(0, 1) ax[0].set_xlabel(r'$\theta$') ax[0].set_ylabel('Position in Chain') pm.plot_posterior(theta_sample, ax=ax[1], color='skyblue'); ax[1].set_xlabel(r'$\theta$'); # Posterior prediction: # For each step in the chain, use posterior theta to flip a coin: y_pred = np.zeros(len(theta_sample)) for i, p_head in enumerate(theta_sample): y_pred[i] = np.random.choice([0, 1], p=[1 - p_head, p_head]) # Jitter the 0,1 y values for plotting purposes: y_pred_jittered = y_pred + np.random.uniform(-.05, .05, size=len(theta_sample)) # Now plot the jittered values: plt.figure() plt.plot(theta_sample[:500], y_pred_jittered[:500], 'C1o') plt.xlim(-.1, 1.1)
# likelihood
y = pm.Normal('y', mu=means[idx], sd=sds[idx], observed=y)
trace = pm.sample(5000, njobs=1)

chain = trace[100::]
fig = plt.figure()
pm.traceplot(chain)
pdf_all.savefig()

# mean, standard deviation, and the HPD intervals
print(pm.summary(trace))

dist = stats.norm()
fig, ax = plt.subplots(3, 2, figsize=(16, 12))
comparisons = [(i, j) for i in range(len(set(idx))) for j in range(i+1, len(set(idx)))]
pos = [(k, l) for k in range(3) for l in (0, 1)]
for (i, j), (k, l) in zip(comparisons, pos):
    means_diff = chain['means'][:, i] - chain['means'][:, j]
    d_cohen = (means_diff / np.sqrt((chain['sds'][:, i]**2 + chain['sds'][:, j]**2) / 2)).mean()
    ps = dist.cdf(d_cohen / (2**0.5))
    pm.plot_posterior(means_diff, ref_val=0, ax=ax[k, l], color='skyblue')
    ax[k, l].plot(0, label="Cohen's d={:.2f}\nProb sup={:.2f}".format(d_cohen, ps), alpha=0)
    ax[k, l].set_xlabel(r'$\mu_{}-\mu_{}$'.format(i, j), fontsize=15)
    ax[k, l].legend(loc=0, fontsize=14)
pdf_all.savefig(fig)
# Extract values of 'a' a0_sample = trace['a0'] b1_sample = trace['b1'] b2_sample = trace['b2'] b1b2_sample = trace['b1b2'] b0_sample = a0_sample * np.std(y) + np.mean(y) b1_sample = b1_sample * np.std(y) b2_sample = b2_sample * np.std(y) b1b2_sample = b1b2_sample * np.std(y) plt.figure(figsize=(25,20)) ax = plt.subplot(451) pm.plot_posterior(b0_sample, bins=50, ax=ax) ax.set_xlabel(r'$\beta0$') ax.set_title('Baseline') plt.xlim(b0_sample.min(), b0_sample.max()); count = 2 for i in range(len(b1_sample[0])): ax = plt.subplot(4, 5, count) pm.plot_posterior(b1_sample[:,i], ax=ax) ax.set_xlabel(r'$\beta1_{}$'.format(i)) ax.set_title('x1: {}'.format(x1names[i])) count += 1 for i in range(len(b2_sample[0])): ax = plt.subplot(4, 5, count) pm.plot_posterior(b2_sample[:,i], bins=50, ax=ax)
#pm.autocorrplot(trace, varnames=['mu', 'kappa'])

## Plot KDE and sampled values for each parameter.
#pm.traceplot(trace[burnin:])
pm.traceplot(trace)

# Create arrays with the posterior sample
mu1_sample = trace['mu'][:, 0][burnin:]
mu2_sample = trace['mu'][:, 1][burnin:]
mu3_sample = trace['mu'][:, 2][burnin:]
mu4_sample = trace['mu'][:, 3][burnin:]

# Plot differences among filtration experiments
fig, ax = plt.subplots(1, 3, figsize=(15, 6))
pm.plot_posterior((mu1_sample - mu2_sample), ax=ax[0], ref_val=0, color='skyblue')
ax[0].set_xlabel(r'$\mu1-\mu2$')

# Plot differences among condensation experiments
pm.plot_posterior((mu3_sample - mu4_sample), ax=ax[1], ref_val=0, color='skyblue')
ax[1].set_xlabel(r'$\mu3-\mu4$')

# Plot differences between filtration and condensation experiments
a = (mu1_sample + mu2_sample)/2 - (mu3_sample + mu4_sample)/2
pm.plot_posterior(a, ax=ax[2], ref_val=0, color='skyblue')
ax[2].set_xlabel(r'$(\mu1+\mu2)/2 - (\mu3+\mu4)/2$')

plt.tight_layout()
plt.savefig('Figure_9.18_upper.png')
plt.show()
#pm.autocorrplot(trace, vars=[nu, eta])

## Plot KDE and sampled values for each parameter.
#pm.traceplot(trace)

model_idx_sample = trace['model_index']
pM1 = sum(model_idx_sample == 0) / len(model_idx_sample)
pM2 = 1 - pM1

nu_sample_M1 = trace['nu'][model_idx_sample == 0]
eta_sample_M2 = trace['eta'][model_idx_sample == 1]

plt.figure()
plt.subplot(2, 1, 1)
pm.plot_posterior(nu_sample_M1)
plt.xlabel(r'$\nu$')
plt.ylabel('frequency')
plt.title(r'p($\nu$|D,M1), with p(M1|D)={:.3f}'.format(pM1), fontsize=14)
plt.xlim(-8, 8)

plt.subplot(2, 1, 2)
pm.plot_posterior(eta_sample_M2)
plt.xlabel(r'$\eta$')
plt.ylabel('frequency')
plt.title(r'p($\eta$|D,M2), with p(M2|D)={:.3f}'.format(pM2), fontsize=14)
plt.xlim(0, 8)

plt.savefig('figure_ex_10.2_a.png')
plt.show()
def robust_lin_reg(df, var_map, steps=2000, mcmc='metropolis', plot_trace=True, plot_vars=True):
    """ Robust Bayesian linear regression.

    Args:
        df          Dataframe containing the explanatory and response variables
        var_map     Dict specifying x and y vars: {'x':'expl_var', 'y':'resp_var'}
        steps       Number of steps to take in MCMC chains
        mcmc        Sampler to use: ['metropolis', 'slice', 'nuts']
        plot_trace  Whether to plot the MCMC traces
        plot_vars   Whether to plot posteriors for variables

    Returns:
        Creates plots showing the posteriors of the regression parameters,
        plus optional diagnostics. Returns the MCMC trace
    """
    import pymc3 as pm
    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy as np
    import theano

    # Get cols
    df = df[list(var_map.values())]

    # Swap keys and values
    var_map_rev = dict((v, k) for k, v in var_map.items())

    # Convert df columns to x and y
    df.columns = [var_map_rev[i] for i in df.columns]

    with pm.Model() as model:
        # Priors
        nu = pm.Exponential('v_minus_one', 1./29.) + 1

        # The patsy string below automatically assumes mu=0 and estimates
        # lam = (1/s.d.**2), so don't need to add these. Do need to add
        # prior for nu though.
        family = pm.glm.families.StudentT(nu=nu)

        # Define model
        pm.glm.glm('y ~ x', df, family=family)

        # Find MAP as starting point
        start = pm.find_MAP()

        # Run sampler to approximate posterior
        if mcmc == 'metropolis':
            step = pm.Metropolis()
            trace = pm.sample(steps, step, start=start)
        elif mcmc == 'slice':
            step = pm.Slice()
            trace = pm.sample(steps, step, start=start)
        elif mcmc == 'nuts':
            step = pm.NUTS(scaling=start)
            trace = pm.sample(steps, step)
        else:
            raise ValueError("mcmc must be one of ['metropolis', 'slice', 'nuts']")

    # Traces
    if plot_trace:
        pm.traceplot(trace)

    # Posteriors for variables
    if plot_vars:
        pm.plot_posterior(trace[-1000:], varnames=['v_minus_one', 'lam'], alpha=0.3)
        pm.plot_posterior(trace[1000:], varnames=['x', 'Intercept'], ref_val=0, alpha=0.3)

    # PPC
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, xlabel=var_map['x'], ylabel=var_map['y'])
    ax.scatter(df.x, df.y, marker='o', label='Data')
    pm.glm.plot_posterior_predictive(trace, samples=50, eval=df.x, label='PPC', alpha=0.3)

    return trace
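# Usage sketch for robust_lin_reg() above; column names and values are hypothetical.
import pandas as pd

example = pd.DataFrame({'flow': [1.2, 2.3, 3.1, 4.8, 5.0, 6.1],
                        'conc': [10.1, 12.3, 13.0, 16.8, 18.2, 21.0]})
trace = robust_lin_reg(example, {'x': 'flow', 'y': 'conc'},
                       steps=2000, mcmc='metropolis')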
pm.autocorrplot(trace[burnin:], varnames=['mu', 'kappa'])
#pm.autocorrplot(trace, vars=[mu, kappa])

## Plot KDE and sampled values for each parameter.
pm.traceplot(trace[burnin:])
#pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:, 0][burnin:]
theta28_sample = trace['theta'][:, 27][burnin:]
mu_sample = trace['mu'][burnin:]
kappa_sample = trace['kappa'][burnin:]

# Plot mu histogram
fig, ax = plt.subplots(2, 2, figsize=(12, 12))
pm.plot_posterior(mu_sample, ax=ax[0, 0], color='skyblue')
ax[0, 0].set_xlabel(r'$\mu$')

# Plot kappa histogram
pm.plot_posterior(kappa_sample, ax=ax[0, 1], color='skyblue')
ax[0, 1].set_xlabel(r'$\kappa$')

# Plot theta 1
pm.plot_posterior(theta1_sample, ax=ax[1, 0], color='skyblue')
ax[1, 0].set_xlabel(r'$\theta1$')

# Plot theta 28
pm.plot_posterior(theta28_sample, ax=ax[1, 1], color='skyblue')
ax[1, 1].set_xlabel(r'$\theta28$')
theta2_sample = trace['theta'][:, 1] theta3_sample = trace['theta'][:, 2] mu_sample = trace['mu'] kappa_sample = trace['kappa'] # Scatter plot hyper-parameters fig, ax = plt.subplots(4, 3, figsize=(12, 12)) ax[0, 0].scatter(mu_sample, kappa_sample, marker='o', color='skyblue') ax[0, 0].set_xlim(0, 1) ax[0, 0].set_xlabel(r'$\mu$') ax[0, 0].set_ylabel(r'$\kappa$') # Plot mu histogram #plot_post(mu_sample, xlab=r'$\mu$', show_mode=False, labelsize=9, framealpha=0.5) pm.plot_posterior(mu_sample, ax=ax[0, 1], color='skyblue') ax[0, 1].set_xlabel(r'$\mu$') ax[0, 1].set_xlim(0, 1) # Plot kappa histogram #plot_post(kappa_sample, xlab=r'$\kappa$', show_mode=False, labelsize=9, framealpha=0.5) pm.plot_posterior(kappa_sample, ax=ax[0, 2], color='skyblue') ax[0, 2].set_xlabel(r'$\kappa$') # Plot theta 1 #plot_post(theta1_sample, xlab=r'$\theta1$', show_mode=False, labelsize=9, framealpha=0.5) pm.plot_posterior(theta1_sample, ax=ax[1, 0], color='skyblue') ax[1, 0].set_xlabel(r'$\theta1$') ax[1, 0].set_xlim(0, 1)
#pm.autocorrplot(trace, vars=model.unobserved_RVs[:-1])

## Plot KDE and sampled values for each parameter.
pm.traceplot(trace)

a0_sample = trace['a0']
b_sample = trace['b']
b0_sample = a0_sample * np.std(y) + np.mean(y)
b_sample = b_sample * np.std(y)

plt.figure(figsize=(20, 4))
for i in range(5):
    ax = plt.subplot(1, 5, i+1)
    pm.plot_posterior(b_sample[:, i], bins=50, ax=ax)
    ax.set_xlabel(r'$\beta1_{}$'.format(i))
    ax.set_title('x:{}'.format(i))
plt.tight_layout()
plt.savefig('Figure_18.xa.png')

nContrasts = len(contrast_dict)
if nContrasts > 0:
    plt.figure(figsize=(20, 8))
    count = 1
    for key, value in contrast_dict.items():
        contrast = np.dot(b_sample, value)
        ax = plt.subplot(2, 4, count)
        pm.plot_posterior(contrast, ref_val=0.0, bins=50, ax=ax)
        ax.set_title('Contrast {}'.format(key))
group1_mean - group2_mean) diff_of_stds = pm.Deterministic('difference of stds', group1_std - group2_std) effect_size = pm.Deterministic( 'effect size', diff_of_means / np.sqrt( (group1_std**2 + group2_std**2) / 2)) # RUN #trace = pm.sample(2000, cores=2) # Nota Bene: https://github.com/pymc-devs/pymc3/issues/3388 trace = pm.sample(1000, tune=1000, cores=1) pm.kdeplot(np.random.exponential(30, size=10000), shade=0.5) pm.plot_posterior(trace, varnames=[ 'group1_mean', 'group2_mean', 'group1_std', 'group2_std', 'ν_minus_one' ], color='#87ceeb') pm.plot_posterior( trace, varnames=['difference of means', 'difference of stds', 'effect size'], ref_val=0, color='#87ceeb') pm.forestplot(trace, varnames=['group1_mean', 'group2_mean']) pm.forestplot(trace, varnames=['group1_std', 'group2_std', 'ν_minus_one']) pm.summary( trace,