def main():
    """Fit a Beta-Binomial model with Metropolis and report MCMC diagnostics.

    The model observes 50 successes in 100 trials under a ``Beta(0.5, 3)``
    prior on the success probability. After drawing 1000 samples the
    function produces the trace, posterior, forest and autocorrelation
    plots and prints the numeric diagnostics (Gelman-Rubin, summary table,
    effective sample size) so they are visible when run as a script.
    """
    with pm.Model():
        # Strong prior: Beta(0.5, 3) has mean 0.5 / 3.5 ~= 0.14, i.e. the
        # prior mass sits much closer to 0 than to 1.
        prior = pm.Beta('prior', 0.5, 3)
        pm.Binomial('output', n=100, observed=50, p=prior)

        step = pm.Metropolis()
        trace = pm.sample(1000, step=step)

        pm.traceplot(trace)

        # The ROPE is an interval chosen by the analyst around the value
        # they expect. Compare it with the HPD interval in the plot: if the
        # ROPE overlaps the HPD, the expected value is still plausible and
        # a larger sample may sharpen the estimate of the mean.
        pm.plot_posterior(trace, figsize=(5, 5), kde_plot=True,
                          rope=[0.45, 0.55])

        # Gelman-Rubin statistic
        print(pm.gelman_rubin(trace))

        # Forest plot
        pm.forestplot(trace, varnames=['prior'])

        # Summary table; check the MC error column (the standard error of
        # the estimate), which should be low.
        print(pm.df_summary(trace))

        # Autocorrelation
        pm.autocorrplot(trace)

        # Effective sample size
        print(pm.effective_n(trace)['prior'])
def test_value_n_eff_rhat(self):
    """Check that ``pm.summary`` reports the same n_eff / Rhat as the direct calls.

    Four Normal variables of rank 0 to 3 are sampled with Metropolis. For
    every variable in the trace, the ``n_eff`` and ``Rhat`` columns of the
    summary table (reshaped to the variable's shape) must equal the output
    of ``pm.effective_n`` and ``pm.gelman_rubin``.
    """
    mu = -2.1
    tau = 1.3
    with Model():
        Normal('x0', mu, tau, testval=floatX_array(.1))  # 0d
        Normal('x1', mu, tau, shape=2,
               testval=floatX_array([.1, .1]))  # 1d
        Normal('x2', mu, tau, shape=(2, 2),
               testval=floatX_array(np.tile(.1, (2, 2))))  # 2d
        Normal('x3', mu, tau, shape=(2, 2, 3),
               testval=floatX_array(np.tile(.1, (2, 2, 3))))  # 3d
        trace = pm.sample(100, step=pm.Metropolis())

    for varname in trace.varnames:
        # Build the summary table once per variable; both columns checked
        # below come from the same table.
        summary_df = pm.summary(trace, varnames=[varname])

        # test effective_n value
        n_eff = pm.effective_n(trace, varnames=[varname])[varname]
        n_eff_df = np.asarray(summary_df['n_eff']).reshape(n_eff.shape)
        npt.assert_equal(n_eff, n_eff_df)

        # test Rhat value
        rhat = pm.gelman_rubin(trace, varnames=[varname])[varname]
        rhat_df = np.asarray(summary_df['Rhat']).reshape(rhat.shape)
        npt.assert_equal(rhat, rhat_df)
def test_value_n_eff_rhat(self):
    """Check that ``pm.summary`` reports the same n_eff / Rhat as the direct calls.

    Four Normal variables of rank 0 to 3 are sampled with Metropolis. For
    every variable in the trace, the ``n_eff`` and ``Rhat`` columns of the
    summary table (reshaped to the variable's shape) must equal the output
    of ``pm.effective_n`` and ``pm.gelman_rubin``.
    """
    mu = -2.1
    tau = 1.3
    with Model():
        Normal('x0', mu, tau, testval=floatX_array(.1))  # 0d
        Normal('x1', mu, tau, shape=2,
               testval=floatX_array([.1, .1]))  # 1d
        Normal('x2', mu, tau, shape=(2, 2),
               testval=floatX_array(np.tile(.1, (2, 2))))  # 2d
        Normal('x3', mu, tau, shape=(2, 2, 3),
               testval=floatX_array(np.tile(.1, (2, 2, 3))))  # 3d
        trace = pm.sample(100, step=pm.Metropolis())

    for varname in trace.varnames:
        # Build the summary table once per variable; both columns checked
        # below come from the same table.
        summary_df = pm.summary(trace, varnames=[varname])

        # test effective_n value
        n_eff = pm.effective_n(trace, varnames=[varname])[varname]
        n_eff_df = np.asarray(summary_df['n_eff']).reshape(n_eff.shape)
        npt.assert_equal(n_eff, n_eff_df)

        # test Rhat value
        rhat = pm.gelman_rubin(trace, varnames=[varname])[varname]
        rhat_df = np.asarray(summary_df['Rhat']).reshape(rhat.shape)
        npt.assert_equal(rhat, rhat_df)
def __init__(self, trace):
    """Keep the trace and, for multi-chain traces, its convergence diagnostics.

    Parameters
    ----------
    trace : object exposing ``nchains``
        Stored as ``self._trace``; its chain count is copied to
        ``self.nchains``.
    """
    self._trace = trace
    self.nchains = trace.nchains

    if not trace.nchains > 1:
        # NOTE(review): in this branch ``effective_n`` and ``gelman_rubin``
        # are never assigned on the instance -- confirm that callers check
        # ``nchains`` before reading them.
        return

    self.effective_n = pm.effective_n(trace)
    self.gelman_rubin = pm.gelman_rubin(trace)
def main():
    """Fit a Student-T model to a small data set and report MCMC diagnostics.

    The data are plotted with a KDE, then modelled as Student-T with a
    uniform prior on the location, a half-normal prior on the scale and an
    exponential prior on the degrees of freedom. After sampling, the
    Gelman-Rubin statistic, summary table and effective sample size are
    printed, and the forest and autocorrelation plots are drawn.
    """
    data = np.array([
        51.06, 55.12, 53.73, 50.24, 52.05, 56.40, 48.45, 52.34, 55.65, 51.49,
        51.86, 63.43, 53.00, 56.09, 51.93, 52.31, 52.33, 57.48, 57.44, 55.14,
        53.93, 54.62, 56.09, 68.58, 51.36, 55.47, 50.73, 51.94, 54.95, 50.39,
        52.91, 51.5, 52.68, 47.72, 49.73, 51.82, 54.99, 52.84, 53.19, 54.52,
        51.46, 53.73, 51.61, 49.81, 52.42, 54.3, 53.84, 53.16
    ])

    # Look at the distribution of the data.
    sns.kdeplot(data)

    # Candidate priors for a standard deviation: exponential (a safe
    # choice), half-Cauchy (fat tail), inverse gamma. For the exponential,
    # a larger lambda gives a steeper density.
    with pm.Model():
        mu = pm.Uniform('mu', 30, 80)
        # Use the ``sigma`` keyword here as well, matching the StudentT
        # call below, instead of mixing it with the older ``sd`` alias.
        sigma = pm.HalfNormal('sigma', sigma=10)
        # lambda = 1.5 is steeper; 0.5 would be flatter.
        df = pm.Exponential('df', 1.5)
        pm.StudentT('output', mu=mu, sigma=sigma, nu=df, observed=data)
        trace = pm.sample(1000)

        # Gelman-Rubin statistic
        print(pm.gelman_rubin(trace))

        # Forest plot
        pm.forestplot(trace)

        # Summary table; check the MC error column (the standard error of
        # the estimate), which should be low.
        print(pm.summary(trace))

        # Autocorrelation
        pm.autocorrplot(trace)

        # Effective sample size
        print(pm.effective_n(trace))
def track_glm_hierarchical_ess(self, init):
    """Return the effective sample size of ``mu_a`` per second of sampling.

    ``init`` selects the NUTS initialisation passed to ``pm.init_nuts``.
    Only the ``pm.sample`` call is timed; initialisation is excluded.
    """
    with glm_hierarchical_model():
        init_points, nuts_step = pm.init_nuts(
            init=init, chains=self.chains, progressbar=False,
            random_seed=123)

        started = time.time()
        trace = pm.sample(draws=self.draws, step=nuts_step, njobs=4,
                          chains=self.chains, start=init_points,
                          random_seed=100)
        elapsed = time.time() - started

        mu_a_ess = pm.effective_n(trace, ('mu_a',))['mu_a']
        return mu_a_ess / elapsed
def track_glm_hierarchical_ess(self, step):
    """Return the effective sample size of ``mu_a`` per second of sampling.

    ``step`` is either ``None`` (forwarded to ``pm.sample`` unchanged) or a
    zero-argument callable that builds the step method; it is called inside
    the model context, before the timer starts.
    """
    with glm_hierarchical_model():
        sampler_step = step() if step is not None else step

        started = time.time()
        trace = pm.sample(draws=self.draws, step=sampler_step, njobs=4,
                          chains=4, random_seed=100)
        elapsed = time.time() - started

        mu_a_ess = pm.effective_n(trace, ('mu_a',))['mu_a']
        return mu_a_ess / elapsed
def track_glm_hierarchical_ess(self, step):
    """Return the effective sample size of ``mu_a`` per second of sampling.

    ``step`` is either ``None`` (forwarded to ``pm.sample`` unchanged) or a
    zero-argument callable that builds the step method; it is called inside
    the model context, before the timer starts. Sampling runs with the
    progress bar and convergence checks switched off.
    """
    with glm_hierarchical_model():
        sampler_step = step() if step is not None else step

        started = time.time()
        trace = pm.sample(draws=self.draws, step=sampler_step, cores=4,
                          chains=4, random_seed=100, progressbar=False,
                          compute_convergence_checks=False)
        elapsed = time.time() - started

        mu_a_ess = pm.effective_n(trace, ('mu_a',))['mu_a']
        return mu_a_ess / elapsed
def track_marginal_mixture_model_ess(self, init):
    """Return the worst-case effective sample size of ``mu`` per second.

    ``init`` selects the NUTS initialisation passed to ``pm.init_nuts``.
    The start point supplied by ``mixture_model()`` is copied once per
    chain. Only the ``pm.sample`` call is timed.
    """
    model, start_point = mixture_model()
    with model:
        _, nuts_step = pm.init_nuts(init=init, chains=self.chains,
                                    progressbar=False, random_seed=123)
        # One independent shallow copy of the start point for each chain.
        chain_starts = [dict(start_point.items())
                        for _ in range(self.chains)]

        started = time.time()
        trace = pm.sample(draws=self.draws, step=nuts_step, njobs=4,
                          chains=self.chains, start=chain_starts,
                          random_seed=100)
        elapsed = time.time() - started

        # worst case: the smallest ESS over the elements of ``mu``
        worst_ess = pm.effective_n(trace, ('mu',))['mu'].min()
        return worst_ess / elapsed
def track_glm_hierarchical_ess(self, step, init):
    """Return the effective sample size of ``mu_a`` per second of sampling.

    ``step`` is a zero-argument callable that builds the step method; it is
    invoked after the timer starts, so its construction is part of the
    measured time. ``init`` is accepted but not read by this method.
    Sampling uses ``self.model``, ``self.chains`` and ``self.start``.
    """
    with self.model:
        started = time.time()
        step_method = step()
        trace = pm.sample(draws=20000, step=step_method, njobs=4,
                          chains=self.chains, start=self.start,
                          random_seed=100)
        elapsed = time.time() - started

        mu_a_ess = pm.effective_n(trace, ('mu_a', ))['mu_a']
        return mu_a_ess / elapsed
def track_glm_hierarchical_ess(self, step):
    """Return the effective sample size of ``mu_a`` per second of sampling.

    ``step`` is either ``None`` (forwarded to ``pm.sample`` unchanged) or a
    zero-argument callable that builds the step method; it is called inside
    the model context, before the timer starts.
    """
    with glm_hierarchical_model():
        sampler_step = step() if step is not None else step

        started = time.time()
        trace = pm.sample(draws=self.draws, step=sampler_step, njobs=4,
                          chains=4, random_seed=100)
        elapsed = time.time() - started

        mu_a_ess = pm.effective_n(trace, ('mu_a', ))['mu_a']
        return mu_a_ess / elapsed
def track_glm_hierarchical_ess(self, step):
    """Return the effective sample size of ``mu_a`` per second of sampling.

    ``step`` is either ``None`` (forwarded to ``pm.sample`` unchanged) or a
    zero-argument callable that builds the step method; it is called inside
    the model context, before the timer starts. Sampling runs with the
    progress bar and convergence checks switched off.
    """
    with glm_hierarchical_model():
        sampler_step = step() if step is not None else step

        started = time.time()
        trace = pm.sample(draws=self.draws, step=sampler_step, cores=4,
                          chains=4, random_seed=100, progressbar=False,
                          compute_convergence_checks=False)
        elapsed = time.time() - started

        mu_a_ess = pm.effective_n(trace, ('mu_a', ))['mu_a']
        return mu_a_ess / elapsed
def track_glm_hierarchical_ess(self, init):
    """Return the effective sample size of ``mu_a`` per second of sampling.

    ``init`` selects the NUTS initialisation passed to ``pm.init_nuts``.
    Only the ``pm.sample`` call is timed; initialisation is excluded.
    """
    with glm_hierarchical_model():
        init_points, nuts_step = pm.init_nuts(
            init=init, chains=self.chains, progressbar=False,
            random_seed=123)

        started = time.time()
        trace = pm.sample(draws=self.draws, step=nuts_step, njobs=4,
                          chains=self.chains, start=init_points,
                          random_seed=100)
        elapsed = time.time() - started

        mu_a_ess = pm.effective_n(trace, ('mu_a', ))['mu_a']
        return mu_a_ess / elapsed
def track_marginal_mixture_model_ess(self, init):
    """Return the worst-case effective sample size of ``mu`` per second.

    ``init`` selects the NUTS initialisation passed to ``pm.init_nuts``.
    The start point supplied by ``mixture_model()`` is copied once per
    chain. Only the ``pm.sample`` call is timed.
    """
    model, start_point = mixture_model()
    with model:
        _, nuts_step = pm.init_nuts(init=init, chains=self.chains,
                                    progressbar=False, random_seed=123)
        # One independent shallow copy of the start point for each chain.
        chain_starts = [dict(start_point.items())
                        for _ in range(self.chains)]

        started = time.time()
        trace = pm.sample(draws=self.draws, step=nuts_step, njobs=4,
                          chains=self.chains, start=chain_starts,
                          random_seed=100)
        elapsed = time.time() - started

        # worst case: the smallest ESS over the elements of ``mu``
        worst_ess = pm.effective_n(trace, ('mu', ))['mu'].min()
        return worst_ess / elapsed
def test_neff(self):
    """Assert every variable's effective sample size exceeds ``min_n_eff``.

    Does nothing when the instance defines no ``min_n_eff``. The first
    ``self.burn`` draws of ``self.trace`` are discarded before the
    effective sample size is computed.
    """
    if not hasattr(self, 'min_n_eff'):
        return

    n_eff_by_var = pm.effective_n(self.trace[self.burn:])
    for var_name in n_eff_by_var:
        npt.assert_array_less(self.min_n_eff, n_eff_by_var[var_name])
def summary(trace, varnames=None, transform=lambda x: x, stat_funcs=None,
            extend=False, include_transformed=False,
            alpha=0.05, start=0, batches=None):
    R"""Create a data frame with summary statistics.

    Parameters
    ----------
    trace : MultiTrace instance
    varnames : list
        Names of variables to include in summary
    transform : callable
        Function to transform data (defaults to identity)
    stat_funcs : None or list
        A list of functions used to calculate statistics. By default,
        the mean, standard deviation, simulation standard error, and
        highest posterior density intervals are included.

        The functions will be given one argument, the samples for a
        variable as a 2 dimensional array, where the first axis
        corresponds to sampling iterations and the second axis
        represents the flattened variable (e.g., x__0, x__1,...). Each
        function should return either

        1) A `pandas.Series` instance containing the result of
           calculating the statistic along the first axis. The name
           attribute will be taken as the name of the statistic.
        2) A `pandas.DataFrame` where each column contains the
           result of calculating the statistic along the first axis.
           The column names will be taken as the names of the
           statistics.
    extend : boolean
        If True, use the statistics returned by `stat_funcs` in
        addition to, rather than in place of, the default statistics.
        This is only meaningful when `stat_funcs` is not None.
    include_transformed : bool
        Flag for reporting automatically transformed variables in addition
        to original variables (defaults to False).
    alpha : float
        The alpha level for generating posterior intervals. Defaults
        to 0.05. This is only meaningful when `stat_funcs` is None.
    start : int
        The starting index from which to summarize (each) chain.
        Defaults to zero.
    batches : None or int
        Batch size for calculating standard deviation for non-independent
        samples. Defaults to the smaller of 100 or the number of samples.
        This is only meaningful when `stat_funcs` is None.

    Returns
    -------
    `pandas.DataFrame` with summary statistics for each variable
    Defaults one are: `mean`, `sd`, `mc_error`, `hpd_2.5`, `hpd_97.5`,
    `n_eff` and `Rhat`. Last two are only computed for traces with 2 or
    more chains.

    Examples
    --------
    .. code:: ipython

        >>> import pymc3 as pm
        >>> trace.mu.shape
        (1000, 2)
        >>> pm.summary(trace, ['mu'])
                   mean        sd  mc_error   hpd_2.5  hpd_97.5
        mu__0  0.106897  0.066473  0.001818 -0.020612  0.231626
        mu__1 -0.046597  0.067513  0.002048 -0.174753  0.081924

                  n_eff      Rhat
        mu__0     487.0   1.00001
        mu__1     379.0   1.00203

    Other statistics can be calculated by passing a list of functions.

    .. code:: ipython

        >>> import pandas as pd
        >>> def trace_sd(x):
        ...     return pd.Series(np.std(x, 0), name='sd')
        ...
        >>> def trace_quantiles(x):
        ...     return pd.DataFrame(pm.quantiles(x, [5, 50, 95]))
        ...
        >>> pm.summary(trace, ['mu'], stat_funcs=[trace_sd, trace_quantiles])
                     sd         5        50        95
        mu__0  0.066473  0.000312  0.105039  0.214242
        mu__1  0.067513 -0.159097 -0.045637  0.062912
    """
    from .backends import tracetab as ttab

    if varnames is None:
        varnames = get_default_varnames(
            trace.varnames, include_transformed=include_transformed)

    if batches is None:
        batches = min([100, len(trace)])

    funcs = [lambda x: pd.Series(np.mean(x, 0), name='mean'),
             lambda x: pd.Series(np.std(x, 0), name='sd'),
             lambda x: pd.Series(mc_error(x, batches), name='mc_error'),
             lambda x: _hpd_df(x, alpha)]

    if stat_funcs is not None:
        # ``list(...)`` lets callers pass any sequence, not only a list.
        funcs = funcs + list(stat_funcs) if extend else stat_funcs

    var_dfs = []
    for var in varnames:
        vals = transform(trace.get_values(var, burn=start, combine=True))
        # Flatten every variable to (n_samples, n_elements) so each
        # statistic function sees a 2-d array.
        flat_vals = vals.reshape(vals.shape[0], -1)
        var_df = pd.concat([f(flat_vals) for f in funcs], axis=1)
        var_df.index = ttab.create_flat_names(var, vals.shape[1:])
        var_dfs.append(var_df)
    dforg = pd.concat(var_dfs, axis=0)

    # Convergence diagnostics belong to the default statistics only, and
    # are only computed when there are at least two chains.
    if (stat_funcs is not None) and (not extend):
        return dforg
    if trace.nchains < 2:
        return dforg

    # NOTE(review): n_eff and Rhat are computed on the whole trace; neither
    # ``start`` nor ``transform`` is applied here -- confirm this is
    # intended.
    n_eff = pm.effective_n(trace,
                           varnames=varnames,
                           include_transformed=include_transformed)
    n_eff_pd = dict2pd(n_eff, 'n_eff')
    rhat = pm.gelman_rubin(trace,
                           varnames=varnames,
                           include_transformed=include_transformed)
    rhat_pd = dict2pd(rhat, 'Rhat')
    # ``join_axes`` was removed from ``pd.concat`` in pandas 1.0;
    # reindexing the result on ``dforg.index`` gives the same row
    # selection and order.
    return pd.concat([dforg, n_eff_pd, rhat_pd],
                     axis=1).reindex(dforg.index)
def loo(trace, model=None, pointwise=False, reff=None, progressbar=False):
    """Compute Pareto-smoothed importance sampling leave-one-out (PSIS-LOO)
    cross-validation for out of sample predictive fit, following Vehtari
    et al. (2015).

    Parameters
    ----------
    trace : result of MCMC run
    model : PyMC Model
        Optional model. Default None, taken from context.
    pointwise: bool
        If True, the pointwise predictive accuracy is returned as well.
        Default False.
    reff : float
        Relative MCMC efficiency, `effective_n / n`, i.e. the number of
        effective samples divided by the number of actual samples. When
        None it is 1 for a single-chain trace, otherwise the mean
        effective sample size divided by the total number of draws.
    progressbar: bool
        Whether or not to display a progress bar in the command line. The
        bar shows the percentage of completion, the evaluation speed, and
        the estimated time to completion.

    Returns
    -------
    namedtuple with the following elements:
    LOO: approximated leave-one-out cross-validation
    LOO_se: standard error of LOO
    p_LOO: effective number of parameters
    shape_warn: 1 if the estimated shape parameter of the Pareto
        distribution is greater than 0.7 for one or more samples, else 0
    LOO_i: array of pointwise predictive accuracy, only if pointwise True

    Raises
    ------
    ValueError
        If the pointwise log-likelihood of the model is empty.
    """
    model = modelcontext(model)

    if reff is None:
        if trace.nchains == 1:
            reff = 1.
        else:
            n_eff = pm.effective_n(trace)
            mean_n_eff = pm.stats.dict2pd(n_eff, 'eff').mean()
            total_draws = len(trace) * trace.nchains
            reff = mean_n_eff / total_draws

    log_py = _log_post_trace(trace, model, progressbar=progressbar)
    if log_py.size == 0:
        raise ValueError('The model does not contain observed values.')

    log_weights, pareto_k = _psislw(-log_py, reff)
    log_weights += log_py

    shape_warn = 1 if np.any(pareto_k > 0.7) else 0
    if shape_warn:
        warnings.warn("""Estimated shape parameter of Pareto distribution is greater than 0.7 for one or more samples. You should consider using a more robust model, this is because importance sampling is less likely to work well if the marginal posterior and LOO posterior are very different. 
This is more likely to happen with a non-robust model and highly influential observations.""")

    loo_pointwise = -2 * logsumexp(log_weights, axis=0)
    loo_total = loo_pointwise.sum()
    loo_se = (len(loo_pointwise) * np.var(loo_pointwise)) ** 0.5
    lppd = np.sum(logsumexp(log_py, axis=0, b=1. / log_py.shape[0]))
    p_loo = lppd + (0.5 * loo_total)

    if not pointwise:
        LOO_r = namedtuple('LOO_r', 'LOO, LOO_se, p_LOO, shape_warn')
        return LOO_r(loo_total, loo_se, p_loo, shape_warn)

    # All pointwise values equal to the total means the observed RV did not
    # return an element-wise log-probability.
    if np.equal(loo_total, loo_pointwise).all():
        warnings.warn("""The point-wise LOO is the same with the sum LOO, please double check the Observed RV in your model to make sure it returns element-wise logp. """)
    LOO_r = namedtuple('LOO_r', 'LOO, LOO_se, p_LOO, shape_warn, LOO_i')
    return LOO_r(loo_total, loo_se, p_loo, shape_warn, loo_pointwise)
def effective_n_all(trace):
    """Return ``pm.effective_n`` evaluated on the whole ``trace``."""
    n_eff = pm.effective_n(trace)
    return n_eff
step = pm.Metropolis() trace = pm.sample(1000, step=step) burnin = 100 chaine = trace[burnin:] pm.traceplot(chaine, lines={'theta': theta_real}) plt.savefig('img204.png') with first_model: multi_trace = pm.sample(1000, step=step, njobs=4) burnin = 100 multi_chaine = multi_trace[burnin:] pm.traceplot(multi_chaine, lines={'theta': theta_real}) plt.savefig('img206.png') pm.gelman_rubin(multi_chaine) {'theta': 1.0074579751170656, 'theta_logodds': 1.009770031607315} pm.forestplot(multi_chaine, varnames={'theta'}) plt.savefig('img207.png') print(pm.summary(multi_chaine)) pm.autocorrplot(multi_chaine) plt.savefig('img208.png') print(pm.effective_n(multi_chaine)['theta']) pm.plot_posterior(chaine, kde_plot=True, rope=[0.45, 0.55], ref_val=0.5) plt.savefig('img209.png')
# Flush the figure(s) produced by the preceding step.
plt.show()

print('\n')
print('plot_posterior...')
print('\n')
# Posterior plots for both parameters with a 95% credible interval.
pm.plots.plot_posterior(trace, ['alpha_'], credible_interval=.95)
pm.plots.plot_posterior(trace, ['rate_'], credible_interval=.95)
plt.show()

print('\n')
print('autocorrplot...')
print('\n')
pm.plots.autocorrplot(trace, ['alpha_'])
pm.plots.autocorrplot(trace, ['rate_'])
plt.show()

# Effective sample sizes for alpha and rate.
print('Effective Sample Sizes : ', pm.effective_n(trace))

# Run the Gelman-Rubin test; values should be close to 1.
print('Gelman Rubin Test for Convergence :', pm.gelman_rubin(trace))

# Geweke diagnostic (after John Geweke): a time-series approach that
# compares the mean and variance of segments from the beginning and the end
# of a single chain. By default it takes the first 10% and the last 50% of
# the chain. The scores are plotted as a visual convergence check: no
# strong evidence that the segment means differ is taken to imply
# convergence.
# NOTE(review): the original note says scores should have absolute value
# below one at convergence, but the reference line below is drawn at -1.98
# -- confirm which threshold is intended.
gw_plot1 = pm.geweke(trace['alpha_'])
plt.scatter(gw_plot1[:, 0], gw_plot1[:, 1])
plt.axhline(-1.98, c='r')
step = pm.Metropolis() trace = pm.sample(1000, step=step, start=start) burnin = 0 # no burnin chain = trace[burnin:] pm.traceplot(chain, lines={'theta': theta_true}) with beta_binomial: step = pm.Metropolis() multi_trace = pm.sample(1000, step=step, njobs=4) burnin = 0 # no burnin multi_chain = multi_trace[burnin:] pm.traceplot(multi_chain, lines={'theta': theta_true}) # convergence pm.gelman_rubin(multi_chain) pm.forestplot(multi_chain, varnames=['theta']) # summary pm.summary(multi_chain) # autocorrelation pm.autocorrplot(chain) # effective size pm.effective_n(multi_chain)['theta'] # Summerize the posterior pm.plot_posterior(chain, kde_plot=True) plt.show()
def test_neff(self):
    """Assert every variable's effective sample size exceeds ``min_n_eff``.

    Does nothing when the instance defines no ``min_n_eff``. The first
    ``self.burn`` draws of ``self.trace`` are discarded before the
    effective sample size is computed.
    """
    if not hasattr(self, 'min_n_eff'):
        return

    n_eff_by_var = pm.effective_n(self.trace[self.burn:])
    for var_name in n_eff_by_var:
        npt.assert_array_less(self.min_n_eff, n_eff_by_var[var_name])
def Marginal_llk(mtrace, model=None, ADVI=False, trace2=None, logp=None,
                 maxiter=1000, burn_in=1000):
    """The Bridge Sampling Estimator of the Marginal Likelihood.

    One set of posterior draws fits a multivariate normal proposal; a
    second set, together with draws generated from that proposal, is fed to
    an iterative scheme that solves for the marginal likelihood.

    Parameters
    ----------
    mtrace : MultiTrace, result of MCMC run
        When ``ADVI`` is false, the slice ``mtrace[0:len(mtrace) // 2]``
        fits the proposal and the remainder drives the iterative scheme.
        When ``ADVI`` is true, all of ``mtrace`` fits the proposal.
    model : PyMC Model
        Optional model. Default None, taken from context.
    ADVI : bool
        Selects the two-trace code path; see ``mtrace`` and ``trace2``.
    trace2 : trace
        Only read when ``ADVI`` is true: supplies the draws (and the
        effective sample size) for the iterative scheme.
    logp : Model Log-probability function, read from the model by default
    maxiter : Maximum number of iterations
    burn_in : int
        Not read by this function.

    Returns
    -------
    dict
        ``logml`` (estimated marginal log-likelihood, NaN if the scheme
        did not converge), ``niter``, ``method`` and the four arrays of
        log densities ``q11``, ``q12``, ``q21``, ``q22``.
    """
    r0, tol1, tol2 = 0.5, 1e-2, 1e-2

    model = modelcontext(model)
    if logp is None:
        logp = model.logp_array
    free_rvs = model.free_RVs

    # Split the samples: N1 draws fit the proposal, N2 draws are used in
    # the iterative scheme.
    len_trace = len(mtrace)
    if not ADVI:
        nchain = mtrace.nchains
        N1_ = len_trace // 2
        N1 = N1_ * nchain
        N2 = len_trace * nchain - N1
    else:
        N1_ = len_trace
        N1 = N1_
        N2 = len_trace

    # Effective sample size per variable. Created once, before the loop:
    # re-creating it per variable would leave only the last variable's
    # value in the median below.
    neff_list = dict()

    arraysz = model.bijection.ordering.size
    samples_4_fit = np.zeros((arraysz, N1))
    samples_4_iter = np.zeros((arraysz, N2))
    for var in free_rvs:
        varmap = model.bijection.ordering.by_name[var.name]
        x = mtrace[0:N1_][var.name]
        if ADVI:
            samples_4_fit[varmap.slc, :] = x
            x2 = trace2[0:][var.name]
            samples_4_iter[varmap.slc, :] = x2
            neff_list.update(
                pm.effective_n(trace2[0:], varnames=[var.name]))
        else:
            # Flatten each draw to a vector and store draws as columns.
            samples_4_fit[varmap.slc, :] = x.reshape(
                (x.shape[0], np.prod(x.shape[1:], dtype=int))).T
            x2 = mtrace[N1_:][var.name]
            samples_4_iter[varmap.slc, :] = x2.reshape(
                (x2.shape[0], np.prod(x2.shape[1:], dtype=int))).T
            neff_list.update(
                pm.effective_n(mtrace[N1_:], varnames=[var.name]))

    neff = pm.stats.dict2pd(neff_list, 'temp').median()

    # Fit the multivariate normal proposal.
    m = np.mean(samples_4_fit, axis=1)
    V = np.cov(samples_4_fit)
    if not np.all(np.linalg.eigvals(V) > 0):
        # Cholesky needs a positive definite matrix.
        print('SDP converting')
        V = sdp.nearPD(V)
    L = chol(V, lower=True)

    print('m: ', np.sum(np.isinf(m[:, None])))

    # Draw N2 samples from the proposal distribution.
    gen_samples = m[:, None] + dot(
        L, st.norm.rvs(0, 1, size=samples_4_iter.shape))
    print('gen_samples: ', np.sum(np.isinf(gen_samples)))

    # Evaluate proposal distribution for posterior & generated samples
    q12 = st.multivariate_normal.logpdf(samples_4_iter.T, m, V)
    q22 = st.multivariate_normal.logpdf(gen_samples.T, m, V)
    print('q12: ', np.sum(np.isinf(q12)))
    print('q22: ', np.sum(np.isinf(q22)))

    # Evaluate unnormalized posterior for posterior & generated samples
    q11 = np.asarray([logp(point) for point in samples_4_iter.T])
    q21 = np.asarray([logp(point) for point in gen_samples.T])
    # Replace -inf log densities by a large negative finite value.
    q21[np.isneginf(q21)] = -100000
    q11[np.isneginf(q11)] = -100000

    def iterative_scheme(q11, q12, q21, q22, r0, neff, tol, maxiter,
                         criterion):
        """Run the fixed-point iteration for the bridge sampling ratio.

        Returns a dict with ``logml`` (NaN when ``maxiter`` was reached),
        ``niter`` and ``r_vals`` (every value of r, initial one included).
        """
        l1 = q11 - q12
        l2 = q21 - q22
        # To increase numerical stability,
        # subtracting the median of l1 from l1 & l2 later
        lstar = np.median(l1)
        print('neef: ', neff)
        s1 = neff / (neff + N2)
        s2 = N2 / (neff + N2)
        r = r0
        r_vals = [r]
        logml = np.log(r) + lstar
        criterion_val = 1 + tol

        i = 0
        while i <= maxiter and criterion_val > tol:
            print('i: ', i)
            print('maxiter', maxiter)
            print('criterionval: ', criterion_val)
            print('tol: ', tol)
            rold = r
            logmlold = logml
            numi = np.exp(l2 - lstar) / (s1 * np.exp(l2 - lstar) + s2 * r)
            print('l2: ', l2)
            print('lstar: ', lstar)
            print('s1: ', s1)
            print('r :', r)
            print('Num: ', numi)
            deni = 1 / (s1 * np.exp(l1 - lstar) + s2 * r)
            print('Den: ', deni)
            if np.sum(~np.isfinite(numi)) + np.sum(~np.isfinite(deni)) > 0:
                warn("""Infinite value in iterative scheme, returning NaN. 
Try rerunning with more samples.""")
            r = (N1 / N2) * np.sum(numi) / np.sum(deni)
            print('r: ', r)
            r_vals.append(r)
            logml = np.log(r) + lstar
            print('Logml: ', logml)
            i += 1
            if criterion == 'r':
                criterion_val = np.abs((r - rold) / r)
            elif criterion == 'logml':
                criterion_val = np.abs((logml - logmlold) / logml)
            print('criterion val: ', criterion_val)

        # ``r_vals`` is returned on both paths so the caller can always
        # derive a restart value, whatever made ``logml`` non-finite.
        if i >= maxiter:
            return dict(logml=np.nan, niter=i, r_vals=np.asarray(r_vals))
        return dict(logml=logml, niter=i, r_vals=np.asarray(r_vals))

    # Run iterative scheme:
    tmp = iterative_scheme(q11, q12, q21, q22, r0, neff, tol1, maxiter, 'r')
    if not np.isfinite(tmp['logml']):
        warn("""logml could not be estimated within maxiter, rerunning with adjusted starting value. Estimate might be more variable than usual.""")
        # use geometric mean as starting value
        r0_2 = np.sqrt(tmp['r_vals'][-2] * tmp['r_vals'][-1])
        tmp = iterative_scheme(q11, q12, q21, q22, r0_2, neff, tol2,
                               maxiter, 'r')

    return dict(logml=tmp['logml'], niter=tmp['niter'], method="normal",
                q11=q11, q12=q12, q21=q21, q22=q22)
# [Gelman–Rubin convergence diagnostic using multiple chains # ](https://blog.stata.com/2016/05/26/gelman-rubin-convergence-diagnostic-using-multiple-chains/) # # [Convergence tests for MCMC](https://www.imperial.ac.uk/media/imperial-college/research-centres-and-groups/astrophysics/public/icic/data-analysis-workshop/2018/Convergence-Tests.pdf) pm.gelman_rubin(chain) # ## 關於模型效率 # # 兩個$\theta^{i}$的自相關性,如何解釋MCMC會造成自相關性? pm.autocorrplot(chain) pm.forestplot(chain, varnames=['theta']) pm.stats.summary(chain) # ## 總結後驗 pm.effective_n(chain) pm.plot_posterior(chain) # ## 基於後驗的決策 pm.plot_posterior(chain, rope=[0.45, 0.55]) pm.plot_posterior(chain, ref_val=0.5) # ## 損失函數
pm.traceplot(chain, lines={'theta':theta_real}) ''' with first_model: step = pm.Metropolis() multi_trace = pm.sample(1000, step=step, njobs=4, cores=1) #njobs=4 burnin = 0 multi_chain = multi_trace[burnin:] pm.traceplot(multi_chain, lines={'theta': theta_real}) # In[6]: pm.gelman_rubin(multi_chain) # In[7]: pm.forestplot(multi_chain, varnames=['theta']) # In[8]: pm.summary(multi_chain) # # In[11]: pm.autocorrplot(multi_chain) #自相关 # In[12]: pm.effective_n(multi_chain)['theta'] #有效采样大小
def loo(trace, model=None, pointwise=False, reff=None, progressbar=False):
    """Compute Pareto-smoothed importance sampling leave-one-out (PSIS-LOO)
    cross-validation for out of sample predictive fit, following Vehtari
    et al. (2015).

    Parameters
    ----------
    trace : result of MCMC run
    model : PyMC Model
        Optional model. Default None, taken from context.
    pointwise: bool
        If True, the pointwise predictive accuracy is returned as well.
        Default False.
    reff : float
        Relative MCMC efficiency, `effective_n / n`, i.e. the number of
        effective samples divided by the number of actual samples. When
        None it is 1 for a single-chain trace, otherwise the mean
        effective sample size divided by the total number of draws.
    progressbar: bool
        Whether or not to display a progress bar in the command line. The
        bar shows the percentage of completion, the evaluation speed, and
        the estimated time to completion.

    Returns
    -------
    namedtuple with the following elements:
    LOO: approximated leave-one-out cross-validation
    LOO_se: standard error of LOO
    p_LOO: effective number of parameters
    LOO_i: array of pointwise predictive accuracy, only if pointwise True

    Raises
    ------
    ValueError
        If the pointwise log-likelihood of the model is empty.
    """
    model = modelcontext(model)

    if reff is None:
        if trace.nchains == 1:
            reff = 1.
        else:
            n_eff = pm.effective_n(trace)
            mean_n_eff = pm.stats.dict2pd(n_eff, 'eff').mean()
            total_draws = len(trace) * trace.nchains
            reff = mean_n_eff / total_draws

    log_py = _log_post_trace(trace, model, progressbar=progressbar)
    if log_py.size == 0:
        raise ValueError('The model does not contain observed values.')

    log_weights, pareto_k = _psislw(-log_py, reff)
    log_weights += log_py

    if np.any(pareto_k > 0.7):
        warnings.warn("""Estimated shape parameter of Pareto distribution is greater than 0.7 for one or more samples. You should consider using a more robust model, this is because importance sampling is less likely to work well if the marginal posterior and LOO posterior are very different. 
This is more likely to happen with a non-robust model and highly influential observations.""")

    loo_pointwise = -2 * logsumexp(log_weights, axis=0)
    loo_total = loo_pointwise.sum()
    loo_se = (len(loo_pointwise) * np.var(loo_pointwise))**0.5
    lppd = np.sum(logsumexp(log_py, axis=0, b=1. / log_py.shape[0]))
    p_loo = lppd + (0.5 * loo_total)

    if not pointwise:
        LOO_r = namedtuple('LOO_r', 'LOO, LOO_se, p_LOO')
        return LOO_r(loo_total, loo_se, p_loo)

    LOO_r = namedtuple('LOO_r', 'LOO, LOO_se, p_LOO, LOO_i')
    return LOO_r(loo_total, loo_se, p_loo, loo_pointwise)
def posterior_effn(self):
    """Return ``pm.effective_n`` evaluated on ``self.posterior_``."""
    posterior = self.posterior_
    return pm.effective_n(posterior)
def summary(trace, varnames=None, transform=lambda x: x, stat_funcs=None,
            extend=False, include_transformed=False,
            alpha=0.05, start=0, batches=None):
    R"""Create a data frame with summary statistics.

    Parameters
    ----------
    trace : MultiTrace instance
    varnames : list
        Names of variables to include in summary
    transform : callable
        Function to transform data (defaults to identity)
    stat_funcs : None or list
        A list of functions used to calculate statistics. By default,
        the mean, standard deviation, simulation standard error, and
        highest posterior density intervals are included.

        The functions will be given one argument, the samples for a
        variable as a 2 dimensional array, where the first axis
        corresponds to sampling iterations and the second axis
        represents the flattened variable (e.g., x__0, x__1,...). Each
        function should return either

        1) A `pandas.Series` instance containing the result of
           calculating the statistic along the first axis. The name
           attribute will be taken as the name of the statistic.
        2) A `pandas.DataFrame` where each column contains the
           result of calculating the statistic along the first axis.
           The column names will be taken as the names of the
           statistics.
    extend : boolean
        If True, use the statistics returned by `stat_funcs` in
        addition to, rather than in place of, the default statistics.
        This is only meaningful when `stat_funcs` is not None.
    include_transformed : bool
        Flag for reporting automatically transformed variables in addition
        to original variables (defaults to False).
    alpha : float
        The alpha level for generating posterior intervals. Defaults
        to 0.05. This is only meaningful when `stat_funcs` is None.
    start : int
        The starting index from which to summarize (each) chain.
        Defaults to zero.
    batches : None or int
        Batch size for calculating standard deviation for non-independent
        samples. Defaults to the smaller of 100 or the number of samples.
        This is only meaningful when `stat_funcs` is None.

    Returns
    -------
    `pandas.DataFrame` with summary statistics for each variable
    Defaults one are: `mean`, `sd`, `mc_error`, `hpd_2.5`, `hpd_97.5`,
    `n_eff` and `Rhat`. Last two are only computed for traces with 2 or
    more chains.

    Examples
    --------
    .. code:: ipython

        >>> import pymc3 as pm
        >>> trace.mu.shape
        (1000, 2)
        >>> pm.summary(trace, ['mu'])
                   mean        sd  mc_error   hpd_2.5  hpd_97.5
        mu__0  0.106897  0.066473  0.001818 -0.020612  0.231626
        mu__1 -0.046597  0.067513  0.002048 -0.174753  0.081924

                  n_eff      Rhat
        mu__0     487.0   1.00001
        mu__1     379.0   1.00203

    Other statistics can be calculated by passing a list of functions.

    .. code:: ipython

        >>> import pandas as pd
        >>> def trace_sd(x):
        ...     return pd.Series(np.std(x, 0), name='sd')
        ...
        >>> def trace_quantiles(x):
        ...     return pd.DataFrame(pm.quantiles(x, [5, 50, 95]))
        ...
        >>> pm.summary(trace, ['mu'], stat_funcs=[trace_sd, trace_quantiles])
                     sd         5        50        95
        mu__0  0.066473  0.000312  0.105039  0.214242
        mu__1  0.067513 -0.159097 -0.045637  0.062912
    """
    if varnames is None:
        varnames = get_default_varnames(
            trace.varnames, include_transformed=include_transformed)

    if batches is None:
        batches = min([100, len(trace)])

    funcs = [
        lambda x: pd.Series(np.mean(x, 0), name='mean'),
        lambda x: pd.Series(np.std(x, 0), name='sd'),
        lambda x: pd.Series(mc_error(x, batches), name='mc_error'),
        lambda x: _hpd_df(x, alpha)
    ]

    if stat_funcs is not None:
        # ``list(...)`` lets callers pass any sequence, not only a list.
        funcs = funcs + list(stat_funcs) if extend else stat_funcs

    var_dfs = []
    for var in varnames:
        vals = transform(trace.get_values(var, burn=start, combine=True))
        # Flatten every variable to (n_samples, n_elements) so each
        # statistic function sees a 2-d array.
        flat_vals = vals.reshape(vals.shape[0], -1)
        var_df = pd.concat([f(flat_vals) for f in funcs], axis=1)
        var_df.index = ttab.create_flat_names(var, vals.shape[1:])
        var_dfs.append(var_df)
    dforg = pd.concat(var_dfs, axis=0)

    # Convergence diagnostics belong to the default statistics only, and
    # are only computed when there are at least two chains.
    if (stat_funcs is not None) and (not extend):
        return dforg
    if trace.nchains < 2:
        return dforg

    # NOTE(review): n_eff and Rhat are computed on the whole trace; neither
    # ``start`` nor ``transform`` is applied here -- confirm this is
    # intended.
    n_eff = pm.effective_n(trace,
                           varnames=varnames,
                           include_transformed=include_transformed)
    n_eff_pd = dict2pd(n_eff, 'n_eff')
    rhat = pm.gelman_rubin(trace,
                           varnames=varnames,
                           include_transformed=include_transformed)
    rhat_pd = dict2pd(rhat, 'Rhat')
    # ``join_axes`` was removed from ``pd.concat`` in pandas 1.0;
    # reindexing the result on ``dforg.index`` gives the same row
    # selection and order.
    return pd.concat([dforg, n_eff_pd, rhat_pd],
                     axis=1).reindex(dforg.index)
# forest plot pm.forestplot([t_0, t_1, t_2], figsize=(16, 12), textsize=20, markersize=20) # acceptance rate print('Model 0: acc_rate = ' + str(step_0.accepted / (niter * nchains))) print('Model 1: acc_rate = ' + str(step_1.accepted / (niter * nchains))) print('Model 2: acc_rate = ' + str(step_2.accepted / (niter * nchains))) # ACF pm.autocorrplot(t_0, var_names=['m'], combined=True, textsize=20) pm.autocorrplot(t_1, var_names=['m'], combined=True, textsize=20) pm.autocorrplot(t_2, var_names=['m'], combined=True, textsize=20) # ESS print(pm.effective_n(t_0)) print(pm.effective_n(t_1)) print(pm.effective_n(t_2)) # Gelman-Rubin print(pm.gelman_rubin(t_0)) print(pm.gelman_rubin(t_1)) print(pm.gelman_rubin(t_2)) # Geweke plt.rcParams['figure.figsize'] = (16, 12) plt.rcParams['xtick.labelsize'] = 30 plt.rcParams['ytick.labelsize'] = 30 plt.rcParams['axes.labelsize'] = 40 gew_0 = pm.geweke(t_0.m, first=.1, last=.5) plt.scatter(gew_0[:, 0], gew_0[:, 1])