def test_logistic_regression(crossed_data):
    """Check that ``link="logit"`` and ``link=tt.nnet.sigmoid`` give equivalent models.

    Two Bernoulli models are built from the same formula and data, one naming
    the link as a string and one passing the Theano sigmoid function. The test
    compares posterior means, term names, common-effect design-matrix columns
    and the prior arguments of every non-group-specific term.

    Parameters
    ----------
    crossed_data
        Data handed unchanged to ``Model`` (supplied by the test harness).
    """
    formula = "threecats['b'] ~ continuous + dummy"

    # Model using the string name of the link.
    model0 = Model(formula, crossed_data, family="bernoulli", link="logit")
    fitted0 = model0.fit(tune=0, draws=1000)

    # Same model, but the link is given as a Theano function.
    model1 = Model(formula, crossed_data, family="bernoulli", link=tt.nnet.sigmoid)
    fitted1 = model1.fit(tune=0, draws=1000)

    # check that using a theano link function works
    assert np.allclose(az.summary(fitted0)["mean"], az.summary(fitted1)["mean"], atol=0.2)

    # check that term names agree
    assert set(model0.term_names) == set(model1.term_names)

    def design_columns(model):
        # One tuple per design-matrix column, gathered in a set so that term
        # names, level names and column order do not affect the comparison.
        return {
            tuple(term.data[:, lev])
            for term in model.common_terms.values()
            for lev in range(len(term.levels))
        }

    # check that common effect design matrices are the same
    assert design_columns(model0) == design_columns(model1)

    def common_priors(model):
        # Prior arguments of every term that is not group specific.
        return {
            term.name: term.prior.args
            for term in model.terms.values()
            if not term.group_specific
        }

    # check that models have same priors for common effects
    priors0 = common_priors(model0)
    priors1 = common_priors(model1)

    # check dictionary keys
    assert set(priors0) == set(priors1)

    def dicts_close(a, b):
        # Must return a single bool: a non-empty list of comparison results is
        # always truthy and would let differing priors pass unnoticed.
        if set(a) != set(b):
            return False
        return all(np.allclose(a[key], b[key], atol=0, rtol=0.01) for key in a)

    # check dictionary values
    assert all(dicts_close(priors0[name], priors1[name]) for name in priors0)
def exercise4():
    """Sample a categorical ``group`` given an observed count of 7 positive answers.

    Each of the three groups has a success probability ``(1 + 2 * prob) / 3``
    for ``prob`` in ``[0.3, 0.7, 0.95]``. The observed count is modelled as
    Binomial with ``n=num_questions`` (a module-level name). After sampling,
    the trace and posterior plots are shown and a summary is computed.

    Returns
    -------
    The trace returned by ``pm.sample``.
    """
    with pm.Model() as basic_model:
        candidate_probs = [0.3, 0.7, 0.95]
        success_rates = np.array(
            [np.divide(1, 3) * (1 + 2 * q) for q in candidate_probs])

        # Latent group index, selected with p=[1, 1, 1].
        group = pm.Categorical('group', p=np.array([1, 1, 1]))
        # Success probability of whichever group is currently selected.
        p = pm.Deterministic('p', theano.shared(success_rates)[group])
        positive_answers = pm.Binomial(
            'positive_answers', n=num_questions, p=p, observed=[7])

        trace = pm.sample(4000, progressbar=True)

        az.plot_trace(trace)
        plt.show()

        az.plot_posterior(trace)
        plt.show()

        az.summary(trace)
        return trace
def test_disaster_model_missing(self):
    """Smoke-test sampling and summarising the disaster model built with ``masked=True``."""
    model = build_disaster_model(masked=True)
    # Initial values for stochastic nodes
    initial_values = {"early_mean": 2.0, "late_mean": 3.0}
    with model:
        # Slice sampler for the two mean variables; the remaining variables
        # get their step method selected automatically.
        slice_step = pm.Slice([model.early_mean_log__, model.late_mean_log__])
        trace = pm.sample(
            500, tune=50, start=initial_values, step=slice_step, chains=2)
        az.summary(trace)
def get_params(n, input_params, input_traces=False):
    """
    Helper function to extract the coefficients of a degree-n polynomial fit
    and their uncertainties, either from a best-fit dict or from MCMC traces.

    Parameters
    ----------
    n : int
        Power of the polynomial fit; coefficients ``C_0`` ... ``C_n`` are read.
    input_params : dict or trace object
        best_fit : mapping holding one entry per coefficient name plus a
            ``'covariance matrix'`` entry indexable as ``[i][i]``.
        traces : object accepted by ``az.summary`` (the original docstring
            names a pymc3 MultiTrace) that contains the coefficient variables.
    input_traces : bool
        True when ``input_params`` holds traces, False for a best-fit dict.

    Returns
    -------
    params, params_errors : 1-D numpy arrays of floats
        Coefficient values (posterior mean or best fit) and their standard
        deviations (posterior ``sd`` or square root of the covariance diagonal).
    """
    names = ['C_{}'.format(i) for i in range(n + 1)]

    if input_traces:
        # Build the summary table once; recomputing it for every coefficient
        # and statistic repeats the same work 2 * (n + 1) times.
        table = az.summary(input_params, round_to=9)
        params = np.array([table['mean'][name] for name in names])
        params_errors = np.array([table['sd'][name] for name in names])
    else:
        covariance = input_params['covariance matrix']
        params = np.array([input_params[name] for name in names])
        # Standard deviation of coefficient i is sqrt of the i-th diagonal entry.
        params_errors = np.array(
            [np.sqrt(covariance[i][i]) for i in range(n + 1)])

    return params, params_errors
def test_disaster_model(self):
    """Smoke-test sampling and summarising the disaster model built with ``masked=False``."""
    model = build_disaster_model(masked=False)
    # Initial values for stochastic nodes
    initial_values = {"early_mean": 2, "late_mean": 3.0}
    with model:
        # Slice sampler for the two mean variables, looked up by name; the
        # remaining variables get their step method selected automatically.
        sliced_vars = [model["early_mean_log__"], model["late_mean_log__"]]
        slice_step = pm.Slice(sliced_vars)
        idata = pm.sample(
            500, tune=50, start=initial_values, step=slice_step, chains=2)
        az.summary(idata)
def test_bayes_nonparametric():
    """Exercise ``sgmt.bayes`` with a range of segment and changepoint specifications.

    Most calls only check that a model can be constructed; two models built on
    random data are also fitted and summarised with ArviZ.
    """
    tiny = pd.DataFrame({'x': [0, 5], 'y': [0, 0], 'z': [0, 1]})

    # 2 connected segments, non-parametric changepoint
    sgmt.bayes(['y~1+x', '0+x'], data=tiny)

    # 3 connected segments, static changepoints
    sgmt.bayes(['y~1+x', '0+x', '0+x'], data=tiny)

    # 2 disconnected segments, static changepoint
    sgmt.bayes(['y~1+x', '1+x'], data=tiny)

    # 2 connected segments, covariates, and parametric changepoints.
    # Income is a covariate here, so it is ambiguous which variable the
    # changepoint operates over; it has to be named explicitly via x_var.
    with_covariates = pd.DataFrame(
        {'score': [0, 0], 'time': [0, 5], 'income': [12, 33], 'IQ': [0, 1]})
    sgmt.bayes(['score~1+time+income', '1+time+income'],
               x_var='time', data=with_covariates)

    # focus on estimation
    random_xy = pd.DataFrame(
        {'y': rng.random(size=100), 'x': rng.random(size=100)})
    estimated = sgmt.bayes(['y~1+x', '0+x'], data=random_xy)
    trace = estimated.fit()      # fit model
    az.summary(trace)            # summarize the model estimation

    # 3 connected segments, static but explicit changepoints
    sgmt.bayes(['y~1+x', '0+x', '0+x'], changepoints=['1', '1'], data=random_xy)

    # 2 disconnected segments, static but explicit changepoint
    sgmt.bayes(['y~1+x', '1+x'], changepoints=['1'], data=random_xy)

    # The outcome variable may also be indicated explicitly, which can ease
    # the writing of segment specifications.
    outcome_only = pd.DataFrame({'y': [0, 0], 'x': [0, 5]})
    sgmt.bayes(['1+x'] * 5, changepoints=['1'] * 4, y_var='y', data=outcome_only)

    # Not supported yet: implicit specs such as ['y', '1+x', '1+x'] whose
    # changepoints would be filled in with intercepts, i.e. the equivalent of
    # passing changepoints=['1', '1', '1'].

    # focus on estimation
    random_xy = pd.DataFrame(
        {'y': rng.random(size=100), 'x': rng.random(size=100)})
    explicit = sgmt.bayes(['y~1+x', '0+x'], changepoints=['1'], data=random_xy)
    idata = explicit.fit()       # fit model
    az.summary(idata)            # summarize the model estimation
def basic_test():
    """Fit a Normal mean to 1000 standard-normal draws and display the results.

    Prints the posterior dimensions and the ArviZ summary, draws the trace
    plot and shows the figure. Returns None.
    """
    with pm.Model() as model:
        # A flat prior with limits could be declared like this instead:
        #   z = pm.Uniform('z', lower=0.0, upper=3.0)

        # Prior on the mean.
        mu = pm.Normal('mu', mu=0, sigma=1)
        # Likelihood of the observed data.
        obs = pm.Normal('obs', mu=mu, sigma=1,
                        observed=np.random.randn(1000))

        # Run sampler
        idata = pm.sample(2000, tune=1500, return_inferencedata=True)

    print(idata.posterior.dims)
    az.plot_trace(idata)

    summary = az.summary(idata)
    print("Summary:")
    print(summary)

    plt.show()
def skew_normal_prog():
    """Skew-normal version of the two-component model.

    Poisson samples with rates ``lambda_b`` and ``lambda_u`` (module-level
    names) are each fitted with a skew-normal distribution. ``x1`` and ``x2``
    use the parameters fitted for ``lambda_b``; their sum is the observed value
    of ``u``, which uses the parameters fitted for ``lambda_u``.

    Returns
    -------
    tuple
        ``(skew_mean, skew_prob)``: the average of the posterior means in the
        ArviZ summary, and the average fraction of negative draws of ``x1``
        and ``x2``.
    """
    n_draws = 2500
    sample_b = poisson.rvs(lambda_b, size=n_draws)
    sample_u = poisson.rvs(mu=lambda_u, size=n_draws)

    shape_b, loc_b, scale_b = stats.skewnorm.fit(sample_b)
    shape_u, loc_u, scale_u = stats.skewnorm.fit(sample_u)

    with pm.Model() as basic_model:
        component = dict(mu=loc_b, sigma=scale_b, alpha=shape_b)
        x1 = pm.SkewNormal('x1', **component)
        x2 = pm.SkewNormal('x2', **component)
        u = pm.SkewNormal('u', mu=loc_u, sigma=scale_u, alpha=shape_u,
                          observed=x1 + x2)

        trace = pm.sample(5000)

        # The two posterior means differ slightly numerically, so average them.
        skew_mean = np.mean(az.summary(trace)["mean"])

    negative_share = [
        np.mean(trace.get_values(name) < 0) for name in ('x1', 'x2')
    ]
    skew_prob = np.mean(negative_share)
    return skew_mean, skew_prob
def model_summary(self):
    """Compute the ArviZ summary of ``self.trace`` once and cache it on ``self.summary``.

    Nothing happens when ``self.summary`` is already set. The summary is
    requested with ``var_names=["~chol", "~vals"]`` and values rounded to two
    decimals. Returns None.
    """
    if self.summary is not None:
        return
    self.summary = az.summary(
        self.trace, var_names=["~chol", "~vals"], round_to=2)
def samplePosterior(model, N, fit_intercept=False, fit_slope=True):
    """
    Monte Carlo for the posterior.
    Sample posterior predictive

    Parameters
    ----------
    model
        Model object entered as a context manager for sampling.
    N
        Passed to ``pm.sample`` as its first positional argument.
    fit_intercept, fit_slope : bool
        Select which parameters are summarised: only ``Intercept``, only
        ``slope``, or both (also used when both flags are False).

    Returns
    -------
    params : dict
        ``{name: {'hpd_3%': ..., 'hpd_mean': ..., 'hpd_97%': ...}}`` for each
        summarised parameter.
    Y_obs
        The ``'Y_obs'`` entry of the posterior predictive samples.
    """
    RANDOM_SEED = 58

    if fit_intercept and not fit_slope:
        summary_names = ["Intercept"]
    elif fit_slope and not fit_intercept:
        summary_names = ["slope"]
    else:
        summary_names = ["Intercept", "slope"]
    var_names = summary_names + ["Y_obs"]

    with model:
        step = pm.NUTS()
        trace = pm.sample(N, step)
        ppc = pm.sample_posterior_predictive(trace, var_names=var_names,
                                             random_seed=RANDOM_SEED)
        summary = az.summary(trace, var_names=summary_names, round_to=3)
    print(summary)

    def interval_column(label):
        # ArviZ has named the interval columns "hpd_*" in older releases and
        # "hdi_*" in newer ones; accept whichever this installation produces.
        for prefix in ("hpd", "hdi"):
            column = "{}_{}".format(prefix, label)
            if column in summary.columns:
                return summary[column]
        raise KeyError(
            "summary has neither 'hpd_{0}' nor 'hdi_{0}' column".format(label))

    lower = interval_column("3%")
    upper = interval_column("97%")

    # Output keys keep their historical "hpd_" names so callers are unaffected.
    params = {
        name: {
            'hpd_3%': lower[name],
            'hpd_mean': summary['mean'][name],
            'hpd_97%': upper[name],
        }
        for name in summary_names
    }
    return params, ppc['Y_obs']
def fit(self, steps=1000, tune=1000, summarise=False):
    """
    Fit the model to infer the correlation coefficient

    Parameters
    ----------
    steps : int, optional, default 1000
        Number of MCMC steps per chain after burn-in
    tune : int, optional, default 1000
        Number of steps per chain for burn-in
    summarise : bool, default False
        Whether to produce the table summary (also available through summarise())

    Returns
    -------
    ``self.trace`` when ``summarise`` is False, otherwise the tuple
    ``(self.trace, self.summary)``.
    """
    sampler_options = dict(
        tune=tune,
        target_accept=0.9,
        compute_convergence_checks=False,
        return_inferencedata=True,
    )
    with self.model:
        self.trace = pm.sample(steps, **sampler_options)
    self.fitted = True

    if not summarise:
        return self.trace

    # Summary table of everything except variables excluded by "~chol".
    self.summary = az.summary(self.trace, var_names=["~chol"], round_to=2)
    print(self.summary)
    return self.trace, self.summary
def poisson_prog_monthly():
    """Monthly Poisson model with aggregate observations.

    Twelve variables ``x1`` ... ``x12`` are Poisson with rate ``lambda_b`` (a
    module-level name). Six further Poisson variables (``q1``-``q3``, ``s1``,
    ``s2``, ``y``) have rates that are fixed multiples of ``lambda_b`` and are
    observed as sums of consecutive ``x`` variables.

    Returns
    -------
    The mean of the ``"mean"`` column of the ArviZ summary of the trace.
    """
    def running_total(terms):
        # Left-to-right sum, written out so the expression has the same shape
        # as ``a + b + c + ...``.
        acc = terms[0]
        for term in terms[1:]:
            acc = acc + term
        return acc

    basic_model = pm.Model()
    with basic_model:
        months = [pm.Poisson('x%d' % k, mu=lambda_b) for k in range(1, 13)]

        # (variable name, multiple of lambda_b, months summed as observation)
        aggregates = (
            ('q1', 3, months[0:4]),
            ('q2', 3, months[4:8]),
            ('q3', 3, months[8:12]),
            ('s1', 5, months[0:6]),
            ('s2', 8, months[6:12]),
            ('y', 14, months),
        )
        for label, multiple, members in aggregates:
            pm.Poisson(label, mu=multiple * lambda_b,
                       observed=running_total(members))

        trace = pm.sample(5000)

        # The posterior means differ slightly numerically, so average them.
        pois_mean = np.mean(az.summary(trace)["mean"])
    return pois_mean
def test_categorical_term():
    """Check the row labels of the summary for a model with categorical group-specific terms."""
    columns = {
        "y": np.random.normal(size=6),
        "x1": np.random.normal(size=6),
        "x2": [1, 1, 0, 0, 1, 1],
        "g1": ["a"] * 3 + ["b"] * 3,
        "g2": ["x", "x", "z", "z", "y", "y"],
    }
    data = pd.DataFrame(columns)

    model = Model("y ~ x1 + x2 + g1 + (g1|g2) + (x2|g2)", data)
    fitted = model.fit(draws=10)
    df = az.summary(fitted)

    # Expected labels, grouped by the term they belong to.
    common = ["Intercept", "x1", "x2", "g1[b]"]
    group_intercept = ["1|g2_sigma", "1|g2[x]", "1|g2[y]", "1|g2[z]"]
    group_g1 = ["g1|g2_sigma[b]", "g1|g2[b, x]", "g1|g2[b, y]", "g1|g2[b, z]"]
    group_x2 = ["x2|g2_sigma", "x2|g2[x]", "x2|g2[y]", "x2|g2[z]"]
    names = common + group_intercept + group_g1 + group_x2 + ["y_sigma"]

    assert list(df.index) == names
def infer_nonrobust_model():
    """Fit a logistic regression of ``y_0`` on ``x_c`` and return the trace.

    ``x_c`` and ``y_0`` are module-level names. The model records the sigmoid
    of the linear predictor as ``θ`` and the ratio ``-α / β`` as ``bd``.
    """
    with pm.Model() as model_0:
        intercept = pm.Normal('α', mu=0, sd=10)
        slope = pm.Normal('β', mu=0, sd=10)

        linear = intercept + pm.math.dot(x_c, slope)
        prob = pm.Deterministic('θ', pm.math.sigmoid(linear))
        boundary = pm.Deterministic('bd', -intercept / slope)  # decision boundary

        observed = pm.Bernoulli('yl', p=prob, observed=y_0)

        trace = pm.sample(1000, cores=1, chains=2)

        # The summary is computed but not returned.
        az.summary(trace, ['α', 'β', 'bd'])
        return trace
def test_fit_hmc_m32():
    """Generate samples from the posterior distribution

    Fits a ``Regressor(Matern32(...))`` to a seeded synthetic signal and checks
    sampler diagnostics, the ArviZ summary, the posterior state distribution,
    the posterior predictive and the pointwise log-likelihood.
    """
    n_cpu = 1

    # Synthetic data: two sinusoids plus seeded Gaussian noise.
    np.random.seed(1)
    N = 50
    t = np.linspace(0, 1, N)
    y = np.sin(12 * t) + 0.66 * np.cos(25 * t) + np.random.randn(N) * 0.1
    df = pd.DataFrame(index=t, data=y, columns=['y'])

    par = [
        {'name': 'mscale', 'value': 9.313e-01, 'bounds': (0, None),
         'prior': Gamma(4, 4)},
        {'name': 'lscale', 'value': 1.291e-01, 'bounds': (0, None),
         'prior': InverseGamma(3.5, 0.5)},
        {'name': 'sigv', 'value': 9.241e-02, 'bounds': (0, None),
         'prior': InverseGamma(3.5, 0.5)},
    ]
    reg = Regressor(Matern32(par))
    fit = reg.fit(df=df, outputs='y', options={'init': 'fixed', 'n_cpu': n_cpu})

    # --- sampler diagnostics ---
    diagnostic = fit.diagnostic
    assert isinstance(diagnostic, pd.DataFrame)
    assert np.all(diagnostic['ebfmi'] > 0.8)
    assert np.all(diagnostic['mean accept_prob'] > 0.7)
    assert np.sum(diagnostic['sum diverging']) == 0
    assert np.sum(diagnostic['sum max_tree_depth']) == 0

    # --- posterior summary ---
    summary = az.summary(fit.posterior, round_to='none')
    assert isinstance(summary, pd.DataFrame)
    assert np.all(summary['r_hat'] < 1.01)
    ess_columns = ['ess_mean', 'ess_sd', 'ess_bulk', 'ess_tail']
    assert np.all(summary[ess_columns] > 1000)

    # (variable, expected mean, mcse for ess_mean = 1000); tolerance is 3 * mcse
    expected_means = (
        ('mscale', 1.107023, 0.009261),
        ('lscale', 0.146614, 0.001074),
        ('sigv', 0.096477, 0.000515),
        ('lp_', 2.919439, 0.038186),
    )
    for variable, target, mcse in expected_means:
        assert summary['mean'][variable] == pytest.approx(target, abs=3 * mcse)

    def mean_rmse(prediction):
        # Root-mean-square error per row of `prediction`, averaged over rows.
        return np.mean(np.mean((df['y'].values - prediction) ** 2, axis=1) ** 0.5)

    # --- posterior state distribution ---
    xm, xsd = reg.posterior_state_distribution(
        trace=fit.posterior, df=df, outputs='y', smooth=True, n_cpu=n_cpu
    )
    assert isinstance(xm, np.ndarray)
    assert isinstance(xsd, np.ndarray)
    state_shape = (4000, len(df), reg.ss.nx)
    assert xm.shape == state_shape
    assert xsd.shape == state_shape
    assert mean_rmse(xm[:, :, 0]) == pytest.approx(5.839e-2, abs=1e-2)

    # --- posterior predictive ---
    ym, ysd = reg.posterior_predictive(
        trace=fit.posterior, df=df, outputs='y', n_cpu=n_cpu)
    assert isinstance(ym, np.ndarray)
    assert isinstance(ysd, np.ndarray)
    output_shape = (4000, len(df))
    assert ym.shape == output_shape
    assert ysd.shape == output_shape
    assert mean_rmse(ym) == pytest.approx(3.728e-2, abs=1e-2)

    # --- pointwise log-likelihood ---
    pw_loglik = reg.pointwise_log_likelihood(
        trace=fit.posterior, df=df, outputs='y', n_cpu=n_cpu)
    assert isinstance(pw_loglik, dict)
    log_lik = pw_loglik['log_likelihood']
    assert log_lik.shape == (4, 1000, len(df))
    # 0.026 ~ pw_loglik['log_likelihood'].sum(axis=2).std() / np.sqrt(1000)
    assert log_lik.sum(axis=2).mean() == pytest.approx(-1.394, abs=3.256e-2)
def bms(L, hdi_prob=0.95, **sample_kwargs):
    """This function computes the exceedance probabilities (xp)
    and expected relative frequencies (r) from an array of log-evidences.

    Args:
        L (numpy.ndarray): Array of model log-evidences (higher is better fit).
            Array shape should be (K models; N subjects)
        hdi_prob (float): Forwarded to ``az.summary`` as ``hdi_prob``.
            Defaults to 0.95.
        **sample_kwargs: Additional arguments to the pymc.sample function.
            Currently `cores=1` seems to be necessary.

    Returns:
        dict: Dictionary with keys ``summary`` (ArviZ summary of ``alpha`` and
            ``r``), ``xp`` and ``r``.

    Reference:
        Stephan, K. E., Penny, W. D., Daunizeau, J., Moran, R. J., & Friston, K. J. (2009).
        Bayesian model selection for group studies. Neuroimage, 46(4), 1004-1017.
    """
    K, N = L.shape

    # The context variable is deliberately not called `bms`, which would
    # shadow this function inside its own body.
    with pm.Model() as model:

        def lookup_L(L, N):
            """This function looks up the log-evidences for all N subjects,
            given the current model labels m.
            """
            # `m` is resolved from the enclosing scope when this is called.
            return L[tt.cast(m, dtype="int32"), tt.cast(tt.arange(N), dtype="int32")]

        # Priors
        alpha = pm.Uniform("alpha", 0, N, shape=K, testval=np.ones(K))

        # Model
        r = pm.Dirichlet("r", a=alpha, testval=np.ones(K) / K)
        m = pm.Categorical("m", p=r, shape=N, testval=0)

        # Look up log evidence
        ll = pm.DensityDist("ll", logp=lookup_L, observed=dict(L=L, N=N))

        # Sample
        inferencedata = pm.sample(return_inferencedata=True, **sample_kwargs)

    # Build results
    result = {}
    result["summary"] = az.summary(
        inferencedata, hdi_prob=hdi_prob, var_names=["alpha", "r"]
    )

    # Posterior draws of r, indexed below as [:, :, k]; the per-draw maximum
    # is the same for every model, so compute it once.
    r_draws = inferencedata.posterior["r"].data
    r_max = r_draws.max(axis=-1)

    # Exceedance probability: share of draws in which model k attains the max.
    result["xp"] = np.array(
        [np.mean(r_draws[:, :, k] == r_max) for k in range(K)]
    )

    # Expected relative frequencies, renormalised to sum to one.
    r_unscaled = np.array([np.mean(r_draws[:, :, k]) for k in range(K)])
    result["r"] = r_unscaled / r_unscaled.sum()

    return result
def get_summary(RB_model, trace, hdi_prob=.94, kind='all'):
    """Return the ArviZ summary of ``trace``, computed inside the ``RB_model`` context.

    Values are rounded to 4 decimals; ``hdi_prob`` (default .94) and ``kind``
    are forwarded to ``az.summary``.
    """
    summary_options = {'round_to': 4, 'hdi_prob': hdi_prob, 'kind': kind}
    with RB_model:
        return az.summary(trace, **summary_options)
def data_summary(self, printout=True):
    """Summarise the variables of ``self.trace`` whose names contain ``"vals"``.

    The table is recomputed on every call, stored on ``self.summary_data``,
    printed when ``printout`` is true, and returned.
    """
    table = az.summary(
        self.trace, var_names=["vals"], filter_vars="like", round_to=2)
    self.summary_data = table
    if printout:
        print(table)
    return table
def summary(self):
    """
    Returns a summary of the sample statistics

    ``az.summary`` is called on ``self.samples`` with ``credible_interval=0.9``.
    If that raises ``ModuleNotFoundError`` a message is printed and None is
    returned instead.
    """
    try:
        return az.summary(self.samples, credible_interval=0.9)
    except ModuleNotFoundError:
        print("caclulating ess relies on arviz and arviz is not installed")
        return None
def plot_ppc_and_score(trace, data, ax=None, title='PPC', paras=None):
    """Plot posterior-predictive response proportions against SOA, with the LOO score.

    The function draws posterior-predictive samples of ``'y'``, averages them
    per SOA and ``PROBE_SALIENT`` value, plots 95% HDI bands and mean lines
    (black for ``PROBE_SALIENT == 0``, green for ``== 1``), overlays the
    observed mean of ``PROBE_FIRST_RESPONSE`` with error bars derived from
    ``prop_ci``, writes the LOO value on the axes and optionally annotates
    parameter summaries.

    Parameters
    ----------
    trace
        Passed to ``pm.sample_posterior_predictive``, ``az.loo`` and
        ``az.summary``.
    data
        Table exposing ``SOA_IN_FRAMES``, ``SOA_IN_MS``, ``PROBE_SALIENT`` and
        ``PROBE_FIRST_RESPONSE`` and supporting ``groupby`` (presumably a
        pandas DataFrame -- confirm with callers).
    ax
        Axes to draw on; a new figure and axes are created when None.
    title
        Axes title.
    paras
        Optional iterable of variable names whose summaries are written onto
        the axes.

    Returns
    -------
    None
    """
    # NOTE(review): no model is passed here, so this presumably has to be
    # called inside the model context -- confirm with callers.
    # Sample PPC
    ppc_trace = pm.sample_posterior_predictive(trace=trace, var_names=['y'])
    # Calculate LOO score
    loo = az.loo(trace).loo
    loo_text = "LOO = %.2f"%loo
    # Aggregate binary responses: for each SOA, one row of per-draw means for
    # PROBE_SALIENT == 0 followed by one for PROBE_SALIENT == 1.
    new_trace = []
    for soa in sorted(set((data.SOA_IN_FRAMES))):
        new_trace.append(ppc_trace['y'][:,(data.SOA_IN_FRAMES==soa) & (data.PROBE_SALIENT==0)].mean(axis=1))
        new_trace.append(ppc_trace['y'][:,(data.SOA_IN_FRAMES==soa) & (data.PROBE_SALIENT==1)].mean(axis=1))
    # Transposed so that columns are the (SOA, PROBE_SALIENT) cells.
    ppc_trace = {'y': np.array(new_trace).T}
    # Prepare axes if none provided
    if ax is None:
        f,ax= plt.subplots()
    # Get SOAs and condition mask from data
    # NOTE(review): columns above are ordered by SOA_IN_FRAMES while the x axis
    # uses SOA_IN_MS; this assumes both sort identically -- confirm.
    SOAs = sorted(set(data['SOA_IN_MS']))
    cond = data.groupby(['SOA_IN_MS', 'PROBE_SALIENT'])['PROBE_SALIENT'].min().values
    # Plot 95% HDI bands and mean lines per condition
    az.plot_hdi(y=ppc_trace['y'][:,cond==0],x=SOAs, color='k', ax=ax, hdi_prob=0.95, fill_kwargs={'alpha' : 0.23})
    az.plot_hdi(y=ppc_trace['y'][:,cond==1],x=SOAs, color='g', ax=ax, hdi_prob=0.95, fill_kwargs={'alpha' : 0.23})
    ax.plot(SOAs, np.mean(ppc_trace['y'][:,cond==0],axis=0), color='k')
    ax.plot(SOAs, np.mean(ppc_trace['y'][:,cond==1],axis=0), color='g')
    # Observed proportion, number of positive responses and number of trials
    # per (SOA_IN_MS, PROBE_SALIENT) cell.
    pf_mean = data.groupby(['SOA_IN_MS', 'PROBE_SALIENT']).mean().PROBE_FIRST_RESPONSE
    pf_count = data.groupby(['SOA_IN_MS', 'PROBE_SALIENT']).sum().PROBE_FIRST_RESPONSE
    pf_obs = data.groupby(['SOA_IN_MS', 'PROBE_SALIENT']).count().PROBE_FIRST_RESPONSE
    # Distance of the prop_ci bounds from the observed mean, used as error bars.
    pf_ci = abs(np.array(prop_ci(pf_count.values, pf_obs.values)) - pf_mean.values)
    # The [::2] / [1::2] strides assume the grouped rows alternate
    # PROBE_SALIENT 0, 1 for every SOA -- TODO confirm every SOA has both.
    ax.plot(SOAs, pf_mean.values[::2], 'k.')
    # Error bars are shifted by -0.5 / +0.5 on the x axis so they do not overlap.
    ax.errorbar(np.array(SOAs)-0.5, pf_mean.values[::2], pf_ci[:,::2], fmt='none', color='k', alpha=0.5)
    ax.plot(SOAs, pf_mean.values[1::2], 'g.')
    ax.errorbar(np.array(SOAs)+0.5, pf_mean.values[1::2], pf_ci[:,1::2], fmt='none', color='g', alpha=0.5)
    # Reference lines at SOA 0 and at a proportion of 0.5.
    ax.axvline(0, linestyle='dashed')
    ax.axhline(0.5, linestyle='dashed')
    ax.text(-20,0, loo_text)
    if paras is not None:
        for i, varname in enumerate(paras):
            stats = az.summary(trace, var_names=[varname], hdi_prob=.95)
            # One annotation per summary row; the colour lookup below only has
            # two entries, so at most two rows per variable are supported.
            for j, s in enumerate(stats['mean']):
                text = r'$' + varname + r'$: %.2f [%.2f, %.2f]'
                text = text%(s, stats['hdi_2.5%'][j], stats['hdi_97.5%'][j])
                # Position in axes coordinates: row i moves the text down, and
                # entry j selects one of two placements.
                posx, posy = .1 + .5 - (1 - j) * .5, 0.95 - (.05*i) - ((1-j)*.5)
                ax.text(posx, posy, text, transform = ax.transAxes, color=['k','g'][j])
    ax.set_title(title)
def summarise(self):
    """
    Summarise the results of the model

    Parameters
    ----------
    None

    Returns
    -------
    The ArviZ summary of ``self.trace`` (``var_names=["~chol"]``, rounded to
    two decimals), which is also stored on ``self.summary`` and printed.
    """
    table = az.summary(self.trace, var_names=["~chol"], round_to=2)
    self.summary = table
    print(table)
    return table
def compute_and_save_summary(output_dir, var_names, traces, **kwargs):
    """Summarise ``traces`` with ArviZ and write the result to ``summary.json``.

    Parameters
    ----------
    output_dir
        Directory in which ``summary.json`` is written.
    var_names
        Forwarded to ``arviz.summary``.
    traces
        Mapping passed to ``arviz.summary``. For every key ending in
        ``"_calls"`` the last element of each contained sequence is stored,
        converted to ``int``, under ``"per_chain_" + key``.
    **kwargs
        Extra entries merged into the saved dictionary.

    Returns
    -------
    tuple
        ``(summary, summary_dict)``: the ArviZ summary and the dictionary that
        was written to disk.
    """
    summary = arviz.summary(traces, var_names=var_names)
    summary_dict = summary.to_dict()
    summary_dict.update(kwargs)

    for key, value in traces.items():
        if key.endswith("_calls"):
            summary_dict["per_chain_" + key] = [int(v[-1]) for v in value]

    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # encoding is pinned to UTF-8 instead of depending on the locale default.
    target = os.path.join(output_dir, "summary.json")
    with open(target, mode="w", encoding="utf-8") as f:
        json.dump(summary_dict, f, ensure_ascii=False, indent=2)

    return summary, summary_dict
def plot_traces(self, burnin: int = 200, show_plot: bool = False) -> Optional[plt]:
    """
    Convenience function to plot the traces with overlaid means and values.

    :param burnin: the number of initial steps to discard. This is so to enable the samples to be representatives
        of the distributions to be approximated and prevent the random starting point from spoiling the data too much.
    :param show_plot: whether to display the plot. Default: False.
    :return: plt: the updated matplotlib.pyplot status, or None (after logging a warning) when no trace exists yet
    """
    if self.trace is None:
        logger.warning(
            "trace has not yet been created. Call find_map_and_sample before attempting to plot the trace"
        )
        return None

    with warnings.catch_warnings():
        # disables a range of warnings
        warnings.simplefilter('ignore', category=FutureWarning)

        kept = self.trace[burnin:]
        # Summarise once and reuse the table for both the reference lines and
        # the annotations, instead of recomputing it.
        summary = az.summary(kept)

        ax = az.plot_trace(
            kept,
            figsize=(12, len(self.trace.varnames) * 1.5),
            lines={k: v['mean'] for k, v in summary.iterrows()})

        # Write each mean next to its position in the left-hand panel.
        for i, mn in enumerate(summary['mean']):
            ax[i, 0].annotate('{:.2f}'.format(mn),
                              xy=(mn, 0),
                              xycoords='data',
                              xytext=(5, 10),
                              textcoords='offset points',
                              rotation=90,
                              va='bottom',
                              fontsize='large',
                              color='#AA0022')

    if show_plot:
        plt.show()
    return plt
def summary(trace, burn_in=0):
    """Summary of Random Variables from the `trace`

    When `burn_in` is positive, only the last ``num_samples - burn_in`` samples
    are kept, which ignores the initial super-noisy samples. The trace is then
    disentangled and summarised with ArviZ.
    """
    total_samples = count_samples(trace)

    # Drop the burn-in period, if any.
    if burn_in > 0:
        trace = get_last_n_from_trace(trace, total_samples - burn_in)

    # Only the first of the two returned values is needed for the summary.
    disentangled, _unused = disentangle_trace(trace)
    return az.summary(disentangled)
def infer_robust_model():
    """Fit the logistic regression with a contamination term and return the trace.

    ``x_c`` and ``y_0`` are module-level names. The Bernoulli probability is
    ``π * 0.5 + (1 - π) * θ``, where ``θ`` is the sigmoid of the linear
    predictor and ``π`` has a ``Beta(1, 1)`` prior.
    """
    with pm.Model() as model_0:
        intercept = pm.Normal('α', mu=0, sd=10)
        slope = pm.Normal('β', mu=0, sd=10)

        linear = intercept + pm.math.dot(x_c, slope)
        prob = pm.Deterministic('θ', pm.math.sigmoid(linear))
        boundary = pm.Deterministic('bd', -intercept / slope)  # decision boundary

        # The plain model would use p=θ directly; here θ is mixed with 0.5.
        contamination = pm.Beta('π', 1., 1.)  # probability of contamination
        mixed = contamination * 0.5 + (1 - contamination) * prob  # true prob or 0.5

        observed = pm.Bernoulli('yl', p=mixed, observed=y_0)

        trace = pm.sample(1000, cores=1, chains=2)

        # The summary is computed but not returned.
        az.summary(trace, ['α', 'β', 'bd', 'π'])
        return trace
def crude_mixedMLbayse(df_merged, x_feature, y_feature, covars='False', logit=False):
    """Fit a Bayesian mixed model with a random intercept per ``CohortType``.

    Parameters
    ----------
    df_merged
        Input table; the values -9, '-9', 999 and 888 are treated as missing.
    x_feature, y_feature : str
        Predictor and outcome column names.
    covars : {'False', 'True'} or bool
        ``'False'``: regress ``y_feature`` on ``x_feature`` only, using rows
        where both and ``CohortType`` are present. ``'True'``: use the table
        returned by ``add_confound`` and every column of it that is not in the
        exclusion list below. Booleans are accepted as well.
    logit : bool
        When true, fit with ``family='bernoulli'`` and ``link='logit'``.

    Returns
    -------
    The ArviZ summary of the fitted model.

    Raises
    ------
    ValueError
        If ``covars`` is not one of the accepted values.
    """
    #TODO: Replace covars variable with actual selection of individual features

    # Validate first: an unrecognised value used to leave `data` and
    # `fit_string` unassigned and crash later with an UnboundLocalError.
    if covars in (True, 'True'):
        use_covars = True
    elif covars in (False, 'False'):
        use_covars = False
    else:
        raise ValueError(
            "covars must be 'True' or 'False', got {!r}".format(covars))

    # Replace the missing-value codes with NaN.
    for missing_code in (-9, '-9', 999, 888):
        df_merged = df_merged.replace(missing_code, np.nan)

    if not use_covars:
        data = df_merged[[x_feature, y_feature, 'CohortType']].dropna(
            how='any', axis='rows')
        fit_string = y_feature + '~' + x_feature
    else:
        data = add_confound(df_merged, x_feature, y_feature)

        # Target, at-birth and reference dummy variables are kept out of the model.
        excluded = {
            'birthWt', 'Outcome_weeks', 'Outcome', 'PIN_Patient', 'SGA', 'LGA',
            'birthLen', 'CohortType', 'race', 'race_1', 'smoking', 'smoking_3',
            'education_5', 'education',
        }
        terms = [col for col in data.columns if col not in excluded]

        # Build the model string; spacing matches the historical output.
        fit_string = y_feature + '~'
        if terms:
            fit_string += ' ' + '  + '.join(terms) + ' '

    print('mixedML string:')
    print(fit_string)

    # Mixed model with group variable = CohortType.
    fit_string += '+ (1|CohortType)'

    model = bmb.Model(data)
    if not logit:
        results = model.fit(fit_string)
    else:
        results = model.fit(fit_string, family='bernoulli', link='logit')

    mdf = az.summary(results)
    return mdf
def az_v_sigma2_plot(stan_fit, var_list=['v', 'sigma2']):
    """
    Function to demonstrate pystan v convergence result through the R_hat table,
    autocorrelation (3 chains), and trace plot.

    The summary table covers ``var_list`` plus ``'W'``; the trace,
    autocorrelation and pair plots cover ``var_list`` only.
    """
    # Variant matching names by substring:
    #   az.summary(stan_fit, var_names=["v", "sigma2", "W"], filter_vars="like")
    summary_vars = var_list + ['W']
    print(az.summary(stan_fit, var_names=summary_vars))

    # Variant matching names by substring:
    #   az.plot_trace(stan_fit, var_names=["v", "sigma2"], filter_vars="like")
    az.plot_trace(stan_fit, var_names=var_list)
    az.plot_autocorr(stan_fit, var_names=var_list)
    az.plot_pair(stan_fit, var_names=var_list, divergences=True)
def summary(self):
    """Return summary statistics of posterior parameter samples.

    Default statistics are: ``mean``, ``sd``, ``hdi_3%``, ``hdi_97%``,
    ``mcse_mean``, ``mcse_sd``, ``ess_bulk``, ``ess_tail``, and ``r_hat``.
    ``r_hat`` is only computed for traces with 2 or more chains.

    Returns
    -------
    pandas.DataFrame
        The ArviZ summary of ``self.data``.
    """
    posterior_table = az.summary(self.data)
    return posterior_table
def toy_model(v_samp, z_samp, logp_prior, size=500, samples=50, steps=1000, tune=1000,
              a_true=1.2, b_true=-0.5, width_true=0.05, extratext='_true'):
    '''
    The pymc3 linear model z(v) = a*v+b with natural width in log space
    log_width. The prior contribution to likelihood is an argument to the
    function.

    The true values are used as starting values and as the truth markers of
    the corner plot, which is saved under ``PriorToy/``. ``size`` and
    ``samples`` are accepted but not used. Returns None.
    '''
    log_width_true = np.log(width_true)

    with pm.Model() as model:
        slope = pm.Normal("a", mu=0, sigma=10, testval=a_true)
        intercept = pm.Normal("b", mu=0, sigma=10, testval=b_true)
        log_width = pm.Normal("log_width", mu=log_width_true, sigma=2.0,
                              testval=log_width_true)

        line = slope * v_samp + intercept

        # The line has some width, modelled as a Gaussian.
        logp_hyper = -0.5 * (z_samp - line)**2 * pm.math.exp(
            -2 * log_width) - log_width

        # Account for the intermediate prior.
        logp = logp_hyper - logp_prior

        # Marginalised likelihood: the maximum is subtracted before
        # exponentiating and added back afterwards.
        max_logp = tt.max(logp, axis=1)
        shifted = pm.math.exp(logp - max_logp[:, None])
        marg_logp = max_logp + pm.math.log(pm.math.sum(shifted, axis=1))
        pm.Potential('marg_logp', marg_logp)

        trace = pm.sample(draws=steps, tune=tune, target_accept=0.9,
                          init='adapt_full', return_inferencedata=False)

        # A trace plot could be added here with az.plot_trace(trace).
        print(az.summary(trace, round_to=2))

    print(a_true, b_true, log_width_true)

    # Corner plot with the true values marked.
    truths = [a_true, b_true, log_width_true]
    corner.corner(pm.trace_to_dataframe(trace), truths=truths)
    plt.savefig("PriorToy/Corner_N1000_vfcomplex_prior_samp_mixed%s.png" % (extratext),
                bbox_inches='tight', dpi=150)
    return
def poisson_prog():
    """Two-component Poisson model with an observed sum.

    ``x1`` and ``x2`` are Poisson with rate ``lambda_b``; ``u`` is Poisson with
    rate ``lambda_u`` and is observed as ``x1 + x2``. Both rates are
    module-level names.

    Returns
    -------
    The mean of the ``"mean"`` column of the ArviZ summary of the trace.
    """
    with pm.Model() as basic_model:
        components = [pm.Poisson(label, mu=lambda_b) for label in ('x1', 'x2')]
        u = pm.Poisson('u', mu=lambda_u,
                       observed=components[0] + components[1])

        trace = pm.sample(5000)

        # The two posterior means differ slightly numerically, so average them.
        pois_mean = np.mean(az.summary(trace)["mean"])
    return pois_mean