def fun_infer_model_Bernoulli(df, samples=10, tune=100, Krange=np.arange(2, 20), path="./", name=""):
    ch = 1
    N = df.shape[0]
    Q = df.shape[1]
    for K in Krange:
        with pm.Model() as model:
            # p=0.5 is an assumed default; the original call omitted the required probability
            learner = pm.Bernoulli('learner', p=0.5, shape=(N, K))
            # pm.Bernoulli has no 'a' argument; the repeated .1 values are passed as p
            question = pm.Bernoulli('question', p=np.repeat(.1, K), shape=(Q, K))
            x = pm.math.dot(learner, question.T)
            results = pm.Bernoulli('results', p=x, shape=(N, Q), observed=df)
        for RV in model.basic_RVs:
            print(RV.name, RV.logp(model.test_point))
        pm.model_to_graphviz(model)
        with model:
            trace = pm.sample(samples, chains=ch, tune=tune, discard_tuned_samples=True)
        a = pm.math.dot(trace['learner'].mean(0), trace['question'][:, :].mean(0).T)
        pd.DataFrame(a.eval()).to_csv(path + name + "estim_" + str(K) + ".txt", sep="\t")
        print("finished: " + str(K))
    return model
def test_model_to_graphviz_for_model_with_data_container(self):
    with pm.Model() as model:
        x = pm.Data("x", [1.0, 2.0, 3.0])
        y = pm.Data("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 10.0)
        obs_sigma = floatX(np.sqrt(1e-2))
        pm.Normal("obs", beta * x, obs_sigma, observed=y)
        pm.sample(1000, init=None, tune=1000, chains=1)

    for formatting in {"latex", "latex_with_params"}:
        with pytest.raises(ValueError, match="Unsupported formatting"):
            pm.model_to_graphviz(model, formatting=formatting)

    exp_without = [
        'x [label="x\n~\nData" shape=box style="rounded, filled"]',
        'beta [label="beta\n~\nNormal"]',
        'obs [label="obs\n~\nNormal" style=filled]',
    ]
    exp_with = [
        'x [label="x\n~\nData" shape=box style="rounded, filled"]',
        'beta [label="beta\n~\nNormal(mu=0.0, sigma=10.0)"]',
        f'obs [label="obs\n~\nNormal(mu=f(f(beta), x), sigma={obs_sigma})" style=filled]',
    ]

    for formatting, expected_substrings in [
        ("plain", exp_without),
        ("plain_with_params", exp_with),
    ]:
        g = pm.model_to_graphviz(model, formatting=formatting)
        # check formatting of RV nodes
        for expected in expected_substrings:
            assert expected in g.source
def main(n, observed):
    '''
    Parameters
    ----------
    n : int
        number of trials
    observed : int
        observed number of successes
    '''
    with pm.Model() as exam_model:
        # Weak uniform prior
        prior = pm.Beta('prior', 0.5, 0.5)

        # Bernoulli trials modeled using a binomial distribution
        obs = pm.Binomial('obs', n=n, p=prior, observed=observed)

        # plot model design
        pm.model_to_graphviz(exam_model)

        # Use Metropolis-Hastings for sampling
        step = pm.Metropolis()

        # sample from the model to get the posterior
        trace = pm.sample(5000, step)

        # plot posterior
        pm.plot_posterior(trace)

        # calculate Gelman-Rubin stats
        pm.gelman_rubin(trace)
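# Hypothetical usage of main() above, assuming the enclosing script imports pymc3 as pm.
# The 61 successes out of 100 trials are illustrative numbers, not from the original source.
if __name__ == '__main__':
    main(n=100, observed=61)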
def bayes_multiple_detector(t, s, n, tracename):
    with pm.Model() as abrupt_model:
        sigma = pm.Normal('sigma', mu=30, sigma=5)
        # sigma = pm.Uniform('sigma', 5, 15)
        mu = pm.Uniform("mu1", -30, 30)
        tau = pm.DiscreteUniform("tau" + "1", t.min(), t.max())
        for i in np.arange(2, n + 2):
            _mu = pm.Uniform("mu" + str(i), -200, 0)
            mu = T.switch(tau >= t, mu, _mu)
            if i < (n + 1):
                tau = pm.DiscreteUniform("tau" + str(i), tau, t.max())
        s_obs = pm.Normal("s_obs", mu=mu, sigma=sigma, observed=s)

    g = pm.model_to_graphviz(abrupt_model)
    g.view()

    with abrupt_model:
        pm.find_MAP()
        trace = pm.sample(5000, tune=1000)
        az.plot_trace(trace)
        plt.show()
        az.plot_autocorr(trace)
        plt.show()
        az.to_netcdf(trace, getpath('tracepath') + tracename)
        pm.summary(trace)
    return trace
def bayes_multiple_detector_each_sigma(t, s, n):
    scala = 1000
    with pm.Model() as abrupt_model:
        sigma = pm.Normal('sigma', mu=0.02 * scala, sigma=0.015 * scala)
        # sigma = pm.Uniform('sigma', 5, 15)
        mu = pm.Uniform("mu1", -1.5 * scala, -1.4 * scala)
        tau = pm.DiscreteUniform("tau" + "1", t.min(), t.max())
        for i in np.arange(2, n + 2):
            _mu = pm.Uniform("mu" + str(i), -1.6 * scala, -1.4 * scala)
            mu = T.switch(tau >= t, mu, _mu)
            if i < (n + 1):
                ttau = pm.DiscreteUniform("tau" + str(i), tau, t.max())
                tau = ttau
        tau1 = abrupt_model["tau1"]
        tau2 = abrupt_model["tau2"]
        dtau = pm.DiscreteUniform('dtau', tau1 + 500, tau2)
        s_obs = pm.Normal("s_obs", mu=mu, sigma=sigma, observed=s)

    g = pm.model_to_graphviz(abrupt_model)
    g.view()

    with abrupt_model:
        # pm.find_MAP()
        trace = pm.sample(20000, tune=5000)
        az.plot_trace(trace)
        az.to_netcdf(trace, getpath('tracepath') + 'bd9_4_add_new_rule')
        plt.show()
        pm.summary(trace)
    return trace
def test_model_to_graphviz_for_model_with_data_container(self):
    with pm.Model() as model:
        x = pm.Data('x', [1., 2., 3.])
        y = pm.Data('y', [1., 2., 3.])
        beta = pm.Normal('beta', 0, 10.)
        pm.Normal('obs', beta * x, np.sqrt(1e-2), observed=y)
        pm.sample(1000, init=None, tune=1000, chains=1)

    g = pm.model_to_graphviz(model)
    text = 'x [label="x ~ Deterministic" shape=box style=filled]'
    assert text in g.source
def graph(self, formatting="plain", name=None, figsize=None, dpi=300, fmt="png"): """ Produce a graphviz Digraph from a Bambi model. Requires graphviz, which may be installed most easily with ``conda install -c conda-forge python-graphviz`` Alternatively, you may install the ``graphviz`` binaries yourself, and then ``pip install graphviz`` to get the python bindings. See http://graphviz.readthedocs.io/en/stable/manual.html for more information. Parameters ---------- formatting : str One of ``'plain'`` or ``'plain_with_params'``. Defaults to ``'plain'``. name : str Name of the figure to save. Defaults to None, no figure is saved. figsize : tuple Maximum width and height of figure in inches. Defaults to None, the figure size is set automatically. If defined and the drawing is larger than the given size, the drawing is uniformly scaled down so that it fits within the given size. Only works if ``name`` is not None. dpi : int Point per inch of the figure to save. Defaults to 300. Only works if ``name`` is not None. fmt : str Format of the figure to save. Defaults to ``'png'``. Only works if ``name`` is not None. """ if self.backend is None: raise ValueError("The model is empty, please define a Bambi model") graphviz = pm.model_to_graphviz(model=self.backend.model, formatting=formatting) width, height = (None, None) if figsize is None else figsize if name is not None: graphviz_ = graphviz.copy() graphviz_.graph_attr.update(size=f"{width},{height}!") graphviz_.graph_attr.update(dpi=str(dpi)) graphviz_.render(filename=name, format=fmt, cleanup=True) return graphviz
def test_model_to_graphviz_for_model_with_data_container(self):
    with pm.Model() as model:
        x = pm.Data("x", [1.0, 2.0, 3.0])
        y = pm.Data("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 10.0)
        pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
        pm.sample(1000, init=None, tune=1000, chains=1)

    g = pm.model_to_graphviz(model)

    # Data node rendered correctly?
    text = 'x [label="x ~ Data" shape=box style="rounded, filled"]'
    assert text in g.source

    # Didn't break ordinary variables?
    text = 'beta [label="beta ~ Normal"]'
    assert text in g.source
    text = 'obs [label="obs ~ Normal" style=filled]'
    assert text in g.source
def graph(self, formatting="plain"): """ Produce a graphviz Digraph from a PyMC3 model. Requires graphviz, which may be installed most easily with ``conda install -c conda-forge python-graphviz`` Alternatively, you may install the ``graphviz`` binaries yourself, and then ``pip install graphviz`` to get the python bindings. See http://graphviz.readthedocs.io/en/stable/manual.html for more information. Parameters ---------- formatting : str One of ``'plain'`` or ``'plain_with_params'``. Defaults to ``'plain'``. """ return pm.model_to_graphviz(model=self.backend.model, formatting=formatting)
def test_model_to_graphviz_for_model_with_data_container(self):
    with pm.Model() as model:
        x = pm.Data("x", [1.0, 2.0, 3.0])
        y = pm.Data("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 10.0)
        obs_sigma = floatX(np.sqrt(1e-2))
        pm.Normal("obs", beta * x, obs_sigma, observed=y)
        pm.sample(1000, init=None, tune=1000, chains=1)

    g = pm.model_to_graphviz(model)

    # Data node rendered correctly?
    text = 'x [label="x\n~\nData" shape=box style="rounded, filled"]'
    assert text in g.source

    # Didn't break ordinary variables?
    text = 'beta [label="beta\n~\nNormal(mu=0.0, sigma=10.0)"]'
    assert text in g.source
    text = f'obs [label="obs\n~\nNormal(mu=f(f(beta), x), sigma={obs_sigma})" style=filled]'
    assert text in g.source
def mcmcNegativeBinomial(data):
    """Generate a trace for the data"""
    with pm.Model() as model:
        # Not familiar with Negative Binomial, so no prior knowledge; choose uniform priors.
        # To be safe, make sure the possible range is larger than needed.
        alpha_rv = pm.Uniform('alpha_rv', 0.0, 3.0)
        mu_rv = pm.Uniform('mu_rv', 0.1, 30.0)
        score_rv = pm.NegativeBinomial('score_rv', mu=mu_rv, alpha=alpha_rv, observed=data)
        step = pm.NUTS()
        trace = pm.sample(step=step, draws=10000, chains=4, cores=4, init='adapt_diag')

    graph = pm.model_to_graphviz(model)
    graph.render(filename='model', format='png')
    return trace
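# Hypothetical usage of mcmcNegativeBinomial() above, with synthetic overdispersed counts;
# assumes numpy is imported as np in the enclosing module. The data are illustrative only.
counts = np.random.negative_binomial(n=2, p=0.3, size=200)
trace = mcmcNegativeBinomial(counts)  # also writes model.png via graphviz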
def get_GraphViz_object(self, file_prefix: str, save_dot: bool = True, save_png: bool = True,
                        extension: str = "png"):
    """
    Returns the GraphViz object corresponding to the underlying hierarchical model.

    :param file_prefix: a string with the desired prefix to add to saved files. It can include a folder name too.
    :param save_dot: a boolean indicating whether the DOT source should be saved as a text file too
    :param save_png: a boolean indicating whether a rendered image file should be saved too
    :param extension: a string indicating the extension of the rendered image file, e.g., "png"
    """
    graph = pm.model_to_graphviz(self.pymc_model)
    graph.format = extension
    if save_dot:
        txtFileName = f"{file_prefix}_hierarchical_graph.txt"
        graph.save(txtFileName)
        logger.info(f"Graph's source saved to {txtFileName}")
    if save_png:
        pngFileName = f"{file_prefix}_hierarchical_graph"
        graph.render(pngFileName, view=False, cleanup=True)
        logger.info(f"Graph picture saved to {pngFileName}")
    return graph
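# Hypothetical usage sketch of get_GraphViz_object() above; "analysis" stands in for an
# instance of the class that owns this method, and the file prefix is illustrative.
graph = analysis.get_GraphViz_object(
    file_prefix="results/experiment1", save_dot=True, save_png=True, extension="png"
)
# expected outputs: results/experiment1_hierarchical_graph.txt (DOT source)
#                   results/experiment1_hierarchical_graph.png (rendered image)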
    # Hyper-Priors
    hyper_mu_sd = pm.Uniform('hyper_mu_sd', lower=0, upper=10)
    hyper_mu_mu = pm.Uniform('hyper_mu_mu', lower=0, upper=10)

    # Priors
    mu = pm.Gamma('mu', mu=hyper_mu_mu, sigma=hyper_mu_sd, shape=n_hrs)

    # Data Likelihood
    y_like = pm.Poisson('y_like', mu=mu[hrs_idx], observed=y_obs)

pm.model_to_graphviz(arrivalModel)

#%% Hierarchical Energy Model
with pm.Model() as EVpooling:
    # Hyper-Priors
    hyper_mu_mu = pm.Uniform('hyper_mu_mu', lower=0, upper=upprbnd)
    hyper_mu_sd = pm.Uniform('hyper_mu_sd', lower=0, upper=upprbnd)
    hyper_sd_mu = pm.Uniform('hyper_sd_mu', lower=0, upper=upprbnd)
    hyper_sd_sd = pm.Uniform('hyper_sd_sd', lower=0, upper=upprbnd)

    # Priors
    # mu = pm.Normal('mu', mu=hyper_mu_mu, sigma=hyper_mu_sd,
    #                shape=n_hrs)
def show_model(model, name='tmp'):
    pm.model_to_graphviz(model).render(name, view=True, cleanup=True)
def _fit_model():
    # load data
    df_a = pd.read_csv(os.path.join(path, 'paper_results', 'analogies.tsv'), sep='\t')[['lang', 'vecs', 'source', 'adjusted score']]
    df_s = pd.read_csv(os.path.join(path, 'paper_results', 'similarities.tsv'), sep='\t')[['lang', 'vecs', 'source', 'adjusted rank r']]
    df_n = pd.read_csv(os.path.join(path, 'paper_results', 'norms.tsv'), sep='\t')[['lang', 'vecs', 'norm', 'adjusted r']]
    df_b = pd.read_csv(os.path.join(path, 'paper_results', 'binder.tsv'), sep='\t')[['lang', 'vecs', 'norm', 'adjusted r']]

    # keep track of different evaluation tasks
    df_a['kind'] = 'analogies'
    df_s['kind'] = 'similarities'
    df_n['kind'] = 'norms'
    df_b['kind'] = 'norms'

    # rename different metrics to score, and various dataset origins to task
    df_a = df_a.rename(columns={'source': 'task', 'adjusted score': 'score'})
    df_s = df_s.rename(columns={'source': 'task', 'adjusted rank r': 'score'})
    df_n = df_n.rename(columns={'norm': 'task', 'adjusted r': 'score'})
    df_b = df_b.rename(columns={'norm': 'task', 'adjusted r': 'score'})

    # stack datasets
    df = pd.concat([df_a, df_s, df_n, df_b])

    # merge in corpus word counts
    df_corpus = pd.read_csv(os.path.join(path, 'paper_results', 'table_data.tsv'), sep='\t')
    df = df.merge(df_corpus[['lang', 'vecs', 'words']], how='inner', on=['lang', 'vecs'])
    df.to_csv('model_data.tsv', sep='\t', index=False)  # store merged data for record keeping

    df['log10_wordcount'] = np.log10(df['words'])  # log-transform word counts
    df['log10_wordcount_z'] = standardize(df['log10_wordcount'])  # standardize word counts

    # create sum-coded contrasts
    df['wiki'] = df['vecs'].apply(lambda x: sum_contrast(x, 'wiki', 'wiki+subs'))
    df['subs'] = df['vecs'].apply(lambda x: sum_contrast(x, 'subs', 'wiki+subs'))
    df['analogies'] = df['kind'].apply(lambda x: sum_contrast(x, 'analogies', 'similarities'))
    df['norms'] = df['kind'].apply(lambda x: sum_contrast(x, 'norms', 'similarities'))

    # define PyMC3 model for statistical inference
    with pm.Model() as beta_model:
        # define centered Normal priors for all the betas, sd = 1 (mild shrinkage prior)
        intercept = pm.Normal('μ', mu=0, sd=1)
        b_wordcount = pm.Normal('β log corpus word count', mu=0, sd=1)
        b_wiki = pm.Normal('β wiki vs. mean', mu=0, sd=1)
        b_subs = pm.Normal('β subs vs. mean', mu=0, sd=1)
        b_norms = pm.Normal('β norms vs. mean', mu=0, sd=1)
        b_analogies = pm.Normal('β analogies vs. mean', mu=0, sd=1)
        b_wiki_norms = pm.Normal('β wiki vs. mean:norms vs. mean', mu=0, sd=1)
        b_wiki_analogies = pm.Normal('β wiki vs. mean:analogies vs. mean', mu=0, sd=1)
        b_subs_norms = pm.Normal('β subs vs. mean:norms vs. mean', mu=0, sd=1)
        b_subs_analogies = pm.Normal('β subs vs. mean:analogies vs. mean', mu=0, sd=1)
        b_wikisubs = pm.Deterministic('β wiki+subs vs. mean', -1 * (b_subs + b_wiki))
        b_similarities = pm.Deterministic('β similarities vs. mean', -1 * (b_analogies + b_norms))
        b_wikisubs_norms = pm.Deterministic('β wiki+subs vs. mean:norms vs. mean', -1 * (b_subs_norms + b_wiki_norms))
        b_wikisubs_analogies = pm.Deterministic('β wiki+subs vs. mean:analogies vs. mean', -1 * (b_subs_analogies + b_wiki_analogies))
        b_subs_similarities = pm.Deterministic('β subs vs. mean:similarities vs. mean', -1 * (b_subs_analogies + b_subs_norms))
        b_wiki_similarities = pm.Deterministic('β wiki vs. mean:similarities vs. mean', -1 * (b_wiki_analogies + b_wiki_norms))

        # given the above, there are two ways to compute the interaction wiki+subs vs. mean:similarities vs. mean
        # both methods are given below, but we only need to use one
        # they give the exact same answer; you can uncomment the second line to verify
        # (a standalone numerical check of this identity follows after the function)
        b_wikisubs_similarities = pm.Deterministic('β wiki+subs vs. mean:similarities vs. mean', -1 * (b_wiki_similarities + b_subs_similarities))
        # b_wikisubs_similarities2 = pm.Deterministic('β wiki+subs vs. mean:similarities vs. mean (2)', -1 * (b_wikisubs_analogies + b_wikisubs_norms))

        # non-centered parametrization for task-level random intercepts
        task_codes, task_uniques = df['task'].factorize()  # get number of unique groups and code them
        mu_tilde_task = pm.Normal('μ\u0303 task', mu=0, sd=1, shape=len(task_uniques))  # prior for task group offsets
        sigma_task = pm.HalfNormal('σ task', sd=1)  # prior for task group sigma
        mu_task = pm.Deterministic('μ task', sigma_task * mu_tilde_task)  # task group means (random intercepts)

        # non-centered parametrization for language-level random intercepts
        lang_codes, lang_uniques = df['lang'].factorize()  # get number of unique groups and code them
        mu_tilde_lang = pm.Normal('μ\u0303 lang', mu=0, sd=1, shape=len(lang_uniques))  # prior for lang group offsets
        sigma_lang = pm.HalfNormal('σ lang', sd=1)  # prior for lang group sigma
        mu_lang = pm.Deterministic('μ lang', sigma_lang * mu_tilde_lang)  # lang group means (random intercepts)

        # compute predictions for y, using logit link function
        y_hat = pm.Deterministic('ŷ', pm.math.invlogit(
            intercept
            + b_wordcount * df['log10_wordcount_z']
            + b_wiki * df['wiki']
            + b_subs * df['subs']
            + b_norms * df['norms']
            + b_analogies * df['analogies']
            + b_wiki_norms * df['wiki'] * df['norms']
            + b_wiki_analogies * df['wiki'] * df['analogies']
            + b_subs_norms * df['subs'] * df['norms']
            + b_subs_analogies * df['subs'] * df['analogies']
            + mu_lang[lang_codes]
            + mu_task[task_codes]
        ))

        # define likelihood
        invphi = pm.HalfNormal('1 / φ', sd=1)  # prior for phi, for Beta(mu, phi) parametrization of the likelihood distribution
        phi = pm.Deterministic('φ', 1 / invphi)
        y = pm.Beta('y', alpha=y_hat * phi, beta=(1 - y_hat) * phi, observed=df['score'])

        # sample with 4 chains, 2500 warmup + 2500 posterior samples per chain
        trace = pm.sample(2500, tune=2500, chains=4, target_accept=.9)

    # store trace summary as tsv and LaTeX table
    df_summary = pm.summary(trace, credible_interval=.9)
    df_summary.to_csv('trace_summary.tsv', sep='\t')
    with open('trace_summary_latex.txt', 'w') as latextable:
        latextable.write(df_summary.round(2).to_latex())

    # draw and store model graph
    graph = pm.model_to_graphviz(beta_model)
    graph.graph_attr['rankdir'] = 'LR'  # change graph orientation to left-right (from top-down)
    graph.render(filename='model', format='pdf', cleanup=True)

    # draw and store forest plot
    varnames = [
        'μ',
        'β log corpus word count',
        'β subs vs. mean',
        'β wiki vs. mean',
        'β wiki+subs vs. mean',
        'β analogies vs. mean',
        'β norms vs. mean',
        'β similarities vs. mean',
        'β subs vs. mean:analogies vs. mean',
        'β subs vs. mean:norms vs. mean',
        'β subs vs. mean:similarities vs. mean',
        'β wiki vs. mean:analogies vs. mean',
        'β wiki vs. mean:norms vs. mean',
        'β wiki vs. mean:similarities vs. mean',
        'β wiki+subs vs. mean:analogies vs. mean',
        'β wiki+subs vs. mean:norms vs. mean',
        'β wiki+subs vs. mean:similarities vs. mean',
    ]
    axes = pm.forestplot(trace, var_names=varnames, credible_interval=.9, combined=True, figsize=(4, 6))
    axes[0].set(title='90% credible intervals', xlabel='coefficient (in log-odds)')
    plt.savefig('forestplot.pdf')
    plt.savefig('forestplot.png', dpi=600)
    plt.clf()

    # draw and store trace plot
    pm.traceplot(trace)
    plt.savefig('traceplot.png', dpi=300)  # the traceplot is huge, so we lower the resolution and don't store it as pdf
    plt.clf()

    return df_summary
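# Minimal numerical check (not part of the original script) of the claim in _fit_model() that
# the two ways of computing the 'wiki+subs vs. mean:similarities vs. mean' interaction under
# sum coding agree; the four coefficient values below are arbitrary illustrative numbers.
import numpy as np

b_wiki_analogies, b_wiki_norms = 0.3, -0.7
b_subs_analogies, b_subs_norms = 1.1, 0.2

b_wiki_similarities = -(b_wiki_analogies + b_wiki_norms)
b_subs_similarities = -(b_subs_analogies + b_subs_norms)
b_wikisubs_analogies = -(b_subs_analogies + b_wiki_analogies)
b_wikisubs_norms = -(b_subs_norms + b_wiki_norms)

method_1 = -(b_wiki_similarities + b_subs_similarities)
method_2 = -(b_wikisubs_analogies + b_wikisubs_norms)
assert np.isclose(method_1, method_2)  # both reduce to the same sum of the four interaction terms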
def plot_plate(compiled_model, model_type):
    g = pm.model_to_graphviz(compiled_model)
    g.render(f"../plots_python/{model_type}_plate", format="png")
        # likelihood
        y_pred = pm.Normal(
            "y_pred",
            mu = mu,
            sigma = sigma,
            observed = y_train)

    # return the model
    return m_pooled

# now run the function to compile the model
m_pooled = pooled()

### python: plate notation ###
pm.model_to_graphviz(m_pooled)

### python: prior predictive checks ###
# sample prior predictive
with m_pooled:
    prior = pm.sample_prior_predictive(700, random_seed = RANDOM_SEED)
    idata_pooled = az.from_pymc3(prior=prior)

# set up plot
fig, ax = plt.subplots()

# if you just want the figure then this is enough
az.plot_ppc(idata_pooled,          # the idata
            group = "prior",       # plot the prior
            num_pp_samples = 100,  # how many draws
            ax = ax)               # add to matplotlib ax.
# https://docs.pymc.io/notebooks/getting_started.html
import pymc3 as mc
import numpy as np
import matplotlib.pyplot as plt

basic_model = mc.Model()
data = np.array([0, 1, 0, 1])

## Set up the model
with basic_model:
    data = np.array([1, 0, 0, 0, 1])
    theta = mc.Beta('theta', alpha=1, beta=1)
    y = mc.Bernoulli('y', p=theta, observed=data)

mc.model_to_graphviz(basic_model)

## Estimate distributions
with basic_model:
    start = mc.find_MAP()  # Find starting value by optimization
    step = mc.NUTS(scaling=start)  # Instantiate MCMC sampling algorithm
    trace = mc.sample(2000, step, start=start, njobs=4, progressbar=True)

## Plot
mc.traceplot(trace)
plt.show()
            ax=ax, vmin=-1, vmax=1, cmap=balance_r, annot=True)
ax.set_facecolor('black')

with pm.Model() as m11:
    σ = pm.Exponential('σ', 1)
    β_PC = pm.Normal('β_PC', 0, 1)
    β_GC = pm.Normal('β_GC', 0, 1)
    α = pm.Normal('α', 0, 1)
    μ = α + β_PC * d_.P.values + β_GC * d_.G.values
    C = pm.Normal('C', mu=μ, sd=σ, observed=d_.C.values)

pm.model_to_graphviz(m11)

with m11:
    trc11 = pm.sample(1000, tune=1500)

pm.summary(trc11, alpha=0.11)

"""
The unmeasured U makes P a collider, and conditioning on P produces collider bias.
Assuming we can measure U, below is the multivariate regression that also conditions on U:
"""

with pm.Model() as m12:
    σ = pm.Exponential('σ', 1)
    β_PC = pm.Normal('β_PC', 0, 1)
    β_GC = pm.Normal('β_GC', 0, 1)
    β_U = pm.Normal('β_U', 0, 1)
    α = pm.Normal('α', 0, 1)
    μ = α + β_PC * d_.P.values + β_GC * d_.G.values + β_U * d_.U.values
def renderGraphicalModel(model):
    pm.model_to_graphviz(model).render(filename='model', view=True, cleanup=True)
hrs = le.classes_
n_hrs = len(hrs)

with pm.Model() as EVmodel:
    # Hyper-Priors
    hyper_mu_sd = pm.Uniform('hyper_mu_sd', lower=0, upper=10)
    hyper_mu_mu = pm.Uniform('hyper_mu_mu', lower=0, upper=10)

    # Priors
    mu = pm.Gamma('mu', mu=hyper_mu_mu, sigma=hyper_mu_sd, shape=n_hrs)

    # Data Likelihood
    y_like = pm.Poisson('y_like', mu=mu[hrs_idx], observed=obsVals)

pm.model_to_graphviz(EVmodel)

#% Hierarchical Model Inference
# Setup vars
smpls = 2500
tunes = 500
ch = 4

# Print Header
print('Work Charging')
print('Params: samples = ', smpls, ' | tune = ', tunes)

with EVmodel:
    trace = pm.sample(smpls, tune=tunes, chains=ch, cores=1)
args = parser.parse_args()

# generate a sample using the helper function
galaxies = generate_sample.generate_sample(n_gals=args.ngals, seed=0)

if args.output == '':
    args.output = 'n{}d{}t{}.pickle'.format(
        args.ngals or len(galaxies),
        args.ndraws,
        args.ntune,
    )

# initialize the model using the custom BHSM class
bhsm = UniformBHSM(galaxies)

pm.model_to_graphviz(bhsm.model).view('plots/model_graphviz.pdf')
sys.exit(0)

trace = bhsm.do_inference(
    draws=args.ndraws,
    tune=args.ntune,
)

# save EVERYTHING
with open(args.output, "wb") as buff:
    pickle.dump(
        {
            'model': bhsm,
            'trace': trace,
            'n_samples': args.ndraws,
            'n_burn': args.ntune
def show_model(self, save=False, view=True, cleanup=True):
    model_graph = pm.model_to_graphviz(self.model)
    if save:
        model_graph.render(save, view=view, cleanup=cleanup)
    if view:
        return model_graph
basic_model = pm.Model()

with basic_model:
    # Priors for unknown model parameters
    alpha = pm.Normal("alpha", mu=0, sigma=10)
    beta0 = pm.Normal("beta0", mu=1, sigma=5)
    beta1 = pm.Normal("beta1", mu=2, sigma=5)
    sigma = pm.HalfNormal("sigma", sigma=1)

    # Expected value of outcome
    mu = alpha + beta0 * X1 + beta1 * X2

    # Likelihood (sampling distribution) of observations
    Y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=Y)

pm.model_to_graphviz(basic_model)

# %% FIND MOST LIKELY PARAMETERS
map_estimate = pm.find_MAP(model=basic_model)
pprint(map_estimate)

# %% FIND DISTRIBUTIONS OF PARAMETERS
with basic_model:
    # draw 5000 posterior samples
    trace = pm.sample(5000)

# %%
pm.traceplot(trace)

# %%
pm.summary(trace).round(2)
binary_output.reset_index(drop = True, inplace = True)

# concatenate
#X_tmp = X_tmp_1.loc[:, ['mod00_booking_yn', 'mod99_cap_member_id']]
output = pd.concat([transposed_output_sorted_tmp, binary_output], axis = 1)
output.set_index('index', inplace = True)
y_predicted = pd.concat([output, y_test_oob, X_test_oob.loc[:, 'mod99_cap_member_id']], axis = 1)

# compute and print classification metrics
MCC_metric = matthews_corrcoef(y_predicted.loc[:, 'truth'], y_predicted.loc[:, 'binary'])
precision_metric = precision_score(y_predicted.loc[:, 'truth'], y_predicted.loc[:, 'binary'])
recall_metric = recall_score(y_predicted.loc[:, 'truth'], y_predicted.loc[:, 'binary'])
print('MCC = %.3f\n precision = %.3f\n recall = %.3f' % \
      (MCC_metric, precision_metric, recall_metric))
"""

###############################################################################
## 10. MCMC TRACE DIAGNOSTICS [to be done only once for calibrating the Bayesian model]

# see graph for model
import graphviz
pm.model_to_graphviz(varying_intercept_slope_noncentered)

# too RAM demanding
data = az.convert_to_dataset(varying_intercept_slope_noncentered_trace)

## show traces
pm.traceplot(varying_intercept_slope_noncentered_trace)
#az.plot_trace(glm_model_trace, compact=True)
    thetas = pm.Beta('thetas', alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=N)
    y = pm.Binomial('y', n=at_bats, p=thetas, observed=hits)

#%%
with baseball_model:
    theta_new = pm.Beta('theta_new', alpha=phi * kappa, beta=(1.0 - phi) * kappa)
    y_new = pm.Binomial('y_new', n=4, p=theta_new, observed=0)

#%%
with baseball_model:
    trace = pm.sample(2000, cores=1, tune=1000, chains=2, target_accept=0.95)

pm.traceplot(trace, var_names=['phi', 'kappa'])

#%%
pm.model_to_graphviz(baseball_model)

#out_smry = pd.DataFrame(pm.summary(trace))
#fig = plt.gcf()
#fig.savefig("out_hr" + str(int(h)) + "_tracePlt" + ".png")
                                               mu=mode_one_mean[bird_idx],
                                               sd=mode_one_sd[bird_idx])
    mode_two[bird_day_idx] = pm.Normal(bird_day_id + '_2',
                                       mu=mode_two_mean[bird_idx],
                                       sd=mode_two_sd[bird_idx])
    mode_three[bird_day_idx] = pm.Normal(bird_day_id + '_3',
                                         mu=mode_three_mean[bird_idx],
                                         sd=mode_three_sd[bird_idx])
    wake[bird_day_idx] = pm.Normal(bird_day_id + '_W',
                                   mu=wake_mean[bird_idx],
                                   sd=wake_sd[bird_idx])
    sleep[bird_day_idx] = pm.Normal(bird_day_id + '_S',
                                    mu=sleep_mean[bird_idx],
                                    sd=sleep_sd[bird_idx])

    if (day_idx + 1) != len(bird):
        for obs_idx, obs in enumerate(day):
            bird_day_obs_idx = bird_day_idx + obs_idx
            bird_day_obs_id = bird_day_id + '_' + str(obs_idx)
            #y[bird_day_obs_idx] = pm.Normal(bird_day_obs_id, mu=mu, sd=sd, observed=obs[1])
    else:
        for obs_idx in enumerate(day):
            bird_day_obs_idx = bird_day_idx + obs_idx
            bird_day_obs_id = bird_day_id + '_' + str(obs_idx)
            #y[bird_day_obs_idx] = pm.Normal(bird_day_obs_id, mu=mu, sd=sd, observed=obs[1])

pm.model_to_graphviz(model)
    with pm.Model() as model:
        step_size = pm.Exponential("step_size", 10)
        volatility = pm.GaussianRandomWalk("volatility", sigma=step_size, shape=len(data))
        nu = pm.Exponential("nu", 0.1)
        returns = pm.StudentT("returns", nu=nu, lam=np.exp(-2 * volatility), observed=data["change"])
    return model


stochastic_vol_model = make_stochastic_volatility_model(returns)

# %%
pm.model_to_graphviz(stochastic_vol_model)

# %%
with stochastic_vol_model:
    prior = pm.sample_prior_predictive(500)

# %%
fig, ax = plt.subplots(figsize=(14, 4))
returns["change"].plot(ax=ax, lw=1, color="black")
ax.plot(prior["returns"][4:6].T, "g", alpha=0.5, lw=1, zorder=-10)
max_observed, max_simulated = np.max(np.abs(returns["change"])), np.max(np.abs(prior["returns"]))
ax.set_title(
    f"Maximum observed: {max_observed:.2g}\nMaximum simulated: {max_simulated:.2g}(!)"
)

# %%
with stochastic_vol_model:
plt.ylabel("daily returns in %") # %% with pm.Model() as sp500_model: nu = pm.Exponential("nu", 1 / 10.0, testval=5.0) sigma = pm.Exponential("sigma", 1 / 0.02, testval=0.1) s = pm.GaussianRandomWalk("s", sigma=sigma, shape=len(returns)) volatility_process = pm.Deterministic( "volatility_process", pm.math.exp(-2 * s) ** 0.5 ) r = pm.StudentT("r", nu=nu, sigma=volatility_process, observed=returns["change"]) # %% pm.model_to_graphviz(sp500_model) # %% with sp500_model: trace = pm.sample(2000) # %% pm.traceplot(trace) # %%l fig, ax = plt.subplots(figsize=(15, 8)) returns.plot(ax=ax) ax.plot(returns.index, 1 / np.exp(trace["s", ::5].T), "C3", alpha=0.03) ax.set(title="volatility_process", xlabel="time", ylabel="volatility") ax.legend(["S&P500", "stochastic volatility process"])
        plt.sca(ax[i][0])
        for j in range(trace.nchains):
            chain = trace.get_values(p, chains=[j])
            sns.kdeplot(chain)
            ax[i][1].plot(chain, alpha=0.25)
        if names is not None:
            plt.title(names[i])
    plt.tight_layout()

print('\tPlotting model')
try:
    with bhsm.model as model:
        pm.model_to_graphviz(bhsm.model).render(
            os.path.join(args.output, 'model'),
            view=False,
            format='pdf',
            cleanup=True,
        )
except ImportError:
    pass

print('\tPlotting traceplot')
# # this uses too much RAM, so we define our own above
# pm.traceplot(
#     trace,
#     var_names=var_names
# )
traceplot(trace, var_names, names)
plt.savefig(os.path.join(args.output, 'trace.png'), bbox_inches='tight')
plt.close()