def fun_infer_model_Bernoulli(df,
                              samples=10,
                              tune=100,
                              Krange=np.arange(2, 20),
                              path="./",
                              name=""):
    ch = 1  # number of chains

    N = df.shape[0]
    Q = df.shape[1]
    for K in Krange:
        with pm.Model() as model:
            # assumption: p=0.5 (uninformative); pm.Bernoulli requires a probability
            learner = pm.Bernoulli('learner', p=0.5, shape=(N, K))
            # assumption: the intended keyword is p (Bernoulli has no `a` parameter)
            question = pm.Bernoulli('question',
                                    p=np.repeat(.1, K),
                                    shape=(Q, K))
            # caution: this dot product of 0/1 variables can exceed 1,
            # which is not a valid Bernoulli probability
            x = pm.math.dot(learner, question.T)
            results = pm.Bernoulli('results', p=x, shape=(N, Q), observed=df)

        for RV in model.basic_RVs:
            print(RV.name, RV.logp(model.test_point))
        pm.model_to_graphviz(model)

        with model:
            trace = pm.sample(samples,
                              chains=ch,
                              tune=tune,
                              discard_tuned_samples=True)

        a = np.dot(trace['learner'].mean(0),
                   trace['question'].mean(0).T)
        pd.DataFrame(a).to_csv(path + name + "estim_" + str(K) + ".txt",
                               sep="\t")
        print("finished: " + str(K))
    return model
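
A quick usage sketch for the function above (not part of the original snippet); the response matrix is synthetic and the small settings just keep the sweep over K cheap:

import numpy as np
import pandas as pd

# synthetic binary learner-by-question response matrix (fabricated data)
rng = np.random.default_rng(0)
responses = pd.DataFrame(rng.integers(0, 2, size=(30, 12)))

# small sample/tune counts and a short Krange keep the demo fast
model = fun_infer_model_Bernoulli(responses, samples=50, tune=50,
                                  Krange=np.arange(2, 4), path="./")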
Example #2
    def test_model_to_graphviz_for_model_with_data_container(self):
        with pm.Model() as model:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 10.0)
            obs_sigma = floatX(np.sqrt(1e-2))
            pm.Normal("obs", beta * x, obs_sigma, observed=y)
            pm.sample(1000, init=None, tune=1000, chains=1)

        for formatting in {"latex", "latex_with_params"}:
            with pytest.raises(ValueError, match="Unsupported formatting"):
                pm.model_to_graphviz(model, formatting=formatting)

        exp_without = [
            'x [label="x\n~\nData" shape=box style="rounded, filled"]',
            'beta [label="beta\n~\nNormal"]',
            'obs [label="obs\n~\nNormal" style=filled]',
        ]
        exp_with = [
            'x [label="x\n~\nData" shape=box style="rounded, filled"]',
            'beta [label="beta\n~\nNormal(mu=0.0, sigma=10.0)"]',
            f'obs [label="obs\n~\nNormal(mu=f(f(beta), x), sigma={obs_sigma})" style=filled]',
        ]
        for formatting, expected_substrings in [
            ("plain", exp_without),
            ("plain_with_params", exp_with),
        ]:
            g = pm.model_to_graphviz(model, formatting=formatting)
            # check formatting of RV nodes
            for expected in expected_substrings:
                assert expected in g.source
Example #3
def main(n, observed):
    '''
    Parameters
    ----------
    n : int
        Number of trials.
    observed : int
        Observed number of successes.
    '''
    with pm.Model() as exam_model:
        # Weak Beta(0.5, 0.5) prior (the Jeffreys prior for a proportion)
        prior = pm.Beta('prior', 0.5, 0.5)

        # Bernoulli trials modeled with a Binomial likelihood
        obs = pm.Binomial('obs', n=n, p=prior, observed=observed)

        # plot model design
        pm.model_to_graphviz(exam_model)

        # Use Metropolis-Hastings for sampling
        step = pm.Metropolis()

        # sample from the posterior distribution
        trace = pm.sample(5000, step)

        # plot posterior
        pm.plot_posterior(trace)

        # calculate the Gelman-Rubin convergence statistic (R-hat)
        pm.gelman_rubin(trace)
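
A minimal way to call main above; the counts are hypothetical:

import pymc3 as pm

# e.g. 40 exam questions with 28 correct answers (made-up numbers)
main(n=40, observed=28)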
Example #4
def bayes_multiple_detector(t, s, n, tracename):
    with pm.Model() as abrupt_model:
        sigma = pm.Normal('sigma', mu=30, sigma=5)
        # sigma = pm.Uniform('sigma', 5, 15)
        mu = pm.Uniform("mu1", -30, 30)
        tau = pm.DiscreteUniform("tau1", t.min(), t.max())

        for i in np.arange(2, n + 2):
            _mu = pm.Uniform("mu" + str(i), -200, 0)
            mu = T.switch(tau >= t, mu, _mu)
            if i < (n + 1):
                tau = pm.DiscreteUniform("tau" + str(i), tau, t.max())
        s_obs = pm.Normal("s_obs", mu=mu, sigma=sigma, observed=s)
    g = pm.model_to_graphviz(abrupt_model)
    g.view()
    with abrupt_model:
        pm.find_MAP()
        trace = pm.sample(5000, tune=1000)
        az.plot_trace(trace)
        plt.show()
        az.plot_autocorr(trace)
        plt.show()
        az.to_netcdf(trace, getpath('tracepath') + tracename)
        pm.summary(trace)
    return trace
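
A hedged invocation sketch for bayes_multiple_detector above; the signal below is synthetic, with a single abrupt level shift, and getpath('tracepath') must resolve to a writable directory:

import numpy as np

t = np.arange(300)
# mean drops from 0 to -100 at t = 150; noise scale matches the sigma prior
s = np.where(t < 150, 0.0, -100.0) + np.random.normal(0, 30, t.size)
trace = bayes_multiple_detector(t, s, n=1, tracename='demo_trace')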
def bayes_multiple_detector_each_sigma(t, s, n):
    scala = 1000
    with pm.Model() as abrupt_model:
        sigma = pm.Normal('sigma', mu=0.02 * scala, sigma=0.015 * scala)
        # sigma = pm.Uniform('sigma', 5, 15)
        mu = pm.Uniform("mu1", -1.5 * scala, -1.4 * scala)
        tau = pm.DiscreteUniform("tau1", t.min(), t.max())

        for i in np.arange(2, n + 2):
            _mu = pm.Uniform("mu" + str(i), -1.6 * scala, -1.4 * scala)
            mu = T.switch(tau >= t, mu, _mu)
            if i < (n + 1):
                tau = pm.DiscreteUniform("tau" + str(i), tau, t.max())

        tau1 = abrupt_model["tau1"]
        tau2 = abrupt_model["tau2"]
        dtau = pm.DiscreteUniform('dtau', tau1 + 500, tau2)

        s_obs = pm.Normal("s_obs", mu=mu, sigma=sigma, observed=s)
    g = pm.model_to_graphviz(abrupt_model)
    g.view()
    with abrupt_model:
        # pm.find_MAP()
        trace = pm.sample(20000, tune=5000)
        az.plot_trace(trace)
        az.to_netcdf(trace, getpath('tracepath') + 'bd9_4_add_new_rule')
        plt.show()
        pm.summary(trace)
    return trace
    def test_model_to_graphviz_for_model_with_data_container(self):
        with pm.Model() as model:
            x = pm.Data('x', [1., 2., 3.])
            y = pm.Data('y', [1., 2., 3.])
            beta = pm.Normal('beta', 0, 10.)
            pm.Normal('obs', beta * x, np.sqrt(1e-2), observed=y)
            pm.sample(1000, init=None, tune=1000, chains=1)

        g = pm.model_to_graphviz(model)
        text = 'x [label="x ~ Deterministic" shape=box style=filled]'
        assert text in g.source
Example #7
    def graph(self,
              formatting="plain",
              name=None,
              figsize=None,
              dpi=300,
              fmt="png"):
        """
        Produce a graphviz Digraph from a Bambi model.

        Requires graphviz, which may be installed most easily with
            ``conda install -c conda-forge python-graphviz``

        Alternatively, you may install the ``graphviz`` binaries yourself, and then
        ``pip install graphviz`` to get the python bindings.
        See http://graphviz.readthedocs.io/en/stable/manual.html for more information.

        Parameters
        ----------
        formatting : str
            One of ``'plain'`` or ``'plain_with_params'``. Defaults to ``'plain'``.
        name : str
            Name of the figure to save. Defaults to None, in which case no figure is saved.
        figsize : tuple
            Maximum width and height of figure in inches. Defaults to None, in which case the
            figure size is set automatically. If defined and the drawing is larger than the given
            size, the drawing is uniformly scaled down so that it fits within the given size.
            Only works if ``name`` is not None.
        dpi : int
            Points per inch of the figure to save.
            Defaults to 300. Only works if ``name`` is not None.
        fmt : str
            Format of the figure to save.
            Defaults to ``'png'``. Only works if ``name`` is not None.
        """
        if self.backend is None:
            raise ValueError("The model is empty, please define a Bambi model")

        graphviz = pm.model_to_graphviz(model=self.backend.model,
                                        formatting=formatting)

        width, height = (None, None) if figsize is None else figsize

        if name is not None:
            graphviz_ = graphviz.copy()
            graphviz_.graph_attr.update(size=f"{width},{height}!")
            graphviz_.graph_attr.update(dpi=str(dpi))
            graphviz_.render(filename=name, format=fmt, cleanup=True)

        return graphviz
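
A usage sketch for the method above, assuming a small Bambi model has been defined and fitted first (the formula and data are invented for illustration):

import bambi as bmb
import pandas as pd

data = pd.DataFrame({"y": [1.2, 2.3, 2.9, 4.1], "x": [1.0, 2.0, 3.0, 4.0]})
model = bmb.Model("y ~ x", data)
model.fit(draws=100, tune=100)               # builds the PyMC backend
model.graph(name="bambi_model", fmt="png")   # writes bambi_model.png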
Example #8
    def test_model_to_graphviz_for_model_with_data_container(self):
        with pm.Model() as model:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 10.0)
            pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
            pm.sample(1000, init=None, tune=1000, chains=1)

        g = pm.model_to_graphviz(model)

        # Data node rendered correctly?
        text = 'x [label="x ~ Data" shape=box style="rounded, filled"]'
        assert text in g.source
        # Didn't break ordinary variables?
        text = 'beta [label="beta ~ Normal"]'
        assert text in g.source
        text = 'obs [label="obs ~ Normal" style=filled]'
        assert text in g.source
Example #9
    def graph(self, formatting="plain"):
        """
        Produce a graphviz Digraph from a PyMC3 model.

        Requires graphviz, which may be installed most easily with
            ``conda install -c conda-forge python-graphviz``

        Alternatively, you may install the ``graphviz`` binaries yourself, and then
        ``pip install graphviz`` to get the python bindings.
        See http://graphviz.readthedocs.io/en/stable/manual.html for more information.

        Parameters
        ----------
        formatting : str
            One of ``'plain'`` or ``'plain_with_params'``. Defaults to ``'plain'``.
        """
        return pm.model_to_graphviz(model=self.backend.model,
                                    formatting=formatting)
Example #10
    def test_model_to_graphviz_for_model_with_data_container(self):
        with pm.Model() as model:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 10.0)
            obs_sigma = floatX(np.sqrt(1e-2))
            pm.Normal("obs", beta * x, obs_sigma, observed=y)
            pm.sample(1000, init=None, tune=1000, chains=1)

        g = pm.model_to_graphviz(model)

        # Data node rendered correctly?
        text = 'x [label="x\n~\nData" shape=box style="rounded, filled"]'
        assert text in g.source
        # Didn't break ordinary variables?
        text = 'beta [label="beta\n~\nNormal(mu=0.0, sigma=10.0)"]'
        assert text in g.source
        text = f'obs [label="obs\n~\nNormal(mu=f(f(beta), x), sigma={obs_sigma})" style=filled]'
        assert text in g.source
Example #11
def mcmcNegativeBinomial(data):
    """Generate a trace for the data"""
    with pm.Model() as model:
        # No strong prior knowledge about the Negative Binomial parameters, so choose uniform priors.
        # To be safe, make the possible ranges larger than needed.
        alpha_rv = pm.Uniform('alpha_rv', 0.0, 3.0)
        mu_rv = pm.Uniform('mu_rv', 0.1, 30.0)
        score_rv = pm.NegativeBinomial('score_rv',
                                       mu=mu_rv,
                                       alpha=alpha_rv,
                                       observed=data)
        step = pm.NUTS()
        trace = pm.sample(step=step,
                          draws=10000,
                          chains=4,
                          cores=4,
                          init='adapt_diag')
        graph = pm.model_to_graphviz(model)
    graph.render(filename='model', format='png')
    return trace
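
To exercise mcmcNegativeBinomial above, one could feed it synthetic counts; note that numpy's (n, p) parametrization differs from PyMC3's (mu, alpha), so these numbers are arbitrary:

import numpy as np

scores = np.random.negative_binomial(5, 0.4, size=300)  # fabricated count data
trace = mcmcNegativeBinomial(scores)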
Example #12
    def get_GraphViz_object(self,
                            file_prefix: str,
                            save_dot: bool = True,
                            save_png: bool = True,
                            extension: str = "png"):
        """
        Returns the GraphViz object corresponding to the underlying hierarchical model.

        :param file_prefix: a string with the desired prefix for saved files; it can include a folder name too.
        :param save_dot: a boolean indicating whether the DOT source file should be saved too
        :param save_png: a boolean indicating whether an image file should be saved too
        :param extension: a string indicating the extension of the image file, e.g., "png"
        """
        graph = pm.model_to_graphviz(self.pymc_model)
        graph.format = extension
        if save_dot:
            txtFileName = f"{file_prefix}_hierarchical_graph.txt"
            graph.save(txtFileName)
            logger.info(f"Graph's source saved to {txtFileName}")
        if save_png:
            pngFileName = f"{file_prefix}_hierarchical_graph"
            graph.render(pngFileName, view=False, cleanup=True)
            logger.info(f"Graph picture saved to {pngFileName}")
        return graph
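
The method above depends on a class with a pymc_model attribute and a module-level logger; here is a standalone sketch of the same save-and-render pattern on a throwaway model:

import pymc3 as pm

with pm.Model() as toy_model:
    pm.Normal("theta", 0.0, 1.0)

graph = pm.model_to_graphviz(toy_model)
graph.format = "png"
graph.save("toy_graph.txt")                           # DOT source as text
graph.render("toy_graph", view=False, cleanup=True)   # renders toy_graph.png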
Example #13
    
    # Hyper-Priors
    hyper_mu_sd = pm.Uniform('hyper_mu_sd', lower=0, upper=10)
    hyper_mu_mu = pm.Uniform('hyper_mu_mu', lower=0, upper=10) 
    
    # Priors   
    mu = pm.Gamma('mu', mu=hyper_mu_mu, 
                        sigma=hyper_mu_sd,
                        shape=n_hrs)    
    
    # Data Likelihood
    y_like = pm.Poisson('y_like', 
                       mu=mu[hrs_idx], 
                       observed=y_obs)    

pm.model_to_graphviz(arrivalModel)

#%% Hierarchical Energy Model
    
with pm.Model() as EVpooling:
    
    # Hyper-Priors    
    hyper_mu_mu = pm.Uniform('hyper_mu_mu', lower=0, upper=upprbnd)
    hyper_mu_sd = pm.Uniform('hyper_mu_sd', lower=0, upper=upprbnd)
    
    hyper_sd_mu = pm.Uniform('hyper_sd_mu', lower=0, upper=upprbnd)
    hyper_sd_sd = pm.Uniform('hyper_sd_sd', lower=0, upper=upprbnd)
    
    # Priors
#    mu = pm.Normal('mu', mu=hyper_mu_mu, sigma=hyper_mu_sd,
#                        shape=n_hrs)    
Example #14
def show_model(model, name='tmp'):
    pm.model_to_graphviz(model).render(name, view=True, cleanup=True)
Example #15
def _fit_model():
    # load data
    df_a = pd.read_csv(os.path.join(path, 'paper_results', 'analogies.tsv'), sep='\t')[['lang', 'vecs', 'source', 'adjusted score']]
    df_s = pd.read_csv(os.path.join(path, 'paper_results', 'similarities.tsv'), sep='\t')[['lang', 'vecs', 'source', 'adjusted rank r']]
    df_n = pd.read_csv(os.path.join(path, 'paper_results', 'norms.tsv'), sep='\t')[['lang', 'vecs', 'norm', 'adjusted r']]
    df_b = pd.read_csv(os.path.join(path, 'paper_results', 'binder.tsv'), sep='\t')[['lang', 'vecs', 'norm', 'adjusted r']]

    # keep track of different evaluation tasks
    df_a['kind'] = 'analogies'
    df_s['kind'] = 'similarities'
    df_n['kind'] = 'norms'
    df_b['kind'] = 'norms'

    # rename different metrics to score, and various dataset origins to task
    df_a = df_a.rename(columns={'source': 'task', 'adjusted score': 'score'})
    df_s = df_s.rename(columns={'source': 'task', 'adjusted rank r': 'score'})
    df_n = df_n.rename(columns={'norm': 'task', 'adjusted r': 'score'})
    df_b = df_b.rename(columns={'norm': 'task', 'adjusted r': 'score'})

    # stack datasets
    df = pd.concat([df_a, df_s, df_n, df_b])

    # merge in corpus word counts
    df_corpus = pd.read_csv(os.path.join(path, 'paper_results', 'table_data.tsv'), sep='\t')
    df = df.merge(df_corpus[['lang', 'vecs', 'words']], how='inner', on=['lang', 'vecs'])

    df.to_csv('model_data.tsv', sep='\t', index=False)  # store merged data for record keeping

    df['log10_wordcount'] = np.log10(df['words'])  # log-transform word counts
    df['log10_wordcount_z'] = standardize(df['log10_wordcount'])  # standardize word counts

    # create sum-coded contrasts
    df['wiki'] = df['vecs'].apply(lambda x: sum_contrast(x, 'wiki', 'wiki+subs'))
    df['subs'] = df['vecs'].apply(lambda x: sum_contrast(x, 'subs', 'wiki+subs'))
    df['analogies'] = df['kind'].apply(lambda x: sum_contrast(x, 'analogies', 'similarities'))
    df['norms'] = df['kind'].apply(lambda x: sum_contrast(x, 'norms', 'similarities'))

    # define PyMC3 model for statistical inference
    with pm.Model() as beta_model:
        # define centered Normal priors for all the betas, sd = 1 (mild shrinkage prior)
        intercept = pm.Normal('μ', mu=0, sd=1)
        b_wordcount = pm.Normal('β log corpus word count', mu=0, sd=1)
        b_wiki = pm.Normal('β wiki vs. mean', mu=0, sd=1)
        b_subs = pm.Normal('β subs vs. mean', mu=0, sd=1)
        b_norms = pm.Normal('β norms vs. mean', mu=0, sd=1)
        b_analogies = pm.Normal('β analogies vs. mean', mu=0, sd=1)
        b_wiki_norms = pm.Normal('β wiki vs. mean:norms vs. mean', mu=0, sd=1)
        b_wiki_analogies = pm.Normal('β wiki vs. mean:analogies vs. mean', mu=0, sd=1)
        b_subs_norms = pm.Normal('β subs vs. mean:norms vs. mean', mu=0, sd=1)
        b_subs_analogies = pm.Normal('β subs vs. mean:analogies vs. mean', mu=0, sd=1)

        b_wikisubs = pm.Deterministic('β wiki+subs vs. mean', -1 * (b_subs + b_wiki))
        b_similarities = pm.Deterministic('β similarities vs. mean', -1 * (b_analogies + b_norms))
        b_wikisubs_norms = pm.Deterministic('β wiki+subs vs. mean:norms vs. mean', -1 * (b_subs_norms + b_wiki_norms))
        b_wikisubs_analogies = pm.Deterministic('β wiki+subs vs. mean:analogies vs. mean', -1 * (b_subs_analogies + b_wiki_analogies))
        b_subs_similarities = pm.Deterministic('β subs vs. mean:similarities vs. mean', -1 * (b_subs_analogies + b_subs_norms))
        b_wiki_similarities = pm.Deterministic('β wiki vs. mean:similarities vs. mean', -1 * (b_wiki_analogies + b_wiki_norms))

        # given the above, there are two ways to compute the interaction wiki+subs vs. mean:similarities vs. mean
        # both methods are given below, but we only need to use one;
        # they give exactly the same answer, which you can verify by uncommenting the second line
        b_wikisubs_similarities = pm.Deterministic('β wiki+subs vs. mean:similarities vs. mean', -1 * (b_wiki_similarities + b_subs_similarities))
        # b_wikisubs_similarities2 = pm.Deterministic('β wiki+subs vs. mean:similarities vs. mean (2)', -1 * (b_wikisubs_analogies + b_wikisubs_norms))

        # non-centered parametrization for task-level random intercepts
        task_codes, task_uniques = df['task'].factorize()  # get number of unique groups and code them
        mu_tilde_task = pm.Normal('μ\u0303 task', mu=0, sd=1, shape=len(task_uniques))  # prior for task group offsets
        sigma_task = pm.HalfNormal('σ task', sd=1)  # prior for task group sigma
        mu_task = pm.Deterministic('μ task', sigma_task * mu_tilde_task)  # task group means (random intercepts)

        # non-centered parametrization for language-level random intercepts
        lang_codes, lang_uniques = df['lang'].factorize()  # get number of unique groups and code them
        mu_tilde_lang = pm.Normal('μ\u0303 lang', mu=0, sd=1, shape=len(lang_uniques))  # prior for lang group offsets
        sigma_lang = pm.HalfNormal('σ lang', sd=1)  # prior for lang group sigma
        mu_lang = pm.Deterministic('μ lang', sigma_lang * mu_tilde_lang)  # lang group means (random intercepts)

        # compute predictions for y, using logit link function
        y_hat = pm.Deterministic('ŷ', pm.math.invlogit(
            intercept
            + b_wordcount * df['log10_wordcount_z']
            + b_wiki * df['wiki']
            + b_subs * df['subs']
            + b_norms * df['norms']
            + b_analogies * df['analogies']
            + b_wiki_norms * df['wiki'] * df['norms']
            + b_wiki_analogies * df['wiki'] * df['analogies']
            + b_subs_norms * df['subs'] * df['norms']
            + b_subs_analogies * df['subs'] * df['analogies']
            + mu_lang[lang_codes]
            + mu_task[task_codes]
        ))

        # define likelihood
        invphi = pm.HalfNormal('1 / φ', sd=1)  # prior for phi, for Beta(mu, phi) parametrization of the likelihood distribution
        phi = pm.Deterministic('φ', 1 / invphi)
        y = pm.Beta('y', alpha=y_hat * phi, beta=(1 - y_hat) * phi, observed=df['score'])

        # sample with 4 chains, 2500 warmup + 2500 posterior samples per chain
        trace = pm.sample(2500, tune=2500, chains=4, target_accept=.9)

    # store trace summary as tsv and LaTeX table
    df_summary = pm.summary(trace, credible_interval=.9)
    df_summary.to_csv('trace_summary.tsv', sep='\t')
    with open('trace_summary_latex.txt', 'w') as latextable:
        latextable.write(df_summary.round(2).to_latex())

    # draw and store model graph
    graph = pm.model_to_graphviz(beta_model)
    graph.graph_attr['rankdir'] = 'LR'  # change graph orientation to left-right (from top-down)
    graph.render(filename='model', format='pdf', cleanup=True)

    # draw and store forest plot
    varnames = [
        'μ',
        'β log corpus word count',
        'β subs vs. mean',
        'β wiki vs. mean',
        'β wiki+subs vs. mean',
        'β analogies vs. mean',
        'β norms vs. mean',
        'β similarities vs. mean',
        'β subs vs. mean:analogies vs. mean',
        'β subs vs. mean:norms vs. mean',
        'β subs vs. mean:similarities vs. mean',
        'β wiki vs. mean:analogies vs. mean',
        'β wiki vs. mean:norms vs. mean',
        'β wiki vs. mean:similarities vs. mean',
        'β wiki+subs vs. mean:analogies vs. mean',
        'β wiki+subs vs. mean:norms vs. mean',
        'β wiki+subs vs. mean:similarities vs. mean',
    ]
    axes = pm.forestplot(trace, var_names=varnames, credible_interval=.9, combined=True, figsize=(4, 6))
    axes[0].set(title='90% credible intervals', xlabel='coefficient (in log-odds)')
    plt.savefig('forestplot.pdf')
    plt.savefig('forestplot.png', dpi=600)
    plt.clf()

    # draw and store trace plot
    pm.traceplot(trace)
    plt.savefig('traceplot.png', dpi=300)  # the traceplot is huge, so we lower the resolution and don't store it as pdf
    plt.clf()

    return df_summary
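
_fit_model relies on two helpers not shown in this excerpt, standardize and sum_contrast; the definitions below are plausible reconstructions under the usual meaning of z-scoring and sum-to-zero coding, not the original code:

def standardize(series):
    # z-score: zero mean, unit standard deviation
    return (series - series.mean()) / series.std()

def sum_contrast(value, level, reference_level):
    # sum-to-zero coding: +1 for the level of interest,
    # -1 for the reference level, 0 for anything else
    if value == level:
        return 1
    if value == reference_level:
        return -1
    return 0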
def plot_plate(compiled_model, model_type):
    g = pm.model_to_graphviz(compiled_model)
    g.render(f"../plots_python/{model_type}_plate", format="png")
        # likelihood
        y_pred = pm.Normal(
            "y_pred",
            mu = mu, 
            sigma = sigma, 
            observed = y_train)

        # return the model                
        return m_pooled

# now run the function to compile the model
m_pooled = pooled()

### python: plate notation ###
pm.model_to_graphviz(m_pooled)

### python: prior predictive checks ###
# sample prior predictive 
with m_pooled:
    prior = pm.sample_prior_predictive(700, random_seed = RANDOM_SEED) 
    idata_pooled = az.from_pymc3(prior=prior)

# set up plot 
fig, ax = plt.subplots()

# if you just want the figure then this is enough
az.plot_ppc(idata_pooled, # the idata
            group = "prior", # plot the prior
            num_pp_samples = 100, # how many draws
            ax = ax) # add to matplotlib ax. 
#https://docs.pymc.io/notebooks/getting_started.html

import pymc3 as mc
import numpy as np
import matplotlib.pyplot as plt

basic_model = mc.Model()

## Set up the model
with basic_model:
    data = np.array([1, 0, 0, 0, 1])
    theta = mc.Beta('theta', alpha=1, beta=1)
    y = mc.Bernoulli('y', p=theta, observed=data)

mc.model_to_graphviz(basic_model)

## Estimate distributions
with basic_model:
    start = mc.find_MAP() # Find starting value by optimization
    step = mc.NUTS(scaling=start) # Instantiate MCMC sampling algorithm
    trace = mc.sample(2000, step, start=start, cores=4, progressbar=True)

## Plot
mc.traceplot(trace)
plt.show()
Example #19
        ax=ax,
        vmin=-1,
        vmax=1,
        cmap=balance_r,
        annot=True)
ax.set_facecolor('black')

with pm.Model() as m11:
    σ = pm.Exponential('σ', 1)
    β_PC = pm.Normal('β_PC', 0, 1)
    β_GC = pm.Normal('β_GC', 0, 1)
    α = pm.Normal('α', 0, 1)
    μ = α + β_PC * d_.P.values + β_GC * d_.G.values
    C = pm.Normal('C', mu=μ, sd=σ, observed=d_.C.values)

pm.model_to_graphviz(m11)
with m11:
    trc11 = pm.sample(1000, tune=1500)
pm.summary(trc11, alpha=0.11)
"""
The unmeasured U makes P a collider, and conditioning on P produces collider
bias. Assuming we can measure U, below is the multivariate regression that also
conditions on U:
"""
with pm.Model() as m12:
    σ = pm.Exponential('σ', 1)
    β_PC = pm.Normal('β_PC', 0, 1)
    β_GC = pm.Normal('β_GC', 0, 1)
    β_U = pm.Normal('β_U', 0, 1)
    α = pm.Normal('α', 0, 1)
    μ = α + β_PC * d_.P.values + β_GC * d_.G.values + β_U * d_.U.values
Example #20
def renderGraphicalModel(model):
    pm.model_to_graphviz(model).render(filename='model',
                                       view=True,
                                       cleanup=True)
Example #21
hrs = le.classes_
n_hrs = len(hrs)

with pm.Model() as EVmodel:

    # Hyper-Priors
    hyper_mu_sd = pm.Uniform('hyper_mu_sd', lower=0, upper=10)
    hyper_mu_mu = pm.Uniform('hyper_mu_mu', lower=0, upper=10)

    # Priors
    mu = pm.Gamma('mu', mu=hyper_mu_mu, sigma=hyper_mu_sd, shape=n_hrs)

    # Data Likelihood
    y_like = pm.Poisson('y_like', mu=mu[hrs_idx], observed=obsVals)

pm.model_to_graphviz(EVmodel)

#%% Hierarchical Model Inference

# Setup vars
smpls = 2500
tunes = 500
ch = 4

# Print Header
print('Work Charging')
print('Params: samples = ', smpls, ' | tune = ', tunes)

with EVmodel:
    trace = pm.sample(smpls, tune=tunes, chains=ch, cores=1)
Example #22
args = parser.parse_args()

# generate a sample using the helper function
galaxies = generate_sample.generate_sample(n_gals=args.ngals, seed=0)

if args.output == '':
    args.output = 'n{}d{}t{}.pickle'.format(
        args.ngals or len(galaxies),
        args.ndraws,
        args.ntune,
    )

# initialize the model using the custom BHSM class
bhsm = UniformBHSM(galaxies)

pm.model_to_graphviz(bhsm.model).view('plots/model_graphviz.pdf')
sys.exit(0)

trace = bhsm.do_inference(
    draws=args.ndraws,
    tune=args.ntune,
)

# save EVERYTHING
with open(args.output, "wb") as buff:
    pickle.dump(
        {
            'model': bhsm,
            'trace': trace,
            'n_samples': args.ndraws,
            'n_burn': args.ntune,
        },
        buff,
    )
Example #23
    def show_model(self, save=False, view=True, cleanup=True):
        model_graph = pm.model_to_graphviz(self.model)
        if save:
            model_graph.render(save, view=view, cleanup=cleanup)
        if view:
            return model_graph

basic_model = pm.Model()

with basic_model:
    # Priors for unknown model parameters
    alpha = pm.Normal("alpha", mu=0, sigma=10)
    beta0 = pm.Normal("beta0", mu=1, sigma=5)
    beta1 = pm.Normal("beta1", mu=2, sigma=5)
    sigma = pm.HalfNormal("sigma", sigma=1)

    # Expected value of outcome
    mu = alpha + beta0 * X1 + beta1 * X2

    # Likelihood (sampling distribution) of observations
    Y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=Y)

pm.model_to_graphviz(basic_model)

# %% FIND MOST LIKELY PARAMETERS
map_estimate = pm.find_MAP(model=basic_model)
pprint(map_estimate)

# %% FIND DISTRIBUTIONS OF PARAMETERS
with basic_model:
    # draw 5000 posterior samples
    trace = pm.sample(5000)

# %%
pm.traceplot(trace)

# %%
pm.summary(trace).round(2)
    binary_output.reset_index(drop = True, inplace = True)
    
    # concatenate
    #X_tmp = X_tmp_1.loc[:, ['mod00_booking_yn', 'mod99_cap_member_id']] 
    output = pd.concat([transposed_output_sorted_tmp, binary_output], axis = 1)
    output.set_index('index', inplace = True)
    y_predicted = pd.concat([output, y_test_oob, X_test_oob.loc[:, 'mod99_cap_member_id']], axis = 1)
    
    # compute and print classification metrics                    
    MCC_metric = matthews_corrcoef(y_predicted.loc[:, 'truth'], y_predicted.loc[:, 'binary'])
    precision_metric = precision_score(y_predicted.loc[:, 'truth'], y_predicted.loc[:, 'binary'])
    recall_metric = recall_score(y_predicted.loc[:, 'truth'], y_predicted.loc[:, 'binary'])
    print('MCC = %.3f\n precision = %.3f\n recall = %.3f' % \
              (MCC_metric, precision_metric, recall_metric ))  

"""
###############################################################################
## 10. MCMC TRACE DIAGNOSTICS [to be done only once, for calibrating the Bayesian model]


# see graph for model
import graphviz
pm.model_to_graphviz(varying_intercept_slope_noncentered)

# too RAM-demanding
data = az.convert_to_dataset(varying_intercept_slope_noncentered_trace)

## show traces
pm.traceplot(varying_intercept_slope_noncentered_trace)  

#az.plot_trace(glm_model_trace, compact=True)
    thetas = pm.Beta('thetas',
                     alpha=phi * kappa,
                     beta=(1.0 - phi) * kappa,
                     shape=N)
    y = pm.Binomial('y', n=at_bats, p=thetas, observed=hits)

#%%

with baseball_model:

    theta_new = pm.Beta('theta_new',
                        alpha=phi * kappa,
                        beta=(1.0 - phi) * kappa)
    y_new = pm.Binomial('y_new', n=4, p=theta_new, observed=0)

#%%

with baseball_model:
    trace = pm.sample(2000, cores=1, tune=1000, chains=2, target_accept=0.95)

pm.traceplot(trace, var_names=['phi', 'kappa'])

#%%

pm.model_to_graphviz(baseball_model)

#out_smry = pd.DataFrame(pm.summary(trace))

#fig = plt.gcf()
#fig.savefig("out_hr" + str(int(h)) + "_tracePlt" + ".png")
Example #27
                                               mu=mode_one_mean[bird_idx],
                                               sd=mode_one_sd[bird_idx])
            mode_two[bird_day_idx] = pm.Normal(bird_day_id + '_2',
                                               mu=mode_two_mean[bird_idx],
                                               sd=mode_two_sd[bird_idx])
            mode_three[bird_day_idx] = pm.Normal(bird_day_id + '_3',
                                                 mu=mode_three_mean[bird_idx],
                                                 sd=mode_three_sd[bird_idx])

            wake[bird_day_idx] = pm.Normal(bird_day_id + '_W',
                                           mu=wake_mean[bird_idx],
                                           sd=wake_sd[bird_idx])
            sleep[bird_day_idx] = pm.Normal(bird_day_id + '_S',
                                            mu=sleep_mean[bird_idx],
                                            sd=sleep_sd[bird_idx])

            if (day_idx + 1) != len(bird):
                for obs_idx, obs in enumerate(day):
                    bird_day_obs_idx = bird_day_idx + obs_idx
                    bird_day_obs_id = bird_day_id + '_' + str(obs_idx)

                    #y[bird_day_obs_idx] = pm.Normal(bird_day_obs_id, mu=mu, sd=sd, observed=obs[1])
            else:
                for obs_idx, obs in enumerate(day):
                    bird_day_obs_idx = bird_day_idx + obs_idx
                    bird_day_obs_id = bird_day_id + '_' + str(obs_idx)

                    #y[bird_day_obs_idx] = pm.Normal(bird_day_obs_id, mu=mu, sd=sd, observed=obs[1])

pm.model_to_graphviz(model)
Example #28
def make_stochastic_volatility_model(data):
    with pm.Model() as model:
        step_size = pm.Exponential("step_size", 10)
        volatility = pm.GaussianRandomWalk("volatility",
                                           sigma=step_size,
                                           shape=len(data))
        nu = pm.Exponential("nu", 0.1)
        returns = pm.StudentT("returns",
                              nu=nu,
                              lam=np.exp(-2 * volatility),
                              observed=data["change"])
    return model


stochastic_vol_model = make_stochastic_volatility_model(returns)
# %%
pm.model_to_graphviz(stochastic_vol_model)
# %%
with stochastic_vol_model:
    prior = pm.sample_prior_predictive(500)
# %%
fig, ax = plt.subplots(figsize=(14, 4))
returns["change"].plot(ax=ax, lw=1, color="black")
ax.plot(prior["returns"][4:6].T, "g", alpha=0.5, lw=1, zorder=-10)

max_observed, max_simulated = np.max(np.abs(returns["change"])), np.max(
    np.abs(prior["returns"]))
ax.set_title(
    f"Maximum observed: {max_observed:.2g}\nMaximum simulated: {max_simulated:.2g}(!)"
)
# %%
with stochastic_vol_model:
plt.ylabel("daily returns in %")

# %%
with pm.Model() as sp500_model:
    nu = pm.Exponential("nu", 1 / 10.0, testval=5.0)
    sigma = pm.Exponential("sigma", 1 / 0.02, testval=0.1)

    s = pm.GaussianRandomWalk("s", sigma=sigma, shape=len(returns))
    volatility_process = pm.Deterministic(
        "volatility_process", pm.math.exp(-2 * s) ** 0.5
    )

    r = pm.StudentT("r", nu=nu, sigma=volatility_process, observed=returns["change"])

# %%
pm.model_to_graphviz(sp500_model)

# %%
with sp500_model:
    trace = pm.sample(2000)

# %%
pm.traceplot(trace)

# %%
fig, ax = plt.subplots(figsize=(15, 8))
returns.plot(ax=ax)
ax.plot(returns.index, 1 / np.exp(trace["s", ::5].T), "C3", alpha=0.03)
ax.set(title="volatility_process", xlabel="time", ylabel="volatility")
ax.legend(["S&P500", "stochastic volatility process"])
        plt.sca(ax[i][0])
        for j in range(trace.nchains):
            chain = trace.get_values(p, chains=[j])
            sns.kdeplot(chain)
            ax[i][1].plot(chain, alpha=0.25)
            if names is not None:
                plt.title(names[i])
    plt.tight_layout()


print('\tPlotting model')
try:
    with bhsm.model as model:
        pm.model_to_graphviz(bhsm.model).render(
            os.path.join(args.output, 'model'),
            view=False,
            format='pdf',
            cleanup=True,
        )
except ImportError:
    pass

print('\tPlotting traceplot')
# this uses too much RAM, so we define our own above
# pm.traceplot(
#     trace,
#     var_names=var_names
# )
traceplot(trace, var_names, names)
plt.savefig(os.path.join(args.output, 'trace.png'), bbox_inches='tight')
plt.close()