Beispiel #1
0
def neff_det_check_plot(c):
    """Plot the density of ``neff_det`` together with the ``4 * nobs`` limit.

    ``c`` is converted to ArviZ inference data for the density plot; its
    ``posterior`` group is also read directly for ``'m1s'`` and ``'neff_det'``.
    A vertical line is drawn at ``4 * nobs`` and the title reports how far the
    2.5th percentile of ``neff_det`` lies above that line.

    The plotting helpers (``xlabel``, ``ylabel``, ``axvline``, ``title``) and
    ``percentile`` are expected to be in scope already (pylab-style names).
    """
    inference_data = az.convert_to_inference_data(c)
    az.plot_density(inference_data, var_names=['neff_det'], credible_interval=0.99)

    xlabel(r'$N_\mathrm{eff}$')
    ylabel(r'$p\left( N_\mathrm{eff} \right)$')

    # The size of the third axis of 'm1s' is used as the observation count.
    limit = 4 * c.posterior['m1s'].shape[2]
    axvline(limit)

    lower_neff = percentile(c.posterior['neff_det'], 2.5)
    ratio = lower_neff / limit
    title(r'Two-sigma lower $N_\mathrm{{eff}}$ is factor {:.2f} above limit'.format(ratio))
Beispiel #2
0
def prior_density_plot(variable, data, plottype, plot='prior'):
    """
    Build a scrollable panel column of prior KDE plots drawn with arviz plot_density.

    Parameters
    ----------
    variable :
        Variable name(s) forwarded to arviz as ``var_names``.
    data :
        Mapping of configuration label -> model object. Its keys are used as
        data labels and its values are handed to ``plot_call_KDE``.
    plottype :
        ``'Separate Plots'`` produces one row of figures per entry of ``data``;
        any other value overlays every entry on shared figures.
    plot :
        Never read by this function; kept so existing callers keep working.

    Returns
    -------
    The ``pn.Column`` holding the generated bokeh layouts.
    """
    container = pn.Column(scroll=True, max_height=750, sizing_mode='stretch_both')

    if plottype != 'Separate Plots':
        figure_options = dict(height=450, width=650,toolbar_location='right')
        overlay = az.plot_density(
            priors_same_plot_list(model_dict=data),
            group='prior',
            var_names=variable,
            outline=False,
            backend='bokeh',
            shade=.5,
            show=False,
            colors=color_pool_gen(data),
            data_labels=list(data.keys()),
            backend_kwargs=figure_options,
        )
        for figure in overlay[0]:
            # Re-attach the legend to the right of the figure so it does not
            # cover the curves.
            legend = figure.legend[0]
            legend.location = (10,-10)
            legend.orientation = "vertical"
            figure.add_layout(legend, place='right')
        container.append(column(overlay[0].tolist()))
        return container

    for key, value in data.items():
        kde_figures = plot_call_KDE(group='prior', key=key, var=variable, value=value)
        container.append(row(kde_figures[0].tolist()))
    return container
Beispiel #3
0
def plot_call_KDE(group, key, var, value, percent=100):
    """Draw the bokeh KDE figures of ``var`` for a single configuration.

    Used for both prior and posterior KDE plots. It is a separate function so
    that callers can cache the result per (group, key, var); no caching
    happens inside this function itself.

    @group   'posterior' reads ``value.posteriors[percent]``; any other value
             reads ``value.model_arviz_data``
    @key     configuration name, prefixed to every figure title
    @var     variable name(s) passed to arviz as ``var_names``
    @value   model object providing the data and its ``color``
    @percent key into ``value.posteriors``; only used for the posterior group

    Returns whatever ``az.plot_density`` returned, after the titles and
    legends of the figures in its first row have been adjusted.
    """
    source = value.posteriors[percent] if group == 'posterior' else value.model_arviz_data
    figures = az.plot_density(
        source,
        group=group,
        var_names=var,
        outline=False,
        backend='bokeh',
        shade=.5,
        show=False,
        colors=value.color,
        backend_kwargs=dict(height=250, width=550),
    )
    for figure in figures[0]:
        # Put the configuration name in front of the title arviz generated.
        figure.title.text = key+' '+figure.title.text
        if figure.legend:
            figure.legend.visible = False
    return figures
Beispiel #4
0
def plot_param_diagnostics(mod,
                           incl_noise_params=False,
                           incl_trend_params=False,
                           incl_smooth_params=False,
                           which='trace',
                           **kwargs):
    """
    Draw one of the arviz diagnostic plots for the parameters of a model.

    Parameters
    -----------
    mod : orbit model object
    which : str, {'density', 'trace', 'pair', 'autocorr', 'posterior', 'forest'}
        which arviz plot to draw; default 'trace'
    incl_noise_params : bool
        if plot noise parameters; default False
    incl_trend_params : bool
        if plot trend parameters; default False
    incl_smooth_params : bool
        if plot smoothing parameters; default False
    **kwargs :
        other parameters passed to arviz functions

    Returns
    -------
        whatever the selected ``az.plot_*`` function returns
        (matplotlib axes object with the default backend)

    Raises
    ------
    ValueError
        if ``which`` is not one of the supported plot kinds. The check runs
        before any posterior samples are extracted from ``mod``.
    """
    # Map each supported value of `which` to the arviz function name. Names
    # are resolved lazily so only the requested plot function is looked up.
    plotters = {
        'trace': 'plot_trace',
        'density': 'plot_density',
        'posterior': 'plot_posterior',
        'pair': 'plot_pair',
        'autocorr': 'plot_autocorr',
        'forest': 'plot_forest',
    }
    try:
        plotter_name = plotters[which]
    except (KeyError, TypeError):
        # TypeError covers unhashable values such as a list.
        raise ValueError(
            "please use one of 'trace', 'density', 'posterior', 'pair', "
            "'autocorr', 'forest' for which; got {!r}.".format(which)
        ) from None

    posterior_samples = get_arviz_plot_dict(
        mod,
        incl_noise_params=incl_noise_params,
        incl_trend_params=incl_trend_params,
        incl_smooth_params=incl_smooth_params)

    return getattr(az, plotter_name)(posterior_samples, **kwargs)
Beispiel #5
0
"""
Density Plot
============

_thumb: .5, .5
_example_title: Plot density
"""
import matplotlib.pyplot as plt

import arviz as az

# Apply the "arviz-darkgrid" style sheet to the figures created below.
az.style.use("arviz-darkgrid")

# Load the two example datasets by name.
centered_data = az.load_arviz_data("centered_eight")
non_centered_data = az.load_arviz_data("non_centered_eight")
# Draw the densities of "theta" from both datasets in one call; the labels
# are given in the same order as the datasets.
az.plot_density(
    [centered_data, non_centered_data],
    data_labels=["Centered", "Non Centered"],
    var_names=["theta"],
    shade=0.1,
)
plt.show()
# NOTE(review): `ax`, `height`, `no_collinear_ppc`, `trace_collinear` and
# `trace_no_collinear` are not defined in the visible part of this script;
# presumably they come from earlier cells - confirm before running standalone.

# Mark the mean of `height` on the current axes and label the plot.
ax.axvline(height.mean())
ax.set(title='Posterior predictive of the mean',
       xlabel='mean(x)',
       ylabel='Frequency')

# New figure: histogram of the mean of every entry of no_collinear_ppc['h'],
# with the mean of `height` marked for comparison.
_, ax = plt.subplots(figsize=(12, 6))
ax.hist([h.mean() for h in no_collinear_ppc['h']])
ax.axvline(height.mean())
ax.set(title='Posterior predictive of the mean',
       xlabel='mean(x)',
       ylabel='Frequency')

# Plot posterior density of 'br' for both models
#%%
az.plot_density([trace_collinear, trace_no_collinear],
                data_labels=['collinear', 'no collinear'],
                var_names=['br'],
                shade=0.1)

# Compare 'br', 'a' and 'sigma' across both models with a forest plot
#%%
az.plot_forest([trace_collinear, trace_no_collinear],
               model_names=['collinear', 'no collinear'],
               var_names=['br', 'a', 'sigma'])

# Same comparison, drawn with kind='ridgeplot'
#%%
az.plot_forest([trace_collinear, trace_no_collinear],
               model_names=['collinear', 'no collinear'],
               var_names=['br', 'a', 'sigma'],
               kind='ridgeplot')
Beispiel #7
0
"""
Density Plot
============

_thumb: .5, .5
"""
import arviz as az

# Apply the 'arviz-darkgrid' style sheet to the figure created below.
az.style.use('arviz-darkgrid')

# Load the two example datasets by name.
centered_data = az.load_arviz_data('centered_eight')
non_centered_data = az.load_arviz_data('non_centered_eight')
# Draw the densities of 'theta' from both datasets in one call; the labels
# are given in the same order as the datasets.
az.plot_density([centered_data, non_centered_data],
                data_labels=['Centered', 'Non Centered'],
                var_names=['theta'],
                shade=0.1)
Beispiel #8
0
                     prior=prior,
                     posterior_predictive=post)
data

# %% 

# Trace plot of the inference data.
# NOTE(review): `data` is assigned by a statement that starts above the
# visible part of this script.
az.plot_trace(data)
plt.show()

# %% 
# sphinx_gallery_thumbnail_number = 3

# Plot `data` and its `prior` group on the same axes for the six listed
# variables; the two colours are given in the same order as the labels.
# NOTE(review): `default_red` / `default_blue` are defined outside this view.
az.plot_density([data, data.prior], shade=.9, data_labels=["Posterior", "Prior"],
                var_names=[
                    '$\\mu_{top}$',
                    '$\\mu_{bottom}$',
                    '$\\mu_{thickness}$',
                    '$\\sigma_{top}$',
                    '$\\sigma_{bottom}$',
                    '$\\sigma_{thickness}$'
                ],
                colors=[default_red, default_blue], bw=5);
plt.show()

# %%

# Marginal plot of the two listed mu variables through the project's
# PlotPosterior helper (`pp` is imported outside this view).
p = pp.PlotPosterior(data)

p.create_figure(figsize=(9, 5), joyplot=False, marginal=True, likelihood=False)
p.plot_marginal(var_names=['$\\mu_{top}$', '$\\mu_{bottom}$'],
                plot_trace=False, credible_interval=.70, kind='kde',
                marginal_kwargs={"bw": 1}
                )
Beispiel #9
0
# NOTE(review): `prior_sample`, `trace`, `np`, `pm` and `pickle` come from
# earlier parts of this script that are not visible here.
# Bare expression: the value is only displayed in an interactive session.
np.mean(prior_sample['date_sigmas'])




# Save the trace to disk, then read it straight back.
with open('data/county_trace.pkl','wb') as f:
    pickle.dump(trace, f)
    
# NOTE: pickle.load can execute arbitrary code; only load files this script
# wrote itself.
with open('data/county_trace.pkl','rb') as f:
    trace = pickle.load(f)

pm.traceplot(trace)
import arviz as az

# Wrap the prior samples as ArviZ inference data and plot their densities.
data = az.from_pymc3(prior=prior_sample)
az.plot_density(data,group='prior')

# Trace and forest plots for selected variables of the trace.
az.plot_trace(trace, var_names=['date_mus'])
az.plot_forest(trace, var_names=['date_mus'])
az.plot_forest(trace, var_names=['state_mu'], r_hat=True, combined=True)
az.plot_forest(trace, var_names=['state_kappa'], r_hat=True, combined=True)
az.plot_forest(trace, var_names=['theta'], r_hat=True, combined=False)

az.plot_posterior(trace, var_names='county_thetas', combined=False)

import seaborn as sns
# KDE of the prior samples of 'y'.
sns.kdeplot(prior_sample['y'])

# Fetch the 'date_mus' samples with combine=False, convert the result to an
# array and show its shape (displayed only in an interactive session).
date_mus = trace.get_values('date_mus',combine=False)
date_mus = np.array(date_mus)
date_mus.shape
"""
Density Plot
============

_thumb: .5, .5
"""
import arviz as az

# Load the two example datasets by name.
centered_data = az.load_arviz_data("centered_eight")
non_centered_data = az.load_arviz_data("non_centered_eight")
# Draw the densities of "theta" from both datasets with the bokeh backend;
# the value returned by plot_density is kept in `ax`.
ax = az.plot_density(
    [centered_data, non_centered_data],
    data_labels=["Centered", "Non Centered"],
    var_names=["theta"],
    shade=0.1,
    backend="bokeh",
)
Beispiel #11
0
def view_stan_result(fit):
    """Draw density plots of the ``w`` and ``s`` variables of ``fit``.

    Returns ``None``; the plot is produced as a side effect of
    ``az.plot_density``.
    """
    shown_variables = ['w', 's']
    az.plot_density(fit, var_names=shown_variables)
Beispiel #12
0
def generate_model(data_file):
    """Fit the Stan spatial Poisson model below and export ``beta0`` draws per state.

    Steps, in order:

    1. Read ``data_file`` as CSV; the second column (index 1) of every row
       becomes the single covariate of the design matrix.
    2. Draw one random integer in {1, 2} per row and use that same sequence as
       the outcomes ``y`` and as both edge index lists ``node1`` / ``node2``.
    3. Compile and sample the Stan program with PyStan (4 chains, 1000
       iterations), print the summary and plot parameter densities.
    4. Move ``results_0.csv`` from the working directory into ``fitted_data/``.
    5. Write ``mod_results.csv`` pairing each state listed in
       ``datasets/us_unemployment_2012.csv`` with the ``beta0`` value at the
       same row position of the moved sample file.

    Returns ``None``; all results are files, printed output and a plot.
    """
    frame = pd.read_csv(data_file)
    N = len(frame.values)  # one graph node per CSV row
    edges = N  # the edge count is set equal to the node count
    K = 1  # a single covariate column
    scaling_factor = 2.0

    # One random draw per row; outcomes and both edge lists share the values.
    y1 = [r.randint(1, 2) for _ in range(N)]
    node1 = list(y1)
    node2 = list(y1)

    # N x 1 design matrix taken from the second column of the CSV.
    design_matrix = [[record[1]] for record in frame.values]

    # Stan program: data, parameters and model blocks
    stan_code = """
    functions {
        real icar_normal_lpdf(vector phi, int N, int[] node1, int[] node2){
            return -0.5 * dot_self(phi[node1] - phi[node2]) + normal_lpdf(sum(phi) | 0, 0.001 * N);
        }
    }
        
    data {
        int<lower=0> N;
        int<lower=0> edges;
        int<lower=1, upper=N> node1[edges];
        int<lower=1, upper=N> node2[edges];
        real<lower=0> scaling_factor;
        int<lower=1> K;
        int<lower=0> y[N];
        matrix[N, K] x;
    }
        
    parameters {
        real beta0;
        vector[K] betas;
        real logit_rho;
        vector[N] phi;
        vector[N] theta;
        real<lower=0> sigma;
    }
        
    transformed parameters {
        real<lower=0, upper=1> rho = inv_logit(logit_rho);
        vector[N] convolved_re = sqrt(rho / scaling_factor) * phi + sqrt(1 - rho) * theta;
    }
        
    model {
        y ~ poisson_log(beta0 + x * betas + convolved_re * sigma);
        beta0 ~ normal(0, 1);
        betas ~ normal(0, 1);
        logit_rho ~ normal(0, 1);
        sigma ~ normal(0, 1);
        theta ~ normal(0, 1);
        phi ~ icar_normal_lpdf(N, node1, node2);
    }
    """

    # Values for the Stan `data` block, keyed by the names declared there.
    bym_data = {
        'N': N,
        'edges': edges,
        'node1': node1,
        'node2': node2,
        'scaling_factor': scaling_factor,
        'K': K,
        'y': y1,
        'x': np.array(design_matrix),
    }

    # Compile, sample, summarise and plot.
    sm = pystan.StanModel(model_code=stan_code)
    fit = sm.sampling(data=bym_data, iter=1000, chains=4, sample_file='results.csv')
    print(pystan.stansummary(fit=fit, pars=None, probs=(0.5, 0.975), digits_summary=3))
    ar.plot_density(fit, var_names=['beta0', 'betas', 'logit_rho', 'sigma', 'theta'])

    # Relocate the sample file named results_0.csv (presumably the first
    # chain's output of `sample_file` - confirm against PyStan) into fitted_data/.
    source_path = os.path.abspath('./results_0.csv')
    target_dir = os.path.abspath('fitted_data/')
    shutil.move(source_path, target_dir + '/results_0.csv')

    # beta0 draws from the moved file; '#' lines in it are skipped as comments.
    draws = pd.read_csv('fitted_data/results_0.csv', usecols=['beta0'], comment='#')
    betas = list(draws.iloc[:, 0].values)

    state_table = pd.read_csv('datasets/us_unemployment_2012.csv', usecols=['State'])
    usa = list(state_table.iloc[:, 0].values)

    # Pair states and draws by row position.
    with open('mod_results.csv', 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(["State", "Rate"])
        for position, state in enumerate(usa):
            writer.writerow([state, betas[position]])
Beispiel #13
0
               'NumberPurchases': datad['NumberPurchasesPerUnitRounded'].values,
               'TotalSum': TotalSum.values}






# Compile the Stan program and draw samples (4 chains, 1000 iterations each).
# NOTE(review): `clv_model` and `clv_data` are defined above the visible part
# of this script.
sm = pystan.StanModel(model_code=clv_model)
fit = sm.sampling(data=clv_data, iter=1000, chains=4)



## Diagnostics

# Density and trace plots for the listed parameters; the trailing semicolons
# only suppress the echoed return value in a notebook.
az.plot_density(fit, var_names=["s", "beta","r","alpha","totlife"]);
az.plot_trace(fit, var_names=["s", "beta","r","alpha"]);

# The summary table is computed but not stored or displayed here.
az.summary(fit, var_names=["s", "beta","r","alpha"]);


# Plot trace of future_amount


# FutureAmount_Summary = az.summary(fit, var_names=["amount"])
# FutureAmount_mean = FutureAmount_Summary["mean"]
# Re["FutureAmount_mean"] = FutureAmount_mean.values



Beispiel #14
0
# Using ```arviz``` is recommended.
#
# With Anaconda it can be installed via ```conda install -c conda-forge arviz```
#
# For detailed usage see the [official documentation](https://arviz-devs.github.io/arviz/notebooks/Introduction.html)

import arviz as az

az.style.use("arviz-darkgrid")
az.plot_posterior(fit)

print(fit)

az.plot_trace(fit)

az.plot_density(fit)

# So far the posterior distribution of the parameters, $p(w|X) \propto p(X|w)p(w)$, has been approximated by MCMC sampling,
#
# so next we compute the predictive distribution $p(x^*|X)=\int p(x^*|w)p(w|X)dw$, i.e. the probabilistic model averaged over that parameter posterior.
#
# In statistical modelling with Bayesian inference the goal is to make the error (generalization error) between this predictive distribution and the true distribution generating the samples small.
#
# For details see [Theory and Methods of Bayesian Statistics](http://watanabe-www.math.dis.titech.ac.jp/users/swatanab/bayes-theory-method.html) and [the author's lecture materials and Q&A](http://watanabe-www.math.dis.titech.ac.jp/users/swatanab/index-j.html).
#
# - [arviz.from_pystan document](https://arviz-devs.github.io/arviz/generated/arviz.from_pystan.html)
#
#     - ```coords``` : the values used as index labels

data = az.from_pystan(posterior=fit,
                      posterior_predictive='y_hat',
Beispiel #15
0
import pystan
import pandas as pd
import numpy as np
import scipy as sp
import arviz as az  # ??? matplotlib replacement?
import pickle

# Fix the NumPy seed so the synthetic data below is reproducible.
np.random.seed(33)

# Synthetic data: N draws from np.random.randn.
N = 50
x = np.random.randn(N)

model = "model.stan"
# Reuse a previously pickled compiled model when one exists; otherwise
# compile the Stan file and pickle the result for later runs.
# NOTE: pickle.load can execute arbitrary code; only load a pickle this
# script wrote itself.
try:
    with open("pickled_model.pickle", "rb") as pickle_file:
        stan_model = pickle.load(pickle_file)
except FileNotFoundError:
    stan_model = pystan.StanModel(file=model)
    with open("pickled_model.pickle", "wb") as pickle_file:
        pickle.dump(stan_model, pickle_file)

# Sample a single chain using the synthetic data.
fit = stan_model.sampling(data={'N': N, 'x': x}, chains=1)

print(fit)

# ArviZ density plot; the original author noted that this did not work at the
# time of writing.
az.plot_density(fit, var_names=['mu', 'sigma'])

# Old-fashioned PyStan plot; generates a warning but works. The figure is
# saved under ./images/.
fit.plot().savefig("./images/pygraph-savefig.png")
Beispiel #16
0
    mu = alpha
    y_obs = pm.Normal('y_obs', mu=mu, sigma=np.exp(log_sigma), observed=d['kcal.per.g'])

# NOTE(review): `m6_11` is the model whose definition ends just above the
# visible part of this script.
# The MAP estimate is computed but its return value is discarded here.
pm.find_MAP(model=m6_11, method='BFGS')

# Sample 2 chains of 2000 draws each; the result is returned as inference data.
with m6_11:
    trace = pm.sample(2000, return_inferencedata=True, chains=2)
# Both summaries are bare expressions: shown only in an interactive session.
pm.summary(trace)
az.summary(trace)
#pm.gelman_rubin(trace)
# Diagnostic plots, each shown in its own figure.
with m6_11:
    az.plot_trace(trace)
plt.show()
az.plot_autocorr(trace)
plt.show()
az.plot_density(trace)
plt.show()
az.plot_forest(trace)
plt.show()

# Print the WAIC and LOO results for the model.
# might need to multiply by -2 to compare with McElreath
with m6_11:
    print(pm.waic(trace))
    print(pm.loo(trace))


#m6_13 = pm.Model()
with pm.Model() as m6_13:
    alpha = pm.Uniform('alpha', 0, 5)
    bm = pm.Uniform('bm', -10, 10)
    log_sigma = pm.Uniform('log_sigma', -10, 10)