import arviz as az
from matplotlib.pyplot import axvline, title, xlabel, ylabel
from numpy import percentile


def neff_det_check_plot(c):
    fit = az.convert_to_inference_data(c)
    # NOTE: newer ArviZ releases renamed `credible_interval` to `hdi_prob`
    az.plot_density(fit, var_names=['neff_det'], credible_interval=0.99)
    xlabel(r'$N_\mathrm{eff}$')
    ylabel(r'$p\left( N_\mathrm{eff} \right)$')

    # reference limit: N_eff = 4 * number of observations
    nobs = c.posterior['m1s'].shape[2]
    axvline(4 * nobs)

    nemin = percentile(c.posterior['neff_det'], 2.5)
    title(r'Two-sigma lower $N_\mathrm{{eff}}$ is factor {:.2f} above limit'.format(
        nemin / (4 * nobs)))
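# A minimal usage sketch, assuming `stan_fit` is a fitted PyStan object whose
# model exposes the 'neff_det' and 'm1s' variables referenced above (the names
# come from the function itself, not from a documented dataset):
stan_fit_idata = az.from_pystan(posterior=stan_fit)
neff_det_check_plot(stan_fit_idata)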
def prior_density_plot(variable, data, plottype, plot='prior'):
    """
    Produce the prior KDE plot(s) using arviz plot_density.

    This is done in one of two ways: either all plots are drawn onto one
    graph, or each is produced separately. `plottype` selects between them
    and is either 'Separate Plots' or 'Same Plots'. `variable` is the
    variable to view, chosen from the dropdown in the program.
    """
    plots = pn.Column(scroll=True, max_height=750, sizing_mode='stretch_both')
    if plottype == 'Separate Plots':
        for key, value in data.items():
            plot = plot_call_KDE(group='prior', key=key, var=variable, value=value)
            plots.append(row(plot[0].tolist()))
    else:
        kwg = dict(height=450, width=650, toolbar_location='right')
        plot = az.plot_density(
            priors_same_plot_list(model_dict=data),
            group='prior',
            var_names=variable,
            outline=False,
            backend='bokeh',
            shade=.5,
            show=False,
            colors=color_pool_gen(data),
            data_labels=list(data.keys()),
            backend_kwargs=kwg,
        )
        for p in plot[0]:
            # move the legend to the side of the plot so it is not obscuring any info
            legend = p.legend[0]
            legend.location = (10, -10)
            legend.orientation = "vertical"
            p.add_layout(legend, place='right')
        plots.append(column(plot[0].tolist()))
    return plots
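# A hypothetical usage sketch for the Panel layout above; `models` (a dict of
# config name -> model wrapper) and the helper functions it relies on are
# assumptions inferred from the code, not a documented API:
prior_panel = prior_density_plot(variable='mu', data=models,
                                 plottype='Separate Plots')
prior_panel.servable()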
def plot_call_KDE(group, key, var, value, percent=100):
    """
    Separated-out function to allow for plot caching. Used for both prior
    and posterior KDE plots. If the (key, var) combination has already been
    called for that group, the plot will be retrieved from the cache.

    @group should be either 'prior' or 'posterior'; if the group is
    'posterior', the percentage of the data to plot is checked as well.
    """
    if group == 'posterior':
        data = value.posteriors[percent]
    else:
        data = value.model_arviz_data
    kwg = dict(height=250, width=550)
    plot = az.plot_density(
        data,
        group=group,
        var_names=var,
        outline=False,
        backend='bokeh',
        shade=.5,
        show=False,
        colors=value.color,
        backend_kwargs=kwg
    )
    for p in plot[0]:
        # prefix the plot title with the config name, and hide the
        # per-plot legend
        p.title.text = key + ' ' + p.title.text
        if p.legend:
            p.legend.visible = False
    return plot
def plot_param_diagnostics(mod, incl_noise_params=False, incl_trend_params=False,
                           incl_smooth_params=False, which='trace', **kwargs):
    """
    Parameters
    ----------
    mod : orbit model object
    which : str, {'density', 'trace', 'pair', 'autocorr', 'posterior', 'forest'}
    incl_noise_params : bool
        if plot noise parameters; default False
    incl_trend_params : bool
        if plot trend parameters; default False
    incl_smooth_params : bool
        if plot smoothing parameters; default False
    **kwargs :
        other parameters passed to arviz functions

    Returns
    -------
    matplotlib axes object
    """
    posterior_samples = get_arviz_plot_dict(
        mod,
        incl_noise_params=incl_noise_params,
        incl_trend_params=incl_trend_params,
        incl_smooth_params=incl_smooth_params)

    if which == "trace":
        axes = az.plot_trace(posterior_samples, **kwargs)
    elif which == "density":
        axes = az.plot_density(posterior_samples, **kwargs)
    elif which == "posterior":
        axes = az.plot_posterior(posterior_samples, **kwargs)
    elif which == "pair":
        axes = az.plot_pair(posterior_samples, **kwargs)
    elif which == "autocorr":
        axes = az.plot_autocorr(posterior_samples, **kwargs)
    elif which == "forest":
        axes = az.plot_forest(posterior_samples, **kwargs)
    else:
        raise ValueError(
            "please use one of 'trace', 'density', 'posterior', 'pair', "
            "'autocorr', 'forest' for `which`."
        )
    return axes
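# A minimal usage sketch, assuming `mod` is a fitted orbit model object as
# described in the docstring above:
import matplotlib.pyplot as plt

axes = plot_param_diagnostics(mod, incl_trend_params=True, which='density')
plt.show()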
""" Density Plot ============ _thumb: .5, .5 _example_title: Plot density """ import matplotlib.pyplot as plt import arviz as az az.style.use("arviz-darkgrid") centered_data = az.load_arviz_data("centered_eight") non_centered_data = az.load_arviz_data("non_centered_eight") az.plot_density( [centered_data, non_centered_data], data_labels=["Centered", "Non Centered"], var_names=["theta"], shade=0.1, ) plt.show()
ax.axvline(height.mean())
ax.set(title='Posterior predictive of the mean', xlabel='mean(x)', ylabel='Frequency')

_, ax = plt.subplots(figsize=(12, 6))
ax.hist([h.mean() for h in no_collinear_ppc['h']])
ax.axvline(height.mean())
ax.set(title='Posterior predictive of the mean', xlabel='mean(x)', ylabel='Frequency')

# Plot posterior density for models
#%%
az.plot_density([trace_collinear, trace_no_collinear],
                data_labels=['collinear', 'no collinear'],
                var_names=['br'],
                shade=0.1)

# Compare plots
#%%
az.plot_forest([trace_collinear, trace_no_collinear],
               model_names=['collinear', 'no collinear'],
               var_names=['br', 'a', 'sigma'])

#%%
az.plot_forest([trace_collinear, trace_no_collinear],
               model_names=['collinear', 'no collinear'],
               var_names=['br', 'a', 'sigma'],
               kind='ridgeplot')
""" Density Plot ============ _thumb: .5, .5 """ import arviz as az az.style.use('arviz-darkgrid') centered_data = az.load_arviz_data('centered_eight') non_centered_data = az.load_arviz_data('non_centered_eight') az.plot_density([centered_data, non_centered_data], data_labels=['Centered', 'Non Centered'], var_names=['theta'], shade=0.1)
                     prior=prior, posterior_predictive=post)
data

# %%
az.plot_trace(data)
plt.show()

# %%
# sphinx_gallery_thumbnail_number = 3
az.plot_density([data, data.prior],
                shade=.9,
                data_labels=["Posterior", "Prior"],
                var_names=[
                    '$\\mu_{top}$', '$\\mu_{bottom}$', '$\\mu_{thickness}$',
                    '$\\sigma_{top}$', '$\\sigma_{bottom}$', '$\\sigma_{thickness}$'
                ],
                colors=[default_red, default_blue],
                bw=5);
plt.show()

# %%
p = pp.PlotPosterior(data)
p.create_figure(figsize=(9, 5), joyplot=False, marginal=True, likelihood=False)
p.plot_marginal(var_names=['$\\mu_{top}$', '$\\mu_{bottom}$'],
                plot_trace=False,
                credible_interval=.70,
                kind='kde',
                marginal_kwargs={"bw": 1})
np.mean(prior_sample['date_sigmas'])

with open('data/county_trace.pkl', 'wb') as f:
    pickle.dump(trace, f)

with open('data/county_trace.pkl', 'rb') as f:
    trace = pickle.load(f)

pm.traceplot(trace)

import arviz as az

data = az.from_pymc3(prior=prior_sample)
az.plot_density(data, group='prior')
az.plot_trace(trace, var_names=['date_mus'])
az.plot_forest(trace, var_names=['date_mus'])
az.plot_forest(trace, var_names=['state_mu'], r_hat=True, combined=True)
az.plot_forest(trace, var_names=['state_kappa'], r_hat=True, combined=True)
az.plot_forest(trace, var_names=['theta'], r_hat=True, combined=False)
az.plot_posterior(trace, var_names='county_thetas', combined=False)

import seaborn as sns

sns.kdeplot(prior_sample['y'])

date_mus = trace.get_values('date_mus', combine=False)
date_mus = np.array(date_mus)
date_mus.shape
""" Density Plot ============ _thumb: .5, .5 """ import arviz as az centered_data = az.load_arviz_data("centered_eight") non_centered_data = az.load_arviz_data("non_centered_eight") ax = az.plot_density( [centered_data, non_centered_data], data_labels=["Centered", "Non Centered"], var_names=["theta"], shade=0.1, backend="bokeh", )
def view_stan_result(fit):
    az.plot_density(fit, var_names=['w', 's'])
import csv
import os
import random as r
import shutil

import numpy as np
import pandas as pd
import pystan
import arviz as ar


def generate_model(data_file):
    read_file = pd.read_csv(data_file)  # reads csv file and scans for lat & lon columns
    N = len(read_file.values)  # number of latitude/longitude coordinates
    edges = N                  # number of edges, also equal to N of dataset
    K = 1                      # represents the coordinate pairs
    scaling_factor = 2.0       # factor of two for variance consistency
    node1 = []                 # list of indices
    node2 = []
    y1 = []                    # number of outcomes, appended using random
    for i in range(N):
        rand = r.randint(1, 2)
        y1.append(rand)
        node1.append(rand)
        node2.append(rand)
    design_matrix = []
    for val in read_file.values:
        design_matrix.append([val[1]])  # matrix with a coordinate pair as each row

    # stan code block for the data and parameters
    stan_code = """
    functions {
        real icar_normal_lpdf(vector phi, int N, int[] node1, int[] node2) {
            return -0.5 * dot_self(phi[node1] - phi[node2])
                   + normal_lpdf(sum(phi) | 0, 0.001 * N);
        }
    }
    data {
        int<lower=0> N;
        int<lower=0> edges;
        int<lower=1, upper=N> node1[edges];
        int<lower=1, upper=N> node2[edges];
        real<lower=0> scaling_factor;
        int<lower=1> K;
        int<lower=0> y[N];
        matrix[N, K] x;
    }
    parameters {
        real beta0;
        vector[K] betas;
        real logit_rho;
        vector[N] phi;
        vector[N] theta;
        real<lower=0> sigma;
    }
    transformed parameters {
        real<lower=0, upper=1> rho = inv_logit(logit_rho);
        vector[N] convolved_re = sqrt(rho / scaling_factor) * phi
                                 + sqrt(1 - rho) * theta;
    }
    model {
        y ~ poisson_log(beta0 + x * betas + convolved_re * sigma);
        beta0 ~ normal(0, 1);
        betas ~ normal(0, 1);
        logit_rho ~ normal(0, 1);
        sigma ~ normal(0, 1);
        theta ~ normal(0, 1);
        // sampling statements drop the _lpdf suffix of the user-defined density
        phi ~ icar_normal(N, node1, node2);
    }
    """

    # model data initialization
    bym_data = {
        'N': N,                # size of the graph = number of values in csv
        'edges': edges,        # edge sets representing relations
        'node1': node1,        # indices corresponding to 1st component (i) of ICAR
        'node2': node2,        # indices corresponding to 2nd component (j) of ICAR
        'scaling_factor': scaling_factor,  # variance between spatial points
        'K': K,                # number of covariates
        'y': y1,               # number of outcomes
        'x': np.array(design_matrix)  # design matrix for the structure of the graph
    }

    # Use pystan's built-in StanModel class to generate a model and arviz to plot
    sm = pystan.StanModel(model_code=stan_code)
    fit = sm.sampling(data=bym_data, iter=1000, chains=4, sample_file='results.csv')
    data_summary = pystan.stansummary(fit=fit, pars=None, probs=(0.5, 0.975),
                                      digits_summary=3)
    print(data_summary)
    ar.plot_density(fit, var_names=['beta0', 'betas', 'logit_rho', 'sigma', 'theta'])

    first_path = os.path.abspath('./' + 'results_0.csv')
    new_path = os.path.abspath('fitted_data/')
    shutil.move(first_path, new_path + '/' + 'results_0.csv')

    betas = []
    df = pd.read_csv('fitted_data/results_0.csv', usecols=['beta0'], comment='#')
    numbers = df.iloc[:, 0].values
    for num in numbers:
        betas.append(num)

    usa = []
    sf = pd.read_csv('datasets/us_unemployment_2012.csv', usecols=['State'])
    states = sf.iloc[:, 0].values
    for state in states:
        usa.append(state)

    with open('mod_results.csv', 'w', newline='') as newFile:
        newFileWriter = csv.writer(newFile)
        newFileWriter.writerow(["State", "Rate"])
        for i in range(len(usa)):
            newFileWriter.writerow([usa[i], betas[i]])
            'NumberPurchases': datad['NumberPurchasesPerUnitRounded'].values,
            'TotalSum': TotalSum.values}

sm = pystan.StanModel(model_code=clv_model)
fit = sm.sampling(data=clv_data, iter=1000, chains=4)

## Diagnostics
az.plot_density(fit, var_names=["s", "beta", "r", "alpha", "totlife"]);
az.plot_trace(fit, var_names=["s", "beta", "r", "alpha"]);
az.summary(fit, var_names=["s", "beta", "r", "alpha"]);

# Plot trace of future_amount
# FutureAmount_Summary = az.summary(fit, var_names=["amount"])
# FutureAmount_mean = FutureAmount_Summary["mean"]
# Re["FutureAmount_mean"] = FutureAmount_mean.values
# Using ```arviz``` is recommended.
#
# If you use anaconda, install it with ```conda install -c conda-forge arviz```.
#
# For detailed usage, see the [official introduction](https://arviz-devs.github.io/arviz/notebooks/Introduction.html).

import arviz as az

az.style.use("arviz-darkgrid")

az.plot_posterior(fit)
print(fit)
az.plot_trace(fit)
az.plot_density(fit)

# So far we have approximated the posterior distribution of the parameters,
# $p(w|X) \propto p(X|w)p(w)$, by MCMC sampling. Next we compute the predictive
# distribution, i.e. the probability model averaged over that posterior:
# $p(x^*|X) = \int p(x^*|w)\,p(w|X)\,dw$.
#
# In statistical modeling by Bayesian inference, the aim is to make the error
# (generalization error) between this predictive distribution and the true
# distribution generating the samples as small as possible.
#
# For details, see [Bayesian statistics: theory and methods](http://watanabe-www.math.dis.titech.ac.jp/users/swatanab/bayes-theory-method.html)
# and the author's [lecture materials and Q&A](http://watanabe-www.math.dis.titech.ac.jp/users/swatanab/index-j.html).
#
# - [arviz.from_pystan document](https://arviz-devs.github.io/arviz/generated/arviz.from_pystan.html)
# - ```coords```: the values used as indices

data = az.from_pystan(posterior=fit, posterior_predictive='y_hat',
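# A minimal sketch of that integral as a Monte Carlo average, assuming a simple
# normal model whose posterior draws are named 'mu' and 'sigma' (those names
# are assumptions for illustration, not taken from the fit above):
import numpy as np
from scipy import stats

draws = fit.extract()  # PyStan: dict of posterior draws, one array per parameter
x_star = np.linspace(-5, 5, 200)
# p(x*|X) ~= (1/S) * sum_s p(x*|w_s): average the likelihood over the S draws
pred_density = stats.norm.pdf(
    x_star[:, None], loc=draws['mu'], scale=draws['sigma']
).mean(axis=1)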
import pickle

import pystan
import pandas as pd
import numpy as np
import scipy as sp
import arviz as az  # ??? matplotlib replacement?

np.random.seed(33)
N = 50
x = np.random.randn(N)
model = "model.stan"

try:
    with open("pickled_model.pickle", "rb") as pickle_file:
        stan_model = pickle.load(pickle_file)
except FileNotFoundError:
    stan_model = pystan.StanModel(file=model)
    with open("pickled_model.pickle", "wb") as pickle_file:
        pickle.dump(stan_model, pickle_file)

fit = stan_model.sampling(data={'N': N, 'x': x}, chains=1)
print(fit)

# Arviz, doesn't work at the moment
az.plot_density(fit, var_names=['mu', 'sigma'])

# Old fashioned, generates a warning but works
fit.plot().savefig("./images/pygraph-savefig.png")
    mu = alpha
    y_obs = pm.Normal('y_obs', mu=mu, sigma=np.exp(log_sigma),
                      observed=d['kcal.per.g'])

pm.find_MAP(model=m6_11, method='BFGS')

with m6_11:
    trace = pm.sample(2000, return_inferencedata=True, chains=2)

pm.summary(trace)
az.summary(trace)
# pm.gelman_rubin(trace)

with m6_11:
    az.plot_trace(trace)
    plt.show()
    az.plot_autocorr(trace)
    plt.show()
    az.plot_density(trace)
    plt.show()
    az.plot_forest(trace)
    plt.show()

# might need to multiply by -2 to compare with McElreath
with m6_11:
    print(pm.waic(trace))
    print(pm.loo(trace))

# m6_13 = pm.Model()
with pm.Model() as m6_13:
    alpha = pm.Uniform('alpha', 0, 5)
    bm = pm.Uniform('bm', -10, 10)
    log_sigma = pm.Uniform('log_sigma', -10, 10)