def __main__():
    """Entry point: fit the scaled sparse CAR tobit model and inspect sampler diagnostics.

    Relies on module-level helpers (`prepare_tobit_data`, `get_students_adjacency`,
    `scaled_spare_car`) and `arviz` (as `az`) being importable at module scope.
    """
    tobit_data = prepare_tobit_data()
    ad_matrix = get_students_adjacency(tobit_data)
    # here, try any of the models defined before.
    fit, model = scaled_spare_car(tobit_data, ad_matrix)
    # y_cens look ok though
    # tau quite large -> 23, close to the largest "friend_group"
    # larger phis now :) in negative and positive!
    # investigate: can I use the normal_l(c)cdf function?
    # fit, model = tobit_simple_model(tobit_data, scaled=True)
    # fit, model = tobit_cum_sum_scaled(tobit_data)
    # fit, model = tobit_vec_QR(tobit_data)
    # note: this yields expected values for beta, but throws warnings for:
    # - Rhat (though everything is 1)
    # - az.plot_trace(fit, compact=True)
    az.plot_pair(fit, ['tau', 'alpha', 'sigma'], divergences=True)
    # seems like I'm having a lot of divergences where:
    # - sigma below 0.0025
    # - alpha > 0.99 (would imply IAR)
    # -> constraining helped a bit. But having region _around_ sigma = 0.08 and 0.04
    az.plot_energy(fit)
def create_diagnostic_plots(idx, pdf_filename, fit, diag_pars, niter, nchain):
    """Write a multi-page PDF of sampler diagnostics for a PyStan fit.

    Parameters
    ----------
    idx : passed through to `plot_sampler_params` (semantics defined there)
    pdf_filename : str
        Output path of the PDF report.
    fit : pystan StanFit object
        The fitted model; converted to ArviZ InferenceData for plotting.
    diag_pars : sequence of dict
        Per-chain sampler diagnostics (as returned by
        `fit.get_sampler_params()`), one dict per chain.
    niter : int
        Number of stored iterations per chain.
    nchain : int
        Number of chains.
    """
    # Converting the Stan FIT object to ArviZ InferenceData
    samples = fit.extract(permuted=True)  # Extracting parameter samples
    data = az.from_pystan(fit)
    tmp = data.posterior
    var_names = list(tmp.data_vars)

    # Filtering the list of parameters to plot (drop the large vector quantities)
    unwanted = {'losvd', 'spec', 'conv_spec', 'poly', 'bestfit', 'losvd_',
                'losvd_mod', 'spec_pred', 'log_likelihood'}
    vars_main = [e for e in var_names if e not in unwanted]

    # Reading diagnostic parameters into (niter, nchain) arrays, one column per chain
    accept_stat = np.zeros((niter, nchain))
    stepsize = np.zeros((niter, nchain))
    treedepth = np.zeros((niter, nchain))
    n_leapfrog = np.zeros((niter, nchain))
    divergent = np.zeros((niter, nchain))
    energy = np.zeros((niter, nchain))
    for j in range(nchain):
        accept_stat[:, j] = diag_pars[j]['accept_stat__']
        stepsize[:, j] = diag_pars[j]['stepsize__']
        treedepth[:, j] = diag_pars[j]['treedepth__']
        n_leapfrog[:, j] = diag_pars[j]['n_leapfrog__']
        divergent[:, j] = diag_pars[j]['divergent__']
        energy[:, j] = diag_pars[j]['energy__']

    # Creating the plot in multiple PDF pages.
    # `with` guarantees the PDF is finalized even if a plotting call raises
    # (the original called pdf_pages.close() manually, leaking on error).
    with PdfPages(pdf_filename) as pdf_pages:
        print(" - Sampler params")
        plot_sampler_params(idx, accept_stat, stepsize, treedepth,
                            n_leapfrog, divergent, energy)
        pdf_pages.savefig()
        print(" - Chains")
        plot_chains(samples, vars_main)
        pdf_pages.savefig()
        # print(" - Trace plot [Main params]")
        # az.plot_trace(data, var_names=vars_main)
        # pdf_pages.savefig()
        # print(" - Trace plot [LOSVD]")
        # az.plot_trace(data, var_names=['losvd'])
        # pdf_pages.savefig()
        print(" - Pair plot")
        az.plot_pair(data, var_names=vars_main, divergences=True,
                     kind='kde', fill_last=False)
        pdf_pages.savefig()
        print(" - Autocorr plot")
        az.plot_autocorr(data, var_names=vars_main)
        pdf_pages.savefig()
        print(" - Energy plot")
        az.plot_energy(data)
        pdf_pages.savefig()

    return
def tobit_ifelse_model(tobit_data: pd.DataFrame):
    """Fit the if/else-loop variant of the tobit Stan model.

    Use a loop instead of two matrices as preparation for using the
    adjacency matrix.

    Parameters
    ----------
    tobit_data : pd.DataFrame
        Input data; converted to a Stan data dict by `get_datadict`.

    Returns
    -------
    tuple
        (StanFit4Model, StanModel) for further inspection by the caller.
    """
    censored_dict = get_datadict(tobit_data)
    # `with` closes the .stan file handle (the original leaked it via .open()).
    with Path('models/tobit_students_ifelse.stan').open() as stan_file:
        censored_loop_model = StanModel(file=stan_file, extra_compile_args=["-w"])
    censored_loop_fit = censored_loop_model.sampling(
        data=censored_dict, iter=2000, chains=4, warmup=500)
    az.plot_trace(censored_loop_fit)
    az.plot_energy(censored_loop_fit)
    cens_loop_res = censored_loop_fit.extract()
    # NOTE(review): extract() already drops warmup draws; the [501:] slice
    # discards 501 additional post-warmup draws — confirm this is intended.
    print('α: {}'.format(cens_loop_res['alpha'][501:].mean()))
    print('β: {}'.format(cens_loop_res['beta'][501:].mean(axis=0)))
    # yay works. intercept: 208.6, read: 2.70, math: 5.93, gen: -12.75, voc: -46.6
    return censored_loop_fit, censored_loop_model
""" Energy Plot =========== _thumb: .7, .5 _example_title: Plot energy """ import matplotlib.pyplot as plt import arviz as az az.style.use("arviz-darkgrid") data = az.load_arviz_data("centered_eight") az.plot_energy(data, figsize=(12, 8)) plt.show()
'away_score' : away_score, 'npredict' : npredict, 'home_team_new' : home_team_new, 'away_team_new' : away_team_new } mod = pystan.StanModel(model_code=model_2) fit = mod.sampling(data = data, iter=15000, warmup=1000, chains=4, control=dict(max_treedepth = 14, adapt_delta = .9)) """### **ASSESSING MODEL**""" print(fit) az.style.use('arviz-darkgrid') inf_data = az.convert_to_inference_data(fit) az.plot_energy(inf_data) az.plot_trace(fit,var_names=['home_score_new', 'away_score_new']) az.plot_trace(fit, var_names=['att', 'def'], combined=True) plt.style.use('ggplot') _, ax = plt.subplots(1, 2, figsize=(15, 6)) az.plot_forest(fit, var_names="att",combined=True, ax=ax[0], kind='ridgeplot', ridgeplot_alpha=.5, ridgeplot_overlap=1.5, hdi_prob=.999, linewidth=.5) ax[0].set_yticklabels(sorted(names, reverse=True)) ax[0].set_title('Estimated Attack Effect (Positive is Better)', loc='left') ax[0].grid(True) az.plot_forest(fit, var_names="def", combined=True, ax=ax[1], kind='ridgeplot', ridgeplot_alpha=.5, ridgeplot_overlap=1.5, colors='#99c2ff', hdi_prob=.999, linewidth=.5) ax[1].set_yticklabels(sorted(names, reverse=True)) ax[1].set_title('Estimated Defense Effect (Negative is Better)', loc='left') ax[1].grid(True)
# NOTE(review): pm.sample normally runs inside a `with <model>:` context —
# the enclosing `with ic_model:` presumably precedes this chunk; confirm.
trace = pm.sample(2000, tune=1000, random_seed=RANDOM_SEED,
                  target_accept=0.95, init="advi+adapt_diag")

with ic_model:
    ppc = pm.sample_posterior_predictive(trace)

az_data = az.from_pymc3(trace=trace, posterior_predictive=ppc, model=ic_model,
                        coords={"episodes": epi_enc.classes_},
                        dims={"κ": ["episodes"]})
az.plot_energy(az_data, figsize=(6, 4))

vnames = ["κ", "β", "σ_y"]
sum_df = az.summary(az_data, var_names=vnames, round_to=3)
sum_df.index = ["κ: " + x for x in list(epi_enc.classes_)] + ["β", "σ_y"]
sum_df  # bare expression: displays the table in a notebook cell

# ## Estimates
#
# In the Table above, we present the results of the exponential decay model.
# The $\kappa$ parameter ranges from a low of 0.02 during the Great Recession (GFC)
# to a high of 0.4 in the aftermath of Katrina. The estimate for the current COVID
# episode is in between the 2 but subject to considerable uncertainty given we
# are still early in the process. As more data becomes available,
# the 95% credibility intervals will shrink but we have to be
# cognizant of the fact that any projections will of necessity be
""" Energy Plot =========== _thumb: .7, .5 """ import arviz as az data = az.load_arviz_data("centered_eight") ax = az.plot_energy(data, figsize=(12, 8), backend="bokeh")
trace = pm.sample(3000, cores=2, return_inferencedata=True)
az.summary(trace)
az.summary(trace.posterior['intercept'])

# pm.traceplot is the legacy alias; the az.plot_trace calls below supersede it.
pm.traceplot(trace)
plt.show()

az.plot_trace(trace)
az.plot_trace(trace, var_names=['intercept'])
plt.show()
az.plot_trace(trace, var_names=['sigma'])
plt.show()
az.plot_forest(trace, r_hat=True)
plt.show()
az.plot_posterior(trace)
plt.show()
az.plot_energy(trace)
plt.show()
az.plot_autocorr(trace, var_names=["intercept", "sigma"])
plt.show()


# map() by hand
def negPost(x, *args):
    """Negative log-posterior of (mu, sigma) = (x[0], x[1]) given data args[0].

    Minimizing this function yields the MAP estimate. Priors:
    mu ~ Normal(178, 20), sigma ~ Uniform(0, 50).
    """
    y = args[0]
    # BUG FIX: the negative log-posterior must negate the prior log-densities
    # as well; the original computed -loglik + logpriors, which biases the
    # MAP optimum. The [0] indexing assumes y is column-shaped (e.g. a
    # single-column DataFrame) — TODO confirm against the caller.
    p = -1 * (sum(norm.logpdf(y, x[0], x[1]))[0]
              + norm.logpdf(x[0], 178, 20)
              + uniform.logpdf(x[1], 0, 50))
    return p


negPost((178, 3), d2)

secondDerivPost = nd.Derivative(negPost, n=2, full_output=True)