def compare_waic_for_models(prior_type):
    """Fit-free WAIC comparison of polynomial models (degree 1-6) loaded from disk."""
    model_trace_dict = {}
    if prior_type == 'weak':
        prior_b_std = [100]
    else:
        prior_b_std = [5]
    for degree in range(1, 7):
        for prior_b_sigma in prior_b_std:
            model, trace = load_model_trace(
                'chapter_06/fitted_models/M%d_b_std_%d.pkl' % (degree, prior_b_sigma))
            model.name = 'M%d_b_std_%d.pkl' % (degree, prior_b_sigma)
            model_trace_dict[model.name] = trace
    df_comp_WAIC = pm.compare(model_trace_dict)
    st.table(
        df_comp_WAIC.style.format({
            'waic': '{:.2f}',
            'p_waic': '{:.2f}',
            'd_waic': '{:.2f}',
            'weight': '{:.2f}',
            'se': '{:.2f}',
            'dse': '{:.2f}'
        }))
    fig, ax = plt.subplots(figsize=(6, 6))
    # Draw onto the axes created above and hand the figure to Streamlit
    # explicitly (st.pyplot() without a figure is deprecated).
    pm.compareplot(df_comp_WAIC, ax=ax)
    st.pyplot(fig)
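# The snippet above assumes a `load_model_trace` helper. A minimal sketch of
# one, assuming each .pkl file stores a (model, trace) pair pickled together;
# the name and file layout are hypothetical, not confirmed by the source:
import pickle

def load_model_trace(path):
    """Load a pickled (PyMC3 model, trace) pair from `path`."""
    with open(path, 'rb') as f:
        model, trace = pickle.load(f)
    return model, trace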
def model_comparison_WAIC(models, path, file_id, MODEL_NAME_MAP,
                          should_plot=True, export=True):
    """Conduct model comparison using WAIC, given a list of fitted models."""
    traces = [model.trace for model in models]
    models = [model.model for model in models]
    WAIC = pm.compare(traces, models).rename(index=MODEL_NAME_MAP)
    if should_plot:
        pm.compareplot(WAIC)
        if export:
            plt.savefig(f'{path}/{file_id}_WAIC.pdf',
                        format='pdf', bbox_inches='tight')
            plt.cla()
    return WAIC
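# A possible call site for model_comparison_WAIC, assuming `fits` is a list of
# fitted-model wrappers exposing .model and .trace (as the function expects)
# and that MODEL_NAME_MAP maps pm.compare's row index to display names; all
# names here are hypothetical:
MODEL_NAME_MAP = {0: 'linear', 1: 'quadratic'}
waic_table = model_comparison_WAIC(fits, path='figures', file_id='exp1',
                                   MODEL_NAME_MAP=MODEL_NAME_MAP)
print(waic_table)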
def compare(models, labels=None, insample_dev=False, **kwargs):
    """Easier model comparison for BAMBI models.

    Automatically expands model terms into formulas and sets them as
    model names.

    :param models: list or dict of BAMBI model objects; dict keys are used
        as model labels
    :param insample_dev: whether to plot in-sample deviance
    :param kwargs: keyword args for the PyMC3 model comparison function
    :returns: tuple of the matplotlib axes of the comparison plot and a
        pandas DataFrame of model statistics
    """
    traces = dict()
    if isinstance(models, dict):
        for label, model in models.items():
            traces[label] = model.backend.trace
    else:
        for model in models:
            traces[' + '.join(model.terms.keys())] = model.backend.trace
    comparison = pm.compare(traces, **kwargs)
    g = pm.compareplot(comparison, insample_dev=insample_dev)
    return g, comparison
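# Example usage, assuming `m1` and `m2` are fitted BAMBI models (hypothetical
# names); passing a dict controls the labels shown in the comparison table:
g, comparison = compare({'intercept only': m1, 'slope model': m2})
print(comparison)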
plt.savefig("pooled_trace.png") plt.close() pooled_model.name = "Pooled" unpooled_model.name = "Unpooled" hierarchical_model.name = "Hierarchical" dfComp = pm.compare( { hierarchical_model: hierarchical_trace, pooled_model: pooled_trace, unpooled_model: unpooled_trace }, ic="LOO") print(dfComp) pm.compareplot(dfComp) plt.tight_layout() plt.savefig("compare.png") plt.close() g = sns.FacetGrid(df, col="Year", col_wrap=5) g = g.map(plt.scatter, "UnionMembership", "DemShare") x = np.linspace(-2, 2, 100) for i, ax in enumerate(g.axes.flat): p_state = hierarchical_trace[ "a_year"][:, i] + hierarchical_trace["b_year"][:, i] * x[:, None] p_mean = np.mean(p_state, axis=1) ax.plot(x, p_mean, color='r') p_state = unpooled_trace[ "a_year"][:, i] + unpooled_trace["b_year"][:, i] * x[:, None] p_mean = np.mean(p_state, axis=1)
ax.loglog(freqs, beam_amp * beam_model(freqs), 'r:', label='PSF')
ax.set_xlabel("Freq. (1 / pix)")
ax.legend(frameon=True, loc='upper right')
ax.axvline(1 / beam_size, linestyle=':', linewidth=4, alpha=0.8,
           color='gray')
ax.grid()

# Model comparison plot
ax2 = plt.subplot(212)
# pm.compareplot(df_comp_WAIC, ax=ax2)
pm.compareplot(df_comp_LOO, ax=ax2)

plt.tight_layout()

# str.rstrip strips *characters*, not a suffix (it would mangle names like
# "test.fits"), so trim the extension explicitly instead.
file_base = filename[:-len(".fits")] if filename.endswith(".fits") else filename

plot_savename = osjoin(plot_folder,
                       "{0}.pspec_modelcompare.png".format(file_base))
plt.savefig(plot_savename)

plot_savename = osjoin(plot_folder,
                       "{0}.pspec_modelcompare.pdf".format(file_base))
plt.savefig(plot_savename)
plt.close()

tr_plot = pm.traceplot(trace_brok)

plot_savename = osjoin(plot_folder,
                       "{0}.brok_pspec_traceplot.pdf".format(file_base))
plt.savefig(plot_savename)
plt.close()
with pm.Model() as m7_5:
    alpha = pm.Normal('alpha', mu=8, sigma=100)
    betaR = pm.Normal('betaR', sigma=1)
    betaA = pm.Normal('betaA', sigma=1)
    betaAR = pm.Normal('betaAR', sigma=1)
    sigma = pm.Uniform('sigma', lower=0, upper=10)
    gamma = betaR + betaAR * dd.cont_africa.values
    mu = alpha + gamma * dd.rugged.values + betaA * dd.cont_africa.values
    log_gdp = pm.Normal('log_gdp', mu=mu, sigma=sigma,
                        observed=dd.log_gdp.values)
    tracem75 = pm.sample(draws=1000, tune=1000)

pm.summary(tracem75)

# 7.8
m7_5.name = 'm75'
comp = pm.compare({m7_3: tracem73, m7_4: tracem74, m7_5: tracem75})
comp
pm.compareplot(comp)

# 7.9
with pm.Model() as m7_5b:
    alpha = pm.Normal('alpha', mu=8, sigma=100)
    betaR = pm.Normal('betaR', sigma=1)
    betaA = pm.Normal('betaA', sigma=1)
    betaAR = pm.Normal('betaAR', sigma=1)
    sigma = pm.Uniform('sigma', lower=0, upper=10)
    mu = (alpha + betaR * dd.rugged.values
          + betaAR * dd.rugged.values * dd.cont_africa.values
          + betaA * dd.cont_africa.values)
    log_gdp = pm.Normal('log_gdp', mu=mu, sigma=sigma,
                        observed=dd.log_gdp.values)
    tracem75b = pm.sample(draws=1000, tune=1000)

m7_5b.name = 'm75b'
pm.summary(tracem75b)
comp_ = pm.compare({m7_5: tracem75, m7_5b: tracem75b})
def fitCompare(data, subject, n_tries=1, overwrite=False, progressbar=True):
    """
    Perform fitting of GLAM variants and WAIC model comparisons
    for a single subject:
    1) Multiplicative vs Additive
    2) Multiplicative vs No Bias
    3) Multiplicative vs Additive vs No Bias
    """
    print("Processing subject {}...".format(subject))

    # Subset data
    subject_data = data[data['subject'] == subject].copy()
    n_items = subject_data['n_items'].values[0]
    if n_items == 2:
        subject_data = subject_data.drop(['item_value_2', 'gaze_2'], axis=1)
    subject_data['subject'] = 0

    # Model specifications
    model_names = ('GLAM', 'additive', 'nobias')
    drifts = ('multiplicative', 'additive', 'multiplicative')
    parameter_sets = (['v', 's', 'tau', 'gamma'],
                      ['v', 's', 'tau', 'gamma'],
                      ['v', 's', 'tau'])
    gamma_bounds = ((-10, 1), (-100, 100), (-10, 1))
    gamma_vals = (None, None, 1.0)

    # Fit models
    converged_models = np.ones(len(model_names))
    models = len(model_names) * [None]
    for i, (model_name, drift, parameters, gamma_bound, gamma_val) in enumerate(
            zip(model_names, drifts, parameter_sets, gamma_bounds, gamma_vals)):
        print('\tS{}: {}'.format(subject, model_name))
        model, is_converged = fit_indModel(subject_data, subject,
                                           drift=drift,
                                           parameters=parameters,
                                           gamma_bounds=gamma_bound,
                                           gamma_val=gamma_val,
                                           t0_val=0,
                                           model_name=model_name)
        models[i] = model
        converged_models[i] = int(is_converged)
        if not is_converged:
            break

    # Re-sample all converged models, if any model did not converge
    if np.any(converged_models == 0):
        for i in np.where(converged_models == 1)[0]:
            print('\tRe-sampling S{}: {}'.format(subject, model_names[i]))
            model, is_converged = fit_indModel(subject_data, subject,
                                               drift=drifts[i],
                                               parameters=parameter_sets[i],
                                               gamma_bounds=gamma_bounds[i],
                                               gamma_val=gamma_vals[i],
                                               t0_val=0,
                                               model_name=model_names[i],
                                               n_tries_max=0)
            models[i] = model

    # Unpack models (comparing a plain list to None does not broadcast,
    # so check each entry explicitly)
    missing = [name for name, m in zip(model_names, models) if m is None]
    if missing:
        raise ValueError('Model(s) {} not sampled.'.format(missing))
    multiplicative, additive, nobias = models

    # Individual model comparisons
    # 1) Multiplicative vs Additive
    try:
        waic_df = pm.compare(
            {
                additive.model[0]: additive.trace[0],
                multiplicative.model[0]: multiplicative.trace[0]
            },
            ic='WAIC')
        path = os.path.join('results', 'model_comparison',
                            'additive_vs_multiplicative')
        make_sure_path_exists(path)
        make_sure_path_exists(path + '/plots/')
        waic_df.to_csv(
            os.path.join(
                path,
                'additive_vs_multiplicative_{}_waic.csv'.format(subject)))
        pm.compareplot(waic_df)
        plt.savefig(
            os.path.join(
                'results', 'model_comparison', 'additive_vs_multiplicative',
                'plots',
                'additive_vs_multiplicative_{}_waic.png'.format(subject)))
        plt.close()
    except Exception:
        print(' /!\\ Error in WAIC comparison for subject {}'.format(subject))

    # 2) Multiplicative vs No Bias
    try:
        waic_df = pm.compare(
            {
                multiplicative.model[0]: multiplicative.trace[0],
                nobias.model[0]: nobias.trace[0]
            },
            ic='WAIC')
        path = os.path.join('results', 'model_comparison',
                            'multiplicative_vs_nobias')
        make_sure_path_exists(path)
        make_sure_path_exists(path + '/plots/')
        waic_df.to_csv(
            os.path.join(
                path,
                'multiplicative_vs_nobias_{}_waic.csv'.format(subject)))
        pm.compareplot(waic_df)
        plt.savefig(
            os.path.join(
                'results', 'model_comparison', 'multiplicative_vs_nobias',
                'plots',
                'multiplicative_vs_nobias_{}_waic.png'.format(subject)))
        plt.close()
    except Exception:
        print(' /!\\ Error in WAIC comparison for subject {}'.format(subject))

    # 3) Multiplicative vs Additive vs No Bias
    try:
        waic_df = pm.compare(
            {
                multiplicative.model[0]: multiplicative.trace[0],
                additive.model[0]: additive.trace[0],
                nobias.model[0]: nobias.trace[0]
            },
            ic='WAIC')
        path = os.path.join('results', 'model_comparison',
                            'additive_vs_multiplicative_vs_nobias')
        make_sure_path_exists(path)
        make_sure_path_exists(path + '/plots/')
        waic_df.to_csv(
            os.path.join(
                path,
                'additive_vs_multiplicative_vs_nobias_{}_waic.csv'.format(
                    subject)))
        pm.compareplot(waic_df)
        plt.savefig(
            os.path.join(
                'results', 'model_comparison',
                'additive_vs_multiplicative_vs_nobias', 'plots',
                'additive_vs_multiplicative_vs_nobias_{}_waic.png'.format(
                    subject)))
        plt.close()
    except Exception:
        print(' /!\\ Error in WAIC comparison for subject {}'.format(subject))

    return True
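# fitCompare relies on a `make_sure_path_exists` helper. A minimal sketch,
# assuming it simply creates the directory tree when it is missing:
import os

def make_sure_path_exists(path):
    """Create `path` (and any missing parents) if it does not exist."""
    os.makedirs(path, exist_ok=True)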
def get_weights(self, predictions_aapl, predictions_msft, predictions_bac,
                observations_aapl):
    N_SAMPLES = 1000
    N_TUNES = 1000
    sigma_start = np.std(observations_aapl)
    alpha_start = 1
    beta_start = 0

    # predictions_shared = theano.shared(predictions_aapl)
    predictions = np.stack(
        [predictions_aapl, predictions_msft, predictions_bac])

    with pm.Model() as model:
        sigma = pm.HalfNormal('sigma', 0.1, testval=sigma_start)
        alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start, shape=3)
        beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start, shape=3)
        # Broadcast the per-series coefficients over the time axis so each
        # row of `predictions` gets its own alpha and beta
        mu = alpha[:, None] * predictions + beta[:, None]
        p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
        trace_model = pm.sample(N_SAMPLES, tune=N_TUNES)

    with pm.Model() as model_aapl:
        sigma = pm.HalfNormal('sigma', 0.1, testval=sigma_start)
        alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
        beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
        mu = alpha * predictions_aapl + beta
        p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
        trace_model_aapl = pm.sample(N_SAMPLES, tune=N_TUNES)

    with pm.Model() as model_msft:
        sigma = pm.HalfNormal('sigma', 0.1, testval=sigma_start)
        alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
        beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
        mu = alpha * predictions_msft + beta
        p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
        trace_model_msft = pm.sample(N_SAMPLES, tune=N_TUNES)

    with pm.Model() as model_bac:
        sigma = pm.HalfNormal('sigma', 0.1, testval=sigma_start)
        alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
        beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
        mu = alpha * predictions_bac + beta
        p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
        trace_model_bac = pm.sample(N_SAMPLES, tune=N_TUNES)

    # Compare the single-series models under different weighting schemes
    compare_1 = pm.compare(
        [trace_model_aapl, trace_model_msft, trace_model_bac],
        [model_aapl, model_msft, model_bac],
        method='pseudo-BMA')
    compare_2 = pm.compare(
        [trace_model_msft, trace_model_bac, trace_model_aapl],
        [model_msft, model_bac, model_aapl],
        method='pseudo-BMA')
    compare_3 = pm.compare(
        [trace_model_aapl, trace_model_msft, trace_model_bac],
        [model_aapl, model_msft, model_bac],
        method='BB-pseudo-BMA')
    compare_4 = pm.compare(
        [trace_model_aapl, trace_model_msft, trace_model_bac],
        [model_aapl, model_msft, model_bac],
        method='stacking')
    compare_5 = pm.compare([trace_model_msft, trace_model_bac],
                           [model_msft, model_bac],
                           method='pseudo-BMA')
    compare_6 = pm.compare([trace_model_aapl, trace_model_msft],
                           [model_aapl, model_msft],
                           method='BB-pseudo-BMA')
    compare_7 = pm.compare([trace_model_aapl, trace_model_msft],
                           [model_aapl, model_msft],
                           method='stacking')
    # pm.traceplot(trace_model)


d = pd.read_csv('data/milk.csv', sep=';')
d['neocortex'] = d['neocortex.perc'] / 100
d.dropna(inplace=True)
d.shape

a_start = d['kcal.per.g'].mean()
sigma_start = d['kcal.per.g'].std()

mass_shared = theano.shared(np.log(d['mass'].values))
neocortex_shared = theano.shared(d['neocortex'].values)

with pm.Model() as m6_11:
    alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
    mu = alpha + 0 * neocortex_shared
    sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
    kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
    trace_m6_11 = pm.sample(1000, tune=1000)

pm.traceplot(trace_m6_11)

with pm.Model() as m6_12:
    alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
    beta = pm.Normal('beta', mu=0, sd=10)
    sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
    mu = alpha + beta * neocortex_shared
    kcal = pm.Normal('kcal', mu=mu, sd=sigma,
                     observed=d['kcal.per.g'])
    trace_m6_12 = pm.sample(1000, tune=1000)

with pm.Model() as m6_13:
    alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
    beta = pm.Normal('beta', mu=0, sd=10)
    sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
    mu = alpha + beta * mass_shared
    kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
    trace_m6_13 = pm.sample(1000, tune=1000)

with pm.Model() as m6_14:
    alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
    beta = pm.Normal('beta', mu=0, sd=10, shape=2)
    sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
    mu = alpha + beta[0] * mass_shared + beta[1] * neocortex_shared
    kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
    trace_m6_14 = pm.sample(1000, tune=1000)

pm.waic(trace_m6_14, m6_14)

compare_df = pm.compare(
    [trace_m6_11, trace_m6_12, trace_m6_13, trace_m6_14],
    [m6_11, m6_12, m6_13, m6_14],
    method='pseudo-BMA')
compare_df.loc[:, 'model'] = pd.Series(['m6.11', 'm6.12', 'm6.13', 'm6.14'])
compare_df = compare_df.set_index('model')
compare_df

pm.compareplot(compare_df)
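# For reference: the 'pseudo-BMA' weights reported by pm.compare are
# Akaike-type weights built from differences in the information criterion.
# A minimal sketch of the basic (non-bootstrap) calculation on
# deviance-scale WAIC values; `waics` is a hypothetical list of per-model
# WAICs, not something pm.compare exposes under that name:
import numpy as np

def pseudo_bma_weights(waics):
    """w_i proportional to exp(-0.5 * (WAIC_i - min WAIC))."""
    waics = np.asarray(waics, dtype=float)
    rel_likelihood = np.exp(-0.5 * (waics - waics.min()))
    return rel_likelihood / rel_likelihood.sum()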
# Reliability function
ax = plt.subplot(1, 1, 1)
t = np.arange(1, 7, 1)
R1 = np.exp(-((t / post_beta_mu1) ** post_alpha1))
R2 = np.exp(-((t / hpd25_beta_mu) ** hpd2_5_alpha))
R3 = np.exp(-((t / hpd975_beta_mu) ** hpd97_5_alpha))
# plt.plot(t, R2, 'k-', t, R1, 'bo--', t, R3, 'r')
plt.plot(t, R2, 'k-', t, R3, 'r')
# Only R2 and R3 are drawn, so the legend needs exactly two entries
# (restore the mean-reliability label if R1 is plotted again)
ax.legend([u'2.5% reliability bound', u'97.5% reliability bound'], prop=font)
plt.show()

print(pm.dic(trace2, unpooled_model))
A = pm.compare([trace1, trace2], [pooled_model, unpooled_model], ic='WAIC')
print(A)
pm.compareplot(A)
plt.show()

# Make predictions
# elec_year1 = elec_year
# elec_year1[0:84] = 7
# elec_year1[5:42:6] = 7
# elec_year1 = int(np.ones(len(elec_faults))*7)
print(elec_faults.mean())
# elec_faults2 = np.zeros(len(elec_faults))
x_shared.set_value(np.asarray(test_year))
# y_shared.set_value(elec_faults2)
Num_shared.set_value(np.asarray(test_abc))
# print(elec_faults.mean())
with unpooled_model:
    post_pred = pm.sample_ppc(trace2)
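# The reliability curves at the top of this snippet all share the Weibull
# form R(t) = exp(-(t / beta) ** alpha). A small helper makes that explicit;
# the function name is hypothetical, alpha is the shape and beta the scale:
import numpy as np

def weibull_reliability(t, alpha, beta):
    """Weibull reliability R(t) = exp(-(t / beta) ** alpha)."""
    return np.exp(-(np.asarray(t, dtype=float) / beta) ** alpha)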
labels = ['Data', 'Low', 'Mean', 'High']
ax.legend(handles, labels)
ax.grid()
plt.show()

# *************************************************************************
# Compute WAIC for both models
waic_base = pm.waic(trace_base, model_base)
waic_sex = pm.waic(trace_sex, model_sex)

# Set model names
model_base.name = 'base'
model_sex.name = 'sex'

# Comparison by WAIC
comp_WAIC_base_v_sex = pm.compare({model_base: trace_base,
                                   model_sex: trace_sex})
display(comp_WAIC_base_v_sex)
pm.compareplot(comp_WAIC_base_v_sex)

# Generate the posterior predictive in both the base and sex models,
# reusing cached samples when they are available
try:
    post_pred_base = vartbl['post_pred_base']
    post_pred_sex = vartbl['post_pred_sex']
    print('Loaded posterior predictive for base and sex models.')
except KeyError:
    with model_base:
        post_pred_base = pm.sample_ppc(trace_base)
    with model_sex:
        post_pred_sex = pm.sample_ppc(trace_sex)
    vartbl['post_pred_base'] = post_pred_base
    vartbl['post_pred_sex'] = post_pred_sex
    save_vartbl(vartbl, fname)
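# The caching above assumes `vartbl` is a dict persisted by a `save_vartbl`
# helper. A minimal pickle-based sketch, with a matching loader; both names
# and the storage format are assumptions, not confirmed by the source:
import os
import pickle

def save_vartbl(vartbl, fname):
    """Persist the variable table to disk."""
    with open(fname, 'wb') as f:
        pickle.dump(vartbl, f)

def load_vartbl(fname):
    """Load the variable table, or start fresh if the file is absent."""
    if os.path.isfile(fname):
        with open(fname, 'rb') as f:
            return pickle.load(f)
    return dict()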