import copy

import numpy as np
import pymc3 as pm
from numpy.testing import assert_almost_equal


def test_compare():
    np.random.seed(42)
    x_obs = np.random.normal(0, 1, size=100)

    with pm.Model() as model0:
        mu = pm.Normal('mu', 0, 1)
        x = pm.Normal('x', mu=mu, sd=1, observed=x_obs)
        trace0 = pm.sample(1000)

    with pm.Model() as model1:
        mu = pm.Normal('mu', 0, 1)
        x = pm.Normal('x', mu=mu, sd=0.8, observed=x_obs)
        trace1 = pm.sample(1000)

    with pm.Model() as model2:
        mu = pm.Normal('mu', 0, 1)
        x = pm.StudentT('x', nu=1, mu=mu, lam=1, observed=x_obs)
        trace2 = pm.sample(1000)

    # identical models should receive equal weights under every method
    traces = [trace0, copy.copy(trace0)]
    models = [model0, copy.copy(model0)]
    model_dict = dict(zip(models, traces))

    w_st = pm.compare(model_dict, method='stacking')['weight']
    w_bb_bma = pm.compare(model_dict, method='BB-pseudo-BMA')['weight']
    w_bma = pm.compare(model_dict, method='pseudo-BMA')['weight']

    assert_almost_equal(w_st[0], w_st[1])
    assert_almost_equal(w_bb_bma[0], w_bb_bma[1])
    assert_almost_equal(w_bma[0], w_bma[1])

    assert_almost_equal(np.sum(w_st), 1.)
    assert_almost_equal(np.sum(w_bb_bma), 1.)
    assert_almost_equal(np.sum(w_bma), 1.)

    traces = [trace0, trace1, trace2]
    models = [model0, model1, model2]
    model_dict = dict(zip(models, traces))

    w_st = pm.compare(model_dict, method='stacking')['weight']
    w_bb_bma = pm.compare(model_dict, method='BB-pseudo-BMA')['weight']
    w_bma = pm.compare(model_dict, method='pseudo-BMA')['weight']

    assert w_st[0] > w_st[1] > w_st[2]
    assert w_bb_bma[0] > w_bb_bma[1] > w_bb_bma[2]
    assert w_bma[0] > w_bma[1] > w_bma[2]

    assert_almost_equal(np.sum(w_st), 1.)
    assert_almost_equal(np.sum(w_bb_bma), 1.)
    assert_almost_equal(np.sum(w_bma), 1.)
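# For reference: the pseudo-BMA weights checked in the test above are, per
# Yao et al. (2018), a softmax over the models' expected log predictive
# densities. A minimal NumPy sketch with made-up elpd values (illustrative
# numbers only, not from any fitted model):
import numpy as np

elpd = np.array([-120.3, -124.9, -131.0])   # hypothetical elpd_loo estimates
w = np.exp(elpd - elpd.max())               # subtract the max for numerical stability
w /= w.sum()                                # pseudo-BMA: w_k = exp(elpd_k) / sum_j exp(elpd_j)
assert np.isclose(w.sum(), 1.0)             # weights normalize to 1, as the test asserts
print(w)                                    # the best-scoring model gets the largest weight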
def compare_waic_for_models(prior_type):
    model_trace_dict = {}
    if prior_type == 'weak':
        prior_b_std = [100]
    else:
        prior_b_std = [5]
    for degree in range(1, 7):
        for prior_b_sigma in prior_b_std:
            model, trace = load_model_trace(
                'chapter_06/fitted_models/M%d_b_std_%d.pkl' % (degree, prior_b_sigma))
            model.name = 'M%d_b_std_%d.pkl' % (degree, prior_b_sigma)
            model_trace_dict[model.name] = trace
    df_comp_WAIC = pm.compare(model_trace_dict)
    st.table(df_comp_WAIC.style.format({
        'waic': '{:.2f}',
        'p_waic': '{:.2f}',
        'd_waic': '{:.2f}',
        'weight': '{:.2f}',
        'se': '{:.2f}',
        'dse': '{:.2f}'
    }))
    fig, ax = plt.subplots(figsize=(6, 6))
    pm.compareplot(df_comp_WAIC)
    st.pyplot()
def model_gen_fit(model_specs, test, subj_idx, thresh_iqr=5, **sample_kws):
    """Test model fitting for generated data."""
    n_model = len(model_specs)
    n_subj = len(np.unique(subj_idx))
    model_names = [spec['name'] for spec in model_specs]

    # initialize output variables for all comparisons
    comp_vars = ['rank', 'loo', 'p_loo', 'd_loo', 'weight', 'se', 'dse', 'warning']
    results = {'winner': np.zeros((n_model, n_model), dtype=int)}
    for var in comp_vars:
        shape = (n_model, n_model)
        if var == 'warning':
            results[var] = np.zeros(shape, dtype=bool)
        elif var == 'rank':
            results[var] = np.zeros(shape, dtype=int)
        else:
            results[var] = np.zeros(shape)

    for i, gen_spec in enumerate(model_specs):
        # generate a parameter set
        param, subj_param = sample_params(gen_spec['fixed'], gen_spec['param'],
                                          gen_spec['subj_param'], n_subj)

        # generate data from the random parameters
        gen_model = gen_spec['model']
        raw = gen_model.gen(test, param, subj_idx, subj_param=subj_param)

        # remove extreme and missing values
        data = task.scrub_rt(raw, thresh_iqr)
        rt = data.rt.values
        response = data.response.values
        samp_test = data.test_type.values
        samp_subj = data.subj_idx.values

        all_trace = {}
        for j, fit_spec in enumerate(model_specs):
            fit_model = fit_spec['model']
            graph = fit_model.init_graph_hier(rt, response, samp_test, samp_subj)
            trace = pm.sample(model=graph, **sample_kws)
            all_trace[fit_spec['name']] = trace

        # compare models
        df_comp = pm.compare(all_trace, ic='LOO', method='BB-pseudo-BMA',
                             b_samples=10000)

        # save results in correct position
        for j, name in enumerate(model_names):
            for var in comp_vars:
                results[var][i, j] = df_comp.loc[name, var]
            results['winner'][i, j] = 1 if df_comp.loc[name, 'rank'] == 0 else 0
    return results
def comp_e(model_matrix, file_name):
    # fit the two models
    model_rbfcls, trace_rbfcls = fit_choice_models.sample_hier_rbf_cls(model_matrix)
    model_rbfkal, trace_rbfkal = fit_choice_models.sample_heir_rbf_kal(model_matrix)

    # compare via LOO
    df_comp_loo = pm.compare({
        model_rbfcls: trace_rbfcls,
        model_rbfkal: trace_rbfkal,
    }, ic='LOO')
    df_comp_loo.rename(index={0: 'RBF/Cluster', 1: 'RBF/Kalman'}, inplace=True)
    df_comp_loo.to_pickle(file_name)
def model_comparison_WAIC(models, path, file_id, MODEL_NAME_MAP,
                          should_plot=True, export=True):
    """Conduct model comparison using WAIC, given a list of fitted models."""
    traces = [model.trace for model in models]
    models = [model.model for model in models]
    WAIC = pm.compare(traces, models).rename(index=MODEL_NAME_MAP)
    if should_plot:
        pm.compareplot(WAIC)
    if export:
        plt.savefig(f'{path}/{file_id}_WAIC.pdf', format='pdf',
                    bbox_inches='tight')
        plt.cla()
    return WAIC
def compare(models, labels=None, insample_dev=False, **kwargs):
    """Easier model comparison for BAMBI models.

    Automatically expands model terms into formulas and sets them as model names.

    :param models: list of BAMBI model objects, or dict mapping labels to models
    :param kwargs: keyword args for the PyMC3 model comparison function
    :returns: tuple of the matplotlib axes of the model comparison plot and a
        pandas DataFrame of model statistics
    """
    traces = dict()
    if type(models) is dict:
        for label, model in models.items():
            traces[label] = model.backend.trace
    else:
        for model in models:
            traces[' + '.join(model.terms.keys())] = model.backend.trace
    comparison = pm.compare(traces, **kwargs)
    g = pm.compareplot(comparison, insample_dev=insample_dev)
    return g, comparison
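# A hypothetical usage sketch for the wrapper above: `fitted_a` and `fitted_b`
# stand for BAMBI models that have already been fitted (so `model.backend.trace`
# exists); the names are placeholders, not part of any API:
g, stats = compare([fitted_a, fitted_b], insample_dev=True)
print(stats)  # one row per model, indexed by the auto-generated '+'-joined formulas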
def model_uncertainty(splits, stakes, actions, temp=1., sd=1.):
    with pm.Model() as repeated_model:
        r = pm.Gamma('r', alpha=1, beta=1)
        p = pm.Gamma('p', alpha=1, beta=1)
        t = pm.Beta('t', alpha=2, beta=5)
        st = pm.Beta('st', alpha=1, beta=1)
        c = pm.Gamma('c', alpha=1, beta=1)
        odds_a = np.exp(2 * r * splits + c * stakes**st)
        odds_r = np.exp(p * (splits < 0.5 - t / 2))
        # note: the Python name `p` is rebound here to the choice probability;
        # the Gamma random variable named 'p' still lives in the model
        p = odds_a / (odds_r + odds_a)
        a = pm.Binomial('a', 1, p, observed=actions)
        fitted = pm.fit(method='advi')
        trace_repeated = fitted.sample(2000)
        # trace_repeated = pm.sample(200000, step=pm.Slice(), chains=2, cores=4)

    # with pm.Model() as simple_model:
    #     r = pm.Normal('r', mu=0, sd=1)
    #     p = np.exp(r*splits) / (1 + np.exp(r*splits))
    #     a = pm.Binomial('a', 1, p, observed=actions)
    #     trace_simple = pm.sample(2000, init='map')

    with pm.Model() as fairness_model:
        r = pm.Gamma('r', alpha=1, beta=1)
        t = pm.Beta('t', alpha=2, beta=5)
        f = pm.Normal('f', mu=0, sd=sd)
        st = pm.Beta('st', alpha=1, beta=1)
        c = pm.Gamma('c', alpha=1, beta=1)
        odds = np.exp(c * stakes**st + splits * r - f * (splits < 0.5 - t / 2))
        p = odds / (1 + odds)
        a = pm.Binomial('a', 1, p, observed=actions)
        fitted = pm.fit(method='advi')
        trace_fairness = fitted.sample(2000)
        # trace_fairness = pm.sample(200000, step=pm.Slice(), chains=2, cores=4)

    fairness_model.name = 'fair'
    repeated_model.name = 'repeated'
    model_dict = dict(zip([fairness_model, repeated_model],
                          [trace_fairness, trace_repeated]))
    comp = pm.compare(model_dict, ic='LOO', method='BB-pseudo-BMA')
    return trace_fairness, trace_repeated, comp
def compare(models):
    """Compare models on WAIC (and some other measures).

    In: models: iterable of fitted Model instances
    Out: DataFrame, indexed by model names, with columns holding comparison values
    """
    # 'model_names' needs to be a Series rather than a plain list because
    # pm.compare returns a DataFrame sorted by the information criterion.
    # Model names must therefore be matched to rows of that DataFrame by
    # index, which records each model's original position in the input.
    # (note: silly design by pymc3)
    model_names = pd.Series([fm.name for fm in models])
    model_dict = {fm.model: fm.trace for fm in models}
    return (pm.compare(model_dict=model_dict, method='BB-pseudo-BMA')
            .assign(model=model_names)
            .set_index('model'))
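# A minimal pandas-only sketch of the pitfall the comment above describes:
# pm.compare sorts its result by information criterion, so row order no longer
# matches input order, but the integer index still records each model's
# original position. Assigning a Series (rather than a list) therefore
# re-attaches names correctly. Values here are made up:
import pandas as pd

names = pd.Series(['model_a', 'model_b'])            # input order 0, 1
comp = pd.DataFrame({'waic': [210.4, 198.7]}).sort_values('waic')
comp = comp.assign(model=names).set_index('model')   # aligns on index, not on row order
print(comp)                                          # model_b (waic 198.7) sorts first, correctly named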
def get_prediction_weights(self, predictions, observations,
                           N_SAMPLES=1000, N_TUNES=1000, method='stacking'):
    if predictions.shape[1] == 0:
        # no history to weight on: fall back to uniform weights
        weak_predictors_num = predictions.shape[0]
        return np.full((weak_predictors_num), 1 / weak_predictors_num)

    sigma_start = np.std(observations)
    alpha_start = 1
    beta_start = 0
    models = []
    traces = []
    for i in range(predictions.shape[0]):
        p = predictions[i, :]
        with pm.Model() as model:
            sigma = pm.HalfNormal('sigma', 0.1, testval=alpha_start)
            alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
            beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
            mu = alpha * p + beta
            likelihood = pm.Normal('likelihood', mu=mu, sd=sigma,
                                   observed=observations)
            trace = pm.sample(N_SAMPLES, tune=N_TUNES)
        models.append(model)
        traces.append(trace)
    compare_ds = pm.compare(traces, models, method=method)
    return compare_ds.weight.sort_index(ascending=True)
plt.show()

az.plot_forest(trace)
plt.show()

# might need to multiply by -2 to compare with McElreath
with m6_11:
    print(pm.waic(trace))
    print(pm.loo(trace))

with pm.Model() as m6_13:
    alpha = pm.Uniform('alpha', 0, 5)
    bm = pm.Uniform('bm', -10, 10)
    log_sigma = pm.Uniform('log_sigma', -10, 10)
    mu = alpha + bm * d['lmass']
    y_obs = pm.Normal('y_obs', mu=mu, sigma=np.exp(log_sigma),
                      observed=d['kcal.per.g'])
    trace_13 = pm.sample(2000, return_inferencedata=True, chains=2)

with m6_13:
    print(pm.summary(trace_13))
    print(pm.waic(trace_13))
    print(pm.loo(trace_13))

with m6_13:
    print(-2 * pm.loo(trace_13))

# use each model's own trace; reusing one trace for both models would be a bug
pm.compare({m6_11: trace, m6_13: trace_13})
pm.compare({m6_11: trace, m6_13: trace_13}, ic='WAIC')
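# The -2 noted above converts from the log-score scale reported by pm.waic and
# pm.loo (elpd) to the deviance scale used in McElreath's tables:
# deviance = -2 * elpd. A one-line sketch with a made-up elpd value:
elpd_loo = -269.5                 # hypothetical elpd estimate
deviance_loo = -2 * elpd_loo      # 539.0, comparable to the book's WAIC/LOO tables
print(deviance_loo)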
plot_poserterior_mean(tracem73['mu'], dd.rugged, dd.log_gdp)

# 7.4
with pm.Model() as m7_4:
    alpha = pm.Normal('alpha', mu=8, sigma=100)
    beta = pm.Normal('beta', sigma=1)
    beta2 = pm.Normal('beta2', sigma=1)
    sigma = pm.Uniform('sigma', upper=10)
    mu = pm.Deterministic('mu', alpha + beta * dd.rugged.values
                          + beta2 * dd.cont_africa.values)
    log_gdp = pm.Normal('log_gdp', mu=mu, sigma=sigma,
                        observed=dd.log_gdp.values)
    tracem74 = pm.sample(draws=1000, tune=1000)

# 7.5
m7_3.name = 'm73'
m7_4.name = 'm74'
pm.compare({m7_3: tracem73, m7_4: tracem74})

# 7.6
rugged_seq = np.arange(-1, 8, .25)
mu_Af = np.zeros((len(rugged_seq), tracem74['mu'].shape[0]))
mu_noAf = np.zeros((len(rugged_seq), tracem74['mu'].shape[0]))
for row, seq in enumerate(rugged_seq):
    mu_Af[row, :] = (tracem74['alpha'] + tracem74['beta'] * rugged_seq[row]
                     + tracem74['beta2'] * 1)
    mu_noAf[row, :] = (tracem74['alpha'] + tracem74['beta'] * rugged_seq[row]
                       + tracem74['beta2'] * 0)
hpd_af = az.hpd(mu_Af.T, credible_interval=.97)
hpd_noaf = az.hpd(mu_noAf.T, credible_interval=.97)
plt.plot(da1.rugged, da1.log_gdp, marker='o', linestyle='', color='blue')
hpd25_beta_mu = hpd2_5[1:].mean()
hpd975_beta_mu = hpd97_5[1:].mean()

# reliability function
ax = plt.subplot(1, 1, 1)
t = np.arange(1, 7, 1)
R1 = np.exp(-((t / post_beta_mu1)**post_alpha1))
R2 = np.exp(-((t / hpd25_beta_mu)**hpd2_5_alpha))
R3 = np.exp(-((t / hpd975_beta_mu)**hpd97_5_alpha))
# plt.plot(t, R2, 'k-', t, R1, 'bo--', t, R3, 'r')
plt.plot(t, R2, 'k-', t, R3, 'r')
ax.legend(['reliability interval 2.5', 'reliability mean',
           'reliability interval 97.5'], prop=font)
plt.show()

print(pm.dic(trace2, unpooled_model))

A = pm.compare([trace1, trace2], [pooled_model, unpooled_model], ic='WAIC')
print(A)
pm.compareplot(A)
plt.show()

# make predictions
# elec_year1 = elec_year
# elec_year1[0:84] = 7
# elec_year1[5:42:6] = 7
# elec_year1 = int(np.ones(len(elec_faults))*7)
print(elec_faults.mean())
# elec_faults2 = np.zeros(len(elec_faults))
x_shared.set_value(np.asarray(test_year))
# y_shared.set_value(elec_faults2)
Num_shared.set_value(np.asarray(test_abc))
# print(elec_faults.mean())
# Model with no collinearity
#%%
with pm.Model() as model_no_collinear:
    a = pm.Normal('a', mu=10, sigma=100)
    br = pm.Normal('br', mu=2, sigma=10)
    sigma = pm.Uniform('sigma', lower=0, upper=10)
    mu = pm.Deterministic('mu', a + br * leg_right)
    h = pm.Normal('h', mu=mu, sigma=sigma, observed=height)
    trace_no_collinear = pm.sample(cores=2)

#%%
model_collinear.name = 'collinear'
model_no_collinear.name = 'no-collinear'
df_comp_models = pm.compare({
    model_collinear: trace_collinear,
    model_no_collinear: trace_no_collinear
})
df_comp_models

#%%
pm.forestplot(trace_collinear, var_names=['a', 'bl', 'br', 'sigma'])
pm.forestplot(trace_no_collinear, var_names=['a', 'br', 'sigma'])

# Posterior predictive
#%%
collinear_ppc = pm.sample_posterior_predictive(trace_collinear, samples=500,
                                               model=model_collinear)
no_collinear_ppc = pm.sample_posterior_predictive(trace_no_collinear, samples=500,
                                                  model=model_no_collinear)
# Regression
mu = a + bA * age
happy_hat = pm.Normal('happy_hat', mu=mu, sd=sigma, observed=happiness)

# Prior sampling, trace definition and posterior sampling
prior = pm.sample_prior_predictive(samples=30)
posterior_610 = pm.sample()
posterior_pred_610 = pm.sample_posterior_predictive(posterior_610)

az.summary(posterior_610, credible_interval=.89).round(2)
pm.traceplot(posterior_610)

model_69.name = 'model_69'
model_610.name = 'model_610'
pm.compare({
    model_69: posterior_69,
    model_610: posterior_610
})
# The model that produces the invalid inference, m6.9, is expected to predict
# much better, and it would: the collider path does convey actual association,
# we simply end up mistaken about the causal inference. We should not use WAIC
# (or LOO) to choose among models unless we have some clear sense of the
# causal model.

# Q3
d = pd.read_csv('../../data/foxes.csv', sep=';', header=0)
d.head()
d[['avgfood', 'groupsize', 'area', 'weight']] = preprocessing.scale(
    d[['avgfood', 'groupsize', 'area', 'weight']])
d.head()
avgfood = theano.shared(np.array(d.avgfood))
beta = pm.HalfNormal('beta', sd=10.)
pm.Cauchy('returns', alpha=0.0, beta=beta, observed=returns)
mean_field = pm.fit(n=150000, method='advi',
                    obj_optimizer=pm.adam(learning_rate=.001))
trace2 = mean_field.sample(draws=10000)

preds2 = pm.sample_ppc(trace2, samples=10000, model=model2)
y2 = np.reshape(np.mean(preds2['returns'], axis=0), [-1])

fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.hist(y2)
ax1.set_title('Cauchy distribution returns')
ax2.hist(returns)
ax2.set_title('Real returns')
plt.show()

print("Estimating LOO...")

# Let's compare the fit of both models
model1.name = 'Gaussian model'
model2.name = 'Cauchy model'
df_LOO = pm.compare({model1: trace, model2: trace2}, ic='LOO')
print("LOO comparison table: ", df_LOO)
# Legend
handles = [p1[0], p2[0], p3[0], p4[0]]
labels = ['Data', 'Low', 'Mean', 'High']
ax.legend(handles, labels)
ax.grid()
plt.show()

# *************************************************************************
# Compute WAIC for both models
waic_base = pm.waic(trace_base, model_base)
waic_sex = pm.waic(trace_sex, model_sex)

# Set model names
model_base.name = 'base'
model_sex.name = 'sex'

# Comparison of WAIC
comp_WAIC_base_v_sex = pm.compare({model_base: trace_base,
                                   model_sex: trace_sex})
display(comp_WAIC_base_v_sex)
pm.compareplot(comp_WAIC_base_v_sex)

# Generate the posterior predictive in both base and sex models
try:
    post_pred_base = vartbl['post_pred_base']
    post_pred_sex = vartbl['post_pred_sex']
    print('Loaded posterior predictive for base and sex models.')
except KeyError:
    with model_base:
        post_pred_base = pm.sample_ppc(trace_base)
    with model_sex:
        post_pred_sex = pm.sample_ppc(trace_sex)
    vartbl['post_pred_base'] = post_pred_base
    vartbl['post_pred_sex'] = post_pred_sex
    draws=args.ndraws,
    tune=args.ntune,
    # backend='saved_gzb_bhsm_trace'
)

cot_uniform_bhsm = CotUniformBHSM(galaxies.values)
cot_uniform_trace = cot_uniform_bhsm.do_inference(
    draws=args.ndraws,
    tune=args.ntune,
    # backend='saved_gzb_bhsm_trace'
)

loo = pm.compare({
    bhsm.model: trace,
    cot_uniform_bhsm.model: cot_uniform_trace
}, ic='LOO')
print('\n', loo)

# save EVERYTHING
with open(args.output, "wb") as buff:
    pickle.dump(
        {
            'normal_model': bhsm,
            'normal_trace': trace,
            'cot_model': cot_uniform_bhsm,
            'cot_trace': cot_uniform_trace,
            'loo': loo,
            'n_samples': args.ndraws,
def fitCompare(data, subject, n_tries=1, overwrite=False, progressbar=True):
    """
    Perform fitting of GLAM variants and WAIC model comparisons
    for a single subject:
    1) Multiplicative vs Additive
    2) Multiplicative vs No Bias
    3) Multiplicative vs Additive vs No Bias
    """
    print("Processing subject {}...".format(subject))

    # Subset data
    subject_data = data[data['subject'] == subject].copy()
    n_items = subject_data['n_items'].values[0]
    if n_items == 2:
        subject_data = subject_data.drop(['item_value_2', 'gaze_2'], axis=1)
    subject_data['subject'] = 0

    # model specifications
    model_names = ('GLAM', 'additive', 'nobias')
    drifts = ('multiplicative', 'additive', 'multiplicative')
    parameter_sets = (['v', 's', 'tau', 'gamma'],
                      ['v', 's', 'tau', 'gamma'],
                      ['v', 's', 'tau'])
    gamma_bounds = ((-10, 1), (-100, 100), (-10, 1))
    gamma_vals = (None, None, 1.0)

    # fit models
    converged_models = np.ones(len(model_names))
    models = len(model_names) * [None]
    for i, (model_name, drift, parameters, gamma_bound, gamma_val) in enumerate(
            zip(model_names, drifts, parameter_sets, gamma_bounds, gamma_vals)):
        print('\tS{}: {}'.format(subject, model_name))
        model, is_converged = fit_indModel(subject_data, subject,
                                           drift=drift,
                                           parameters=parameters,
                                           gamma_bounds=gamma_bound,
                                           gamma_val=gamma_val,
                                           t0_val=0,
                                           model_name=model_name)
        models[i] = model
        converged_models[i] = int(is_converged)
        if not is_converged:
            break

    # re-sample all converged models, if any model did not converge
    if np.any(converged_models == 0):
        for i in np.where(converged_models == 1)[0]:
            print('\tRe-sampling S{}: {}'.format(subject, model_names[i]))
            model, is_converged = fit_indModel(subject_data, subject,
                                               drift=drifts[i],
                                               parameters=parameter_sets[i],
                                               gamma_bounds=gamma_bounds[i],
                                               gamma_val=gamma_vals[i],
                                               t0_val=0,
                                               model_name=model_names[i],
                                               n_tries_max=0)
            models[i] = model

    # un-pack models
    if any(m is None for m in models):
        missing = [name for name, m in zip(model_names, models) if m is None]
        raise ValueError('Model {} not sampled.'.format(missing))
    multiplicative, additive, nobias = models

    # Individual Model Comparisons
    # 1) Multiplicative vs Additive
    try:
        waic_df = pm.compare({
            additive.model[0]: additive.trace[0],
            multiplicative.model[0]: multiplicative.trace[0]
        }, ic='WAIC')
        path = os.path.join('results', 'model_comparison',
                            'additive_vs_multiplicative')
        make_sure_path_exists(path)
        make_sure_path_exists(path + '/plots/')
        waic_df.to_csv(os.path.join(
            path, 'additive_vs_multiplicative_{}_waic.csv'.format(subject)))
        pm.compareplot(waic_df)
        plt.savefig(os.path.join(
            'results', 'model_comparison', 'additive_vs_multiplicative',
            'plots', 'additive_vs_multiplicative_{}_waic.png'.format(subject)))
        plt.close()
    except Exception:
        print(' /!\\ Error in WAIC comparison for subject {}'.format(subject))

    # 2) Multiplicative vs No Bias
    try:
        waic_df = pm.compare({
            multiplicative.model[0]: multiplicative.trace[0],
            nobias.model[0]: nobias.trace[0]
        }, ic='WAIC')
        path = os.path.join('results', 'model_comparison',
                            'multiplicative_vs_nobias')
        make_sure_path_exists(path)
        make_sure_path_exists(path + '/plots/')
        waic_df.to_csv(os.path.join(
            path, 'multiplicative_vs_nobias_{}_waic.csv'.format(subject)))
        pm.compareplot(waic_df)
        plt.savefig(os.path.join(
            'results', 'model_comparison', 'multiplicative_vs_nobias',
            'plots', 'multiplicative_vs_nobias_{}_waic.png'.format(subject)))
        plt.close()
    except Exception:
        print(' /!\\ Error in WAIC comparison for subject {}'.format(subject))

    # 3) Multiplicative vs Additive vs No Bias
    try:
        waic_df = pm.compare({
            multiplicative.model[0]: multiplicative.trace[0],
            additive.model[0]: additive.trace[0],
            nobias.model[0]: nobias.trace[0]
        }, ic='WAIC')
        path = os.path.join('results', 'model_comparison',
                            'additive_vs_multiplicative_vs_nobias')
        make_sure_path_exists(path)
        make_sure_path_exists(path + '/plots/')
        waic_df.to_csv(os.path.join(
            path,
            'additive_vs_multiplicative_vs_nobias_{}_waic.csv'.format(subject)))
        pm.compareplot(waic_df)
        plt.savefig(os.path.join(
            'results', 'model_comparison',
            'additive_vs_multiplicative_vs_nobias', 'plots',
            'additive_vs_multiplicative_vs_nobias_{}_waic.png'.format(subject)))
        plt.close()
    except Exception:
        print(' /!\\ Error in WAIC comparison for subject {}'.format(subject))

    return True
def get_weights(self, predictions_aapl, predictions_msft, predictions_bac,
                observations_aapl):
    N_SAMPLES = 1000
    N_TUNES = 1000
    sigma_start = np.std(observations_aapl)
    alpha_start = 1
    beta_start = 0
    # predictions_shared = theano.shared(predictions_aapl)
    predictions = np.stack([predictions_aapl, predictions_msft, predictions_bac])

    with pm.Model() as model:
        sigma = pm.HalfNormal('sigma', 0.1, testval=alpha_start)
        alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start, shape=3)
        beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start, shape=3)
        mu = alpha * predictions + beta
        p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
        trace_model = pm.sample(N_SAMPLES, tune=N_TUNES)

    with pm.Model() as model_aapl:
        sigma = pm.HalfNormal('sigma', 0.1, testval=alpha_start)
        alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
        beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
        mu = alpha * predictions_aapl + beta
        p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
        trace_model_aapl = pm.sample(N_SAMPLES, tune=N_TUNES)

    with pm.Model() as model_msft:
        sigma = pm.HalfNormal('sigma', 0.1, testval=alpha_start)
        alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
        beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
        mu = alpha * predictions_msft + beta
        p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
        trace_model_msft = pm.sample(N_SAMPLES, tune=N_TUNES)

    with pm.Model() as model_bac:
        sigma = pm.HalfNormal('sigma', 0.1, testval=alpha_start)
        alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
        beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
        mu = alpha * predictions_bac + beta
        p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
        trace_model_bac = pm.sample(N_SAMPLES, tune=N_TUNES)

    compare_1 = pm.compare([trace_model_aapl, trace_model_msft, trace_model_bac],
                           [model_aapl, model_msft, model_bac],
                           method='pseudo-BMA')
    compare_2 = pm.compare([trace_model_msft, trace_model_bac, trace_model_aapl],
                           [model_msft, model_bac, model_aapl],
                           method='pseudo-BMA')
    compare_3 = pm.compare([trace_model_aapl, trace_model_msft, trace_model_bac],
                           [model_aapl, model_msft, model_bac],
                           method='BB-pseudo-BMA')
    compare_4 = pm.compare([trace_model_aapl, trace_model_msft, trace_model_bac],
                           [model_aapl, model_msft, model_bac],
                           method='stacking')
    compare_5 = pm.compare([trace_model_msft, trace_model_bac],
                           [model_msft, model_bac], method='pseudo-BMA')
    compare_6 = pm.compare([trace_model_aapl, trace_model_msft],
                           [model_aapl, model_msft], method='BB-pseudo-BMA')
    compare_7 = pm.compare([trace_model_aapl, trace_model_msft],
                           [model_aapl, model_msft], method='stacking')
    # pm.traceplot(trace_model)


d = pd.read_csv('data/milk.csv', sep=';')
d['neocortex'] = d['neocortex.perc'] / 100
d.dropna(inplace=True)
d.shape

a_start = d['kcal.per.g'].mean()
sigma_start = d['kcal.per.g'].std()

mass_shared = theano.shared(np.log(d['mass'].values))
neocortex_shared = theano.shared(d['neocortex'].values)

with pm.Model() as m6_11:
    alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
    mu = alpha + 0 * neocortex_shared
    sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
    kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
    trace_m6_11 = pm.sample(1000, tune=1000)

pm.traceplot(trace_m6_11)

with pm.Model() as m6_12:
    alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
    beta = pm.Normal('beta', mu=0, sd=10)
    sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
    mu = alpha + beta * neocortex_shared
    kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
    trace_m6_12 = pm.sample(1000, tune=1000)

with pm.Model() as m6_13:
    alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
    beta = pm.Normal('beta', mu=0, sd=10)
    sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
    mu = alpha + beta * mass_shared
    kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
    trace_m6_13 = pm.sample(1000, tune=1000)

with pm.Model() as m6_14:
    alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
    beta = pm.Normal('beta', mu=0, sd=10, shape=2)
    sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
    mu = alpha + beta[0] * mass_shared + beta[1] * neocortex_shared
    kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
    trace_m6_14 = pm.sample(1000, tune=1000)

pm.waic(trace_m6_14, m6_14)

compare_df = pm.compare([trace_m6_11, trace_m6_12, trace_m6_13, trace_m6_14],
                        [m6_11, m6_12, m6_13, m6_14],
                        method='pseudo-BMA')
compare_df.loc[:, 'model'] = pd.Series(['m6.11', 'm6.12', 'm6.13', 'm6.14'])
compare_df = compare_df.set_index('model')
compare_df

pm.compareplot(compare_df)
def compare(self, trace, ic='waic', scale='deviance'):
    return pm.compare(trace, ic=ic, scale=scale)
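# A hypothetical usage sketch for the thin wrapper above: `obj` stands for an
# instance of the enclosing class, and the {name: trace} dict mirrors the
# pattern used elsewhere in this collection (assumes a PyMC3 version whose
# pm.compare accepts the ic and scale keywords):
comp = obj.compare({'model_a': trace_a, 'model_b': trace_b},
                   ic='loo', scale='deviance')
print(comp)  # on the deviance scale, lower values indicate better expected fit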
    rand_pars.append(pars)

# Save the random samples to a npy file
randfilename = osjoin(model_comparison_folder,
                      f"{filename}_param_samples.npy")
np.save(randfilename, np.array(rand_pars))

# Model comparison (WAIC version kept for reference; LOO used below)
plaw_model.name = 'plaw'
brok_plaw_model.name = 'brok_plaw'

# df_comp_WAIC = pm.compare({plaw_model: trace,
#                            brok_plaw_model: trace_brok},
#                           ic='WAIC')

df_comp_LOO = pm.compare({plaw_model: trace,
                          brok_plaw_model: trace_brok},
                         ic='LOO')

plt.figure(figsize=(4.2, 4.2))

ax = plt.subplot(211)
# plt.title("Fit_params: {}".format(out[0]))
ax.loglog(pspec.freqs.value, pspec.ps1D, 'k', zorder=-10)

beam_amp = 10**(max([summ['mean'].logA,
                     -20 if fitinfo_dict[gal][name]['fixB']
                     else summ['mean'].logB]) - 1.)

ax.loglog(freqs, fit_model_func(freqs, summ['mean']['logA'],
                                summ['mean']['index'],
with pm.Model() as m4:
    # unpooled model
    α = pm.Normal('α', 0, 0.1, shape=2)
    β = pm.Normal('β', 0, 0.3)
    σ = pm.Exponential('σ', 1)
    μ = α[dfinal.cont_africa.values] + β * (dfinal.rugged_s.values - rbar)
    log_gdp_s_i = pm.Normal('log_gdp_s_i', μ, σ,
                            observed=dfinal.log_gdp_s.values)

with m3:
    trace_m3 = pm.sample()
with m4:
    trace_m4 = pm.sample()

m3.name = 'm3'
m4.name = 'm4'
pm.compare({m3: trace_m3, m4: trace_m4})

pm.summary(trace_m4, alpha=0.11)

# Making the slope conditional
with pm.Model() as m5:
    α = pm.Normal('α', 0, 0.1, shape=2)
    β = pm.Normal('β', 0, 0.3, shape=2)
    σ = pm.Exponential('σ', 1)
    μ = (α[dfinal.cont_africa.values]
         + β[dfinal.cont_africa.values] * (dfinal.rugged_s.values - rbar))
    log_gdp_s_i = pm.Normal('log_gdp_s_i', μ, σ,
                            observed=dfinal.log_gdp_s.values)
    trace_m5 = pm.sample()

pm.summary(trace_m5, alpha=0.11).round(decimals=2)

m5.name = 'm5'
pm.compare({m3: trace_m3, m4: trace_m4, m5: trace_m5}, ic='LOO')
plt.savefig('5partial_model.png', dpi=300, figsize=[14, 15])

pm.traceplot(chain3, varnames4)
plt.show()

# plot autocorrelation curves
pm.autocorrplot(chain3)
plt.show()

# plt.figure(figsize=(6, 14))
# pm.forestplot(chain3, varnames=['beta'])
# plt.show()
print(pm.dic(trace3, partial_model))

# ======================================================================
# model comparison
# ======================================================================
Waic = pm.compare([traces_ols_glm, trace1, trace3],
                  [mdl_ols_glm, pooled_model, partial_model], ic='WAIC')
# Waic = pm.compare([traces_ols_glm, trace1, trace2, trace3],
#                   [mdl_ols_glm, pooled_model, unpooled_model, partial_model],
#                   ic='WAIC')
print(Waic)

# # plot the product curve for company A
# sig0 = pm.hpd(trace['theta'], alpha=0.6)[0]
#
# plt.figure()
# ax = sns.distplot(sig0)
def compare_models(models, **kwargs):
    """
    Compares multiple fitted models.

    Parameters
    ----------
    models : list of glambox.GLAM
        List of fitted GLAM model instances.

    **kwargs : optional
        Additional keyword arguments to be passed to pymc3.compare

    Returns
    -------
    pandas.DataFrame
        DataFrame containing information criteria for each model.
    """
    # Check that more than one model is entered
    assert len(models) > 1, "Must enter at least two models."

    # Check model names, create some if there are none
    for m, model in enumerate(models):
        if model.name is None:
            model.name = 'model_{}'.format(m)

    # Check that all models have the same type:
    assert all([model.type == models[0].type for model in models]), \
        "Models have different types and cannot be compared."

    # Check that all models have the same number of PyMC3 models and traces:
    assert all([len(model.trace) == len(models[0].trace) for model in models]), \
        "Model instances have different numbers of subjects and cannot be compared."

    if models[0].type == 'hierarchical':
        df = pm.compare(
            model_dict={model.model: model.trace[0] for model in models},
            **kwargs)

        # read out column names
        cols = df.columns.tolist()

        # include model column
        df.index.name = 'model'
        df = df.reset_index()

        # reorder columns so that model comes first
        df = df[['model'] + cols]

    elif models[0].type == 'individual':
        df = []
        for s in range(len(models[0].trace)):
            compare_df_s = pm.compare(
                model_dict={model.model[s]: model.trace[s] for model in models},
                **kwargs)

            # read out column names
            cols = compare_df_s.columns.tolist()

            # include subject column
            compare_df_s['subject'] = s

            # include model column
            compare_df_s.index.name = 'model'
            compare_df_s = compare_df_s.reset_index()

            # reorder columns so that subject and model come first
            compare_df_s = compare_df_s[['subject', 'model'] + cols]
            df.append(compare_df_s)
        df = pd.concat(df).reset_index(drop=True)

    return df
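# A hypothetical usage sketch for compare_models: `glam_full` and `glam_nobias`
# stand for fitted glambox.GLAM instances of the same type; extra keyword
# arguments are forwarded unchanged to pm.compare:
comparison = compare_models([glam_full, glam_nobias], ic='WAIC')
print(comparison)  # hierarchical: one row per model;
                   # individual: one row per subject and model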
    chains=3,
    random_seed=SEED,
    nuts_kwargs=NUTS_KWARGS)

pm.traceplot(pooled_trace)
plt.savefig("pooled_trace.png")
plt.close()

pooled_model.name = "Pooled"
unpooled_model.name = "Unpooled"
hierarchical_model.name = "Hierarchical"

dfComp = pm.compare({
    hierarchical_model: hierarchical_trace,
    pooled_model: pooled_trace,
    unpooled_model: unpooled_trace
}, ic="LOO")
print(dfComp)

pm.compareplot(dfComp)
plt.tight_layout()
plt.savefig("compare.png")
plt.close()

g = sns.FacetGrid(df, col="Year", col_wrap=5)
g = g.map(plt.scatter, "UnionMembership", "DemShare")
x = np.linspace(-2, 2, 100)
for i, ax in enumerate(g.axes.flat):
    p_state = (hierarchical_trace["a_year"][:, i]
               + hierarchical_trace["b_year"][:, i] * x[:, None])
            ]
        )
    )
    .astype(np.float64)
    .round(2)
)

# %%
with m11_2:
    trace_11_2 = pm.sample(1000, tune=1000)

with m11_3:
    trace_11_3 = pm.sample(1000, tune=1000)

# %%
comp_df = pm.compare({m11_1: trace_11_1, m11_2: trace_11_2, m11_3: trace_11_3})
comp_df.loc[:, "model"] = pd.Series(["m11.1", "m11.2", "m11.3"])
comp_df = comp_df.set_index("model")
comp_df

# %%
pp_df = pd.DataFrame(
    np.array([[0, 0, 0], [0, 0, 1], [1, 0, 0], [1, 0, 1], [0, 1, 0], [0, 1, 1]]),
    columns=["action", "contact", "intention"],
)

# %%
pp_df

# %%
           alpha=alpha, marker='o')

# Plot generated exemplars
ax.scatter(data_all[pid][20:, 0], data_all[pid][20:, 1],
           s=20, color='red', alpha=alpha, marker='x')

# Plot the ellipses
plot_ellipse(ax, ms_post, ss_post)

# Standardize axes
lim = 2.5
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
ax.set_ylim(-lim, lim)
ax.set_xlim(-lim, lim)
ax.set_title(pid)

# Save the figure for easy future access
plt.savefig('real_fit%d.pdf' % ki)

## Model comparison
# Convert model and trace into dictionary pairs
dict_pairs = dict(zip(gmm_all, traces))

# Perform WAIC comparison
compare = pm.compare(dict_pairs, ic='WAIC')

# Print comparison
print(compare)
                       param['beta3'][ip] * elec_Pca_char2[ip * 42:(ip * 42 + 6)] +
                       param['beta4'][ip] * xl * xl))
        ax.plot(xl, yl2, 'k', linewidth=2, alpha=.05)
    # ax = sns.violinplot(data=elec_faults2[ip*7:(ip+1)*7])
    ax.plot(xp, yp, marker='o', alpha=.8)
    plt.plot(xl, yl, 'k--', linewidth=2)
    plt.plot(xl, y2, 'r', linewidth=2)
    plt.axis([0.5, 7, -.1, 4.5])
    plt.title('Subject %s' % (ip + 1))
plt.tight_layout()
plt.show()

WAIC = pm.compare([trace_1, trace_2b], [model_1, model_2b], ic='WAIC')
print(WAIC)

# reliability calculation; divide beta_mu by 100 to restore its scale
post_alpha1 = np.mean(chain_2b['alpha'])
post_beta_mu1 = np.mean(chain_2b['beta_mu']) / 100
varnames1 = ['alpha', 'beta_mu']
aaa1 = pm.df_summary(trace_2b, varnames1)
bbb1 = pd.DataFrame(aaa1)
hpdd2_5 = bbb1['hpd_2.5']
hpdd97_5 = bbb1['hpd_97.5']
hpd2_5_alpha = hpdd2_5[:1].mean()
hpd97_5_alpha = hpdd97_5[:1].mean()
                     alpha=alpha3, beta=theta3C,
                     observed=ys_faultsC)  # observed values

    # step1 = pm.Slice([Δ_a])
    start = pm.find_MAP()
    trace_3 = pm.sample(1000, start=start, njobs=1)

ax = pm.energyplot(trace_3)
bfmi3 = pm.bfmi(trace_3)
ax.set_title(f"BFMI = {bfmi3:.2f}")
plt.show()

pm.traceplot(trace_3)
plt.show()

WAIC3 = pm.compare([trace_1, trace_3], [model_1, model_3], ic='WAIC')
print('WAIC1: ', WAIC3)

# Leave-one-out cross-validation
df_comp_LOO = pm.compare([trace_1, trace_3], [model_1, model_3], ic='LOO')
print(df_comp_LOO)

# posterior analysis
varnames2 = ['theta3', 'theta3B', 'theta3C']
tmp3 = pm.df_summary(trace_3, varnames2)
MAP_tmp3 = tmp3['mean']


# compute root-mean-square error
def Rmse(predictions, targets):
create_figure_timeseries(traces[0], 'tab:red', plot_red_axis=True,
                         save_to=path_to_save + 'time.1',
                         add_more_later=False)
create_figure_timeseries(traces[1], 'tab:orange', plot_red_axis=True,
                         save_to=path_to_save + 'time.2',
                         add_more_later=False)
create_figure_timeseries(traces[2], 'tab:green', plot_red_axis=True,
                         save_to=path_to_save + 'time.3',
                         add_more_later=False)

loo = [pm.loo(e, scale='deviance', pointwise=True) for e in traces]
for e in reversed(loo):
    print("loo: %.2f %.2f %.2f" % (e['loo'], e['loo_se'], e['p_loo']))

models[0].name = 'one point'
models[1].name = 'two points'
models[2].name = 'three points'
compare = pm.compare({
    models[0].name: traces[0],
    models[1].name: traces[1],
    models[2].name: traces[2]
}, ic='LOO', scale='deviance')
print(compare)