Example #1
File: test_stats.py Project: afcarl/arviz
def test_compare():
    np.random.seed(42)
    x_obs = np.random.normal(0, 1, size=100)

    with pm.Model() as model0:
        mu = pm.Normal('mu', 0, 1)
        x = pm.Normal('x', mu=mu, sd=1, observed=x_obs)
        trace0 = pm.sample(1000)

    with pm.Model() as model1:
        mu = pm.Normal('mu', 0, 1)
        x = pm.Normal('x', mu=mu, sd=0.8, observed=x_obs)
        trace1 = pm.sample(1000)

    with pm.Model() as model2:
        mu = pm.Normal('mu', 0, 1)
        x = pm.StudentT('x', nu=1, mu=mu, lam=1, observed=x_obs)
        trace2 = pm.sample(1000)

    traces = [trace0, copy.copy(trace0)]
    models = [model0, copy.copy(model0)]

    model_dict = dict(zip(models, traces))

    w_st = compare(model_dict, method='stacking')['weight']
    w_bb_bma = compare(model_dict, method='BB-pseudo-BMA')['weight']
    w_bma = compare(model_dict, method='pseudo-BMA')['weight']

    assert_almost_equal(w_st[0], w_st[1])
    assert_almost_equal(w_bb_bma[0], w_bb_bma[1])
    assert_almost_equal(w_bma[0], w_bma[1])

    assert_almost_equal(np.sum(w_st), 1.)
    assert_almost_equal(np.sum(w_bb_bma), 1.)
    assert_almost_equal(np.sum(w_bma), 1.)

    traces = [trace0, trace1, trace2]
    models = [model0, model1, model2]

    model_dict = dict(zip(models, traces))

    w_st = compare(model_dict, method='stacking')['weight']
    w_bb_bma = compare(model_dict, method='BB-pseudo-BMA')['weight']
    w_bma = compare(model_dict, method='pseudo-BMA')['weight']

    assert (w_st[0] > w_st[1] > w_st[2])
    assert (w_bb_bma[0] > w_bb_bma[1] > w_bb_bma[2])
    assert (w_bma[0] > w_bma[1] > w_bma[2])

    assert_almost_equal(np.sum(w_st), 1.)
    assert_almost_equal(np.sum(w_bb_bma), 1.)
    assert_almost_equal(np.sum(w_bma), 1.)
Example #2
def compare_waic_for_models(prior_type):

    model_trace_dict = {}
    if prior_type == 'weak':
        prior_b_std = [100]
    else:
        prior_b_std = [5]
    for degree in range(1, 7):
        for prior_b_sigma in prior_b_std:
            model, trace = load_model_trace(
                'chapter_06/fitted_models/M%d_b_std_%d.pkl' %
                (degree, prior_b_sigma))
            model.name = 'M%d_b_std_%d.pkl' % (degree, prior_b_sigma)
            model_trace_dict[model.name] = trace

    df_comp_WAIC = pm.compare(model_trace_dict)
    st.table(
        df_comp_WAIC.style.format({
            'waic': '{:.2f}',
            'p_waic': '{:.2f}',
            'd_waic': '{:.2f}',
            'weight': '{:.2f}',
            'se': '{:.2f}',
            'dse': '{:.2f}'
        }))
    fig, ax = plt.subplots(figsize=(6, 6))
    pm.compareplot(df_comp_WAIC)
    st.pyplot()
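A minimal usage sketch (not part of the original example), assuming the helper above runs inside a Streamlit script where st, pm, plt, and load_model_trace are available and the pickled model files exist:

compare_waic_for_models('weak')    # 'weak' selects prior b_std = 100
compare_waic_for_models('strong')  # any other value falls back to b_std = 5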
Example #3
def model_gen_fit(model_specs, test, subj_idx, thresh_iqr=5, **sample_kws):
    """Test model fitting for generated data."""

    n_model = len(model_specs)
    n_subj = len(np.unique(subj_idx))
    model_names = [spec['name'] for spec in model_specs]

    # initialize output variables for all comparisons
    comp_vars = [
        'rank', 'loo', 'p_loo', 'd_loo', 'weight', 'se', 'dse', 'warning'
    ]
    results = {'winner': np.zeros((n_model, n_model), dtype=int)}
    for var in comp_vars:
        shape = (n_model, n_model)
        if var == 'warning':
            results[var] = np.zeros(shape, dtype=bool)
        elif var == 'rank':
            results[var] = np.zeros(shape, dtype=int)
        else:
            results[var] = np.zeros(shape)

    for i, gen_spec in enumerate(model_specs):
        # generate a parameter set
        param, subj_param = sample_params(gen_spec['fixed'], gen_spec['param'],
                                          gen_spec['subj_param'], n_subj)

        # generate data from the random parameters
        gen_model = gen_spec['model']
        raw = gen_model.gen(test, param, subj_idx, subj_param=subj_param)

        # remove extreme and missing values
        data = task.scrub_rt(raw, thresh_iqr)
        rt = data.rt.values
        response = data.response.values
        samp_test = data.test_type.values
        samp_subj = data.subj_idx.values
        all_trace = {}
        for j, fit_spec in enumerate(model_specs):
            fit_model = fit_spec['model']
            graph = fit_model.init_graph_hier(rt, response, samp_test,
                                              samp_subj)
            trace = pm.sample(model=graph, **sample_kws)
            all_trace[fit_spec['name']] = trace

        # compare models
        df_comp = pm.compare(all_trace,
                             ic='LOO',
                             method='BB-pseudo-BMA',
                             b_samples=10000)

        # save results in correct position
        for j, name in enumerate(model_names):
            for var in comp_vars:
                results[var][i, j] = df_comp.loc[name, var]
            results['winner'][i, j] = 1 if df_comp.loc[name,
                                                       'rank'] == 0 else 0
    return results
Example #4
def comp_e(model_matrix, file_name):
    # fit the two models
    model_rbfcls, trace_rbfcls = fit_choice_models.sample_hier_rbf_cls(model_matrix)
    model_rbfkal, trace_rbfkal = fit_choice_models.sample_heir_rbf_kal(model_matrix)

    # compare
    df_comp_loo = pm.compare(
        {
            model_rbfcls: trace_rbfcls,
            model_rbfkal: trace_rbfkal,
        }, ic='LOO')

    df_comp_loo.rename(index={0: 'RBF/Cluster', 1: 'RBF/Kalman'}, inplace=True)
    df_comp_loo.to_pickle(file_name)
Example #5
def test_compare():
    np.random.seed(42)
    x_obs = np.random.normal(0, 1, size=100)

    with pm.Model() as model0:
        mu = pm.Normal('mu', 0, 1)
        x = pm.Normal('x', mu=mu, sd=1, observed=x_obs)
        trace0 = pm.sample(1000)

    with pm.Model() as model1:
        mu = pm.Normal('mu', 0, 1)
        x = pm.Normal('x', mu=mu, sd=0.8, observed=x_obs)
        trace1 = pm.sample(1000)

    with pm.Model() as model2:
        mu = pm.Normal('mu', 0, 1)
        x = pm.StudentT('x', nu=1, mu=mu, lam=1, observed=x_obs)
        trace2 = pm.sample(1000)

    traces = [trace0, copy.copy(trace0)]
    models = [model0, copy.copy(model0)]

    model_dict = dict(zip(models, traces))

    w_st = pm.compare(model_dict, method='stacking')['weight']
    w_bb_bma = pm.compare(model_dict, method='BB-pseudo-BMA')['weight']
    w_bma = pm.compare(model_dict, method='pseudo-BMA')['weight']

    assert_almost_equal(w_st[0], w_st[1])
    assert_almost_equal(w_bb_bma[0], w_bb_bma[1])
    assert_almost_equal(w_bma[0], w_bma[1])

    assert_almost_equal(np.sum(w_st), 1.)
    assert_almost_equal(np.sum(w_bb_bma), 1.)
    assert_almost_equal(np.sum(w_bma), 1.)

    traces = [trace0, trace1, trace2]
    models = [model0, model1, model2]

    model_dict = dict(zip(models, traces))
    
    w_st = pm.compare(model_dict, method='stacking')['weight']
    w_bb_bma = pm.compare(model_dict, method='BB-pseudo-BMA')['weight']
    w_bma = pm.compare(model_dict, method='pseudo-BMA')['weight']

    assert(w_st[0] > w_st[1] > w_st[2])
    assert(w_bb_bma[0] > w_bb_bma[1] > w_bb_bma[2])
    assert(w_bma[0] > w_bma[1] > w_bma[2])

    assert_almost_equal(np.sum(w_st), 1.)
    assert_almost_equal(np.sum(w_bb_bma), 1.)
    assert_almost_equal(np.sum(w_bma), 1.)
Example #6
def model_comparison_WAIC(models,
                          path,
                          file_id,
                          MODEL_NAME_MAP,
                          should_plot=True,
                          export=True):
    """Conduct some model comparison using WAIC, give a list of models"""
    traces = [model.trace for model in models]
    models = [model.model for model in models]
    WAIC = (pm.compare(traces, models).rename(index=MODEL_NAME_MAP))
    if should_plot is True:
        pm.compareplot(WAIC)
        if export is True:
            plt.savefig(f'{path}/{file_id}_WAIC.pdf',
                        format='pdf',
                        bbox_inches='tight')
        plt.cla()
    return WAIC
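A hedged usage sketch for model_comparison_WAIC, assuming each element of models is a fitted wrapper exposing .model and .trace attributes; the wrapper variables and the name map below are illustrative:

# With the older list-based pm.compare call, rows are indexed by position,
# so MODEL_NAME_MAP maps integer row indices to readable names.
name_map = {0: 'baseline', 1: 'extended'}
waic_table = model_comparison_WAIC([fitted_baseline, fitted_extended],
                                   path='figures',
                                   file_id='run01',
                                   MODEL_NAME_MAP=name_map,
                                   should_plot=True,
                                   export=False)
print(waic_table)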
Example #7
def compare(models, labels=None, insample_dev=False, **kwargs):
    """Easier model comparison for BAMBI models

    Automatically expands model terms into formulas and sets them as model names

    :param models: list of BAMBI model objects
    :param kwargs: keyword args for PyMC3 model comparison function
    :returns: tuple of matplotlib figure object of model comparison and pandas DataFrame of model statistics
    """
    traces = dict()
    if type(models) is dict:
        for label, model in models.items():
            traces[label] = model.backend.trace
    else:
        for model in models:
            traces[' + '.join(model.terms.keys())] = model.backend.trace
    comparison = pm.compare(traces, **kwargs)
    g = pm.compareplot(comparison, insample_dev=insample_dev)
    return g, comparison
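A minimal usage sketch, assuming m_linear and m_full are already-fitted bambi models (older bambi API, where a fitted model exposes .terms and .backend.trace); the model names are illustrative:

g, stats = compare([m_linear, m_full], insample_dev=True, ic='WAIC')
print(stats)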
Example #8
def model_uncertainty(splits, stakes, actions, temp=1., sd=1.):
    with pm.Model() as repeated_model:
        r = pm.Gamma('r', alpha=1, beta=1)
        p = pm.Gamma('p', alpha=1, beta=1)
        t = pm.Beta('t', alpha=2, beta=5)
        st = pm.Beta('st', alpha=1, beta=1)
        c = pm.Gamma('c', alpha=1, beta=1)
        odds_a = np.exp(2 * r * splits + c * stakes**st)
        odds_r = np.exp(p * (splits < 0.5 - t / 2))
        # renamed to avoid shadowing the Gamma RV 'p' defined above
        p_accept = odds_a / (odds_r + odds_a)
        a = pm.Binomial('a', 1, p_accept, observed=actions)
        fitted = pm.fit(method='advi')
        trace_repeated = fitted.sample(2000)
        # trace_repeated = pm.sample(200000, step=pm.Slice(), chains=2, cores=4)

    # with pm.Model() as simple_model:
    #   r = pm.Normal('r', mu=0, sd=1)
    #   p = np.exp(r*splits) / (1 + np.exp(r*splits))
    #   a = pm.Binomial('a', 1, p, observed=actions)
    #   trace_simple = pm.sample(2000, init='map')

    with pm.Model() as fairness_model:
        r = pm.Gamma('r', alpha=1, beta=1)
        t = pm.Beta('t', alpha=2, beta=5)
        f = pm.Normal('f', mu=0, sd=sd)
        st = pm.Beta('st', alpha=1, beta=1)
        c = pm.Gamma('c', alpha=1, beta=1)
        odds = np.exp(c * stakes**st + splits * r - f * (splits < 0.5 - t / 2))
        p = odds / (1 + odds)
        a = pm.Binomial('a', 1, p, observed=actions)
        fitted = pm.fit(method='advi')
        trace_fairness = fitted.sample(2000)
        # trace_fairness = pm.sample(200000, step=pm.Slice(), chains=2, cores=4)

    fairness_model.name = 'fair'
    repeated_model.name = 'repeated'
    model_dict = dict(
        zip([fairness_model, repeated_model],
            [trace_fairness, trace_repeated]))
    comp = pm.compare(model_dict, ic='LOO', method='BB-pseudo-BMA')
    return trace_fairness, trace_repeated, comp
Example #9
def compare(models):
    """
    Compare models on WAIC (and some other measures)

    In:
        fitted_models: iterable of fitted Model instances

    Out:
        DataFrame, indexed by model names, columns having comparison values
    """

    # Use a Series rather than a plain list: pm.compare returns a DataFrame
    # sorted by information-criterion value, so model names must be matched to
    # its rows by index, which reflects each model's position in the input.
    model_names = pd.Series([fm.name for fm in models])

    model_dict = {fm.model: fm.trace for fm in models}
    return (pm.compare(
        model_dict=model_dict,
        method='BB-pseudo-BMA').assign(model=model_names).set_index('model'))
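A short usage sketch, assuming fm1 and fm2 are fitted Model instances exposing .name, .model, and .trace (the variable names are illustrative):

comparison = compare([fm1, fm2])
print(comparison)  # one row per model name, sorted by the information criterion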
Example #10
File: bma.py Project: edupard/portfolio
    def get_prediction_weights(self,
                               predictions,
                               observations,
                               N_SAMPLES=1000,
                               N_TUNES=1000,
                               method='stacking'):
        if predictions.shape[1] == 0:
            weak_predictors_num = predictions.shape[0]
            return np.full((weak_predictors_num), 1 / weak_predictors_num)

        sigma_start = np.std(observations)
        alpha_start = 1
        beta_start = 0

        models = []
        traces = []

        for i in range(predictions.shape[0]):
            p = predictions[i, :]

            with pm.Model() as model:
                # use the data-based start value (sigma_start was previously unused)
                sigma = pm.HalfNormal('sigma', 0.1, testval=sigma_start)
                alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
                beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
                mu = alpha * p + beta
                likelihood = pm.Normal('likelihood',
                                       mu=mu,
                                       sd=sigma,
                                       observed=observations)
                trace = pm.sample(N_SAMPLES, tune=N_TUNES)
                models.append(model)
                traces.append(trace)

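        # Note: older PyMC3 releases accepted parallel lists of traces and
        # models, as below; newer releases expect a {model: trace} dict
        # instead (compare Example #1).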
        compare_ds = pm.compare(traces, models, method=method)

        return compare_ds.weight.sort_index(ascending=True)
Example #11
plt.show()
az.plot_forest(trace)
plt.show()

# might need to multiply by -2 to compare with McElreath
with m6_11:
    print(pm.waic(trace))
    print(pm.loo(trace))


#m6_13 = pm.Model()
with pm.Model() as m6_13:
    alpha = pm.Uniform('alpha', 0, 5)
    bm = pm.Uniform('bm', -10, 10)
    log_sigma = pm.Uniform('log_sigma', -10, 10)
    mu = alpha + bm*d['lmass']
    y_obs = pm.Normal('y_obs', mu=mu, sigma=np.exp(log_sigma), observed=d['kcal.per.g'])
    trace = pm.sample(2000, return_inferencedata=True, chains=2)

with m6_13:
    print(pm.summary(trace))
    print(pm.waic(trace))
    print(pm.loo(trace))


with m6_13:
    print(-2*pm.loo(trace))

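# Note: `trace` was reassigned when sampling m6_13 above, so both entries in
# these dicts point at the m6_13 trace; the m6_11 trace variable is not shown
# in this snippet.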
pm.compare({m6_11: trace, m6_13: trace})
pm.compare({m6_11: trace, m6_13: trace}, ic='WAIC')
Example #12
plot_poserterior_mean(tracem73['mu'], dd.rugged, dd.log_gdp)

# 7.4
with pm.Model() as m7_4:
    alpha = pm.Normal('alpha', mu = 8, sigma = 100)
    beta = pm.Normal('beta', sigma = 1)
    beta2 = pm.Normal('beta2', sigma = 1)
    sigma = pm.Uniform('sigma', upper = 10)
    mu = pm.Deterministic('mu', alpha + beta*dd.rugged.values + beta2*dd.cont_africa.values)
    log_gdp = pm.Normal('log_gdp',mu=mu, sigma = sigma, observed = dd.log_gdp.values)
    tracem74 = pm.sample(draws=1000, tune = 1000)

# 7.5
m7_3.name = 'm73'
m7_4.name = 'm74'
pm.compare({m7_3:tracem73, m7_4:tracem74})

# 7.6
rugged_seq = np.arange(-1,8,.25)
mu_Af = np.zeros((len(rugged_seq),tracem74['mu'].shape[0]))
mu_noAf = np.zeros((len(rugged_seq),tracem74['mu'].shape[0]))

for row, seq in enumerate(rugged_seq):
    mu_Af[row,:] = tracem74['alpha'] + tracem74['beta']*rugged_seq[row] + tracem74['beta2']*1
    mu_noAf[row,:] = tracem74['alpha'] + tracem74['beta']*rugged_seq[row] + tracem74['beta2']*0

hpd_af = az.hpd(mu_Af.T,credible_interval=.97)
hpd_noaf = az.hpd(mu_noAf.T,credible_interval=.97)


plt.plot(da1.rugged, da1.log_gdp, marker = 'o', linestyle = '', color = 'blue')
Example #13
hpd25_beta_mu = hpd2_5[1:].mean()
hpd975_beta_mu = hpd97_5[1:].mean()

# Reliability function
ax = plt.subplot(1, 1, 1)
t = np.arange(1, 7, 1)
R1 = np.exp(-((t / post_beta_mu1)**post_alpha1))
R2 = np.exp(-((t / hpd25_beta_mu)**hpd2_5_alpha))
R3 = np.exp(-((t / hpd975_beta_mu)**hpd97_5_alpha))
# plt.plot(t, R2, 'k-', t, R1, 'bo--', t, R3, 'r')
plt.plot(t, R2, 'k-', t, R3, 'r')
ax.legend([u'Reliability 2.5% bound', u'Reliability mean', u'Reliability 97.5% bound'], prop=font)
plt.show()

print(pm.dic(trace2, unpooled_model))
A = pm.compare([trace1, trace2], [pooled_model, unpooled_model], ic='WAIC')
print(A)
pm.compareplot(A)
plt.show()

# Make predictions
# elec_year1 = elec_year
# elec_year1[0:84] = 7
# elec_year1[5:42:6] = 7
# elec_year1 = int(np.ones(len(elec_faults))*7)
print(elec_faults.mean())
# elec_faults2 = np.zeros(len(elec_faults))
x_shared.set_value(np.asarray(test_year))
# y_shared.set_value(elec_faults2)
Num_shared.set_value(np.asarray(test_abc))
# print(elec_faults.mean())
Example #14
# Model with no collinearity
#%%
with pm.Model() as model_no_collinear:
    a = pm.Normal('a', mu=10, sigma=100)
    br = pm.Normal('br', mu=2, sigma=10)
    sigma = pm.Uniform('sigma', lower=0, upper=10)
    mu = pm.Deterministic('mu', a + br * leg_right)
    h = pm.Normal('h', mu=mu, sigma=sigma, observed=height)
    trace_no_collinear = pm.sample(cores=2)

#%%
model_collinear.name = 'collinear'
model_no_collinear.name = 'no-collinear'
df_comp_models = pm.compare({
    model_collinear: trace_collinear,
    model_no_collinear: trace_no_collinear
})
df_comp_models

#%%
pm.forestplot(trace_collinear, var_names=['a', 'bl', 'br', 'sigma'])
pm.forestplot(trace_no_collinear, var_names=['a', 'br', 'sigma'])

# Posterior predictive
#%%
collinear_ppc = pm.sample_posterior_predictive(trace_collinear,
                                               samples=500,
                                               model=model_collinear)
no_collinear_ppc = pm.sample_posterior_predictive(trace_no_collinear,
                                                  samples=500,
                                                  model=model_no_collinear)
Example #15
    # Regression
    mu = a + bA * age
    happy_hat = pm.Normal('happy_hat', mu=mu, sd=sigma, observed=happiness)

    # Prior sampling, trace definition and posterior sampling
    prior = pm.sample_prior_predictive(samples=30)
    posterior_610 = pm.sample()
    posterior_pred_610 = pm.sample_posterior_predictive(posterior_610)

az.summary(posterior_610, credible_interval=.89).round(2)
pm.traceplot(posterior_610)

model_69.name = 'model_69'
model_610.name = 'model_610'
pm.compare({
    model_69: posterior_69,
    model_610: posterior_610
})

# The model that produces the invalid inference, m6.9, is expected to predict
# much better, and it would: the collider path conveys real statistical
# association; we simply end up mistaken about the causal inference.
# We should not use WAIC (or LOO) to choose among models unless we have some
# clear sense of the causal model.

#Q3
d = pd.read_csv('../../data/foxes.csv', sep=';', header=0)
d.head()
d[['avgfood', 'groupsize', 'area', 'weight'
   ]] = preprocessing.scale(d[['avgfood', 'groupsize', 'area', 'weight']])
d.head()

avgfood = theano.shared(np.array(d.avgfood))
Example #16
    beta = pm.HalfNormal('beta', sd=10.)

    pm.Cauchy('returns', alpha=0.0, beta=beta, observed=returns)

    mean_field = pm.fit(n=150000,
                        method='advi',
                        obj_optimizer=pm.adam(learning_rate=.001))

    trace2 = mean_field.sample(draws=10000)

preds2 = pm.sample_ppc(trace2, samples=10000, model=model2)
y2 = np.reshape(np.mean(preds2['returns'], axis=0), [-1])

fig, (ax1, ax2) = plt.subplots(1, 2)

ax1.hist(y2)
ax1.set_title('Cauchy distribution returns')
ax2.hist(returns)
ax2.set_title('Real returns')

plt.show()

print "Estimating LOO..."

# Let's compare the fit of both models
model1.name = 'Gaussian model'
model2.name = 'Cauchy model'
df_LOO = pm.compare({model1: trace, model2: trace2}, ic='LOO')

print "LOO comparison table: ", df_LOO
Example #17
    # Legend
    handles = [p1[0], p2[0], p3[0], p4[0]]
    labels = ['Data', 'Low', 'Mean', 'High']
    ax.legend(handles, labels)
    ax.grid()
    plt.show()

# *************************************************************************************************
# Compute WAIC for both models
waic_base = pm.waic(trace_base, model_base)
waic_sex = pm.waic(trace_sex, model_sex)
# Set model names
model_base.name = 'base'
model_sex.name = 'sex'
# Comparison of WAIC
comp_WAIC_base_v_sex = pm.compare({model_base: trace_base, model_sex: trace_sex})
display(comp_WAIC_base_v_sex)
pm.compareplot(comp_WAIC_base_v_sex)

# Generate the posterior predictive in both base and sex models
try:
    post_pred_base = vartbl['post_pred_base']
    post_pred_sex = vartbl['post_pred_sex']
    print(f'Loaded posterior predictive for base and sex models.')
except:
    with model_base:
        post_pred_base = pm.sample_ppc(trace_base)
    with model_sex:
        post_pred_sex = pm.sample_ppc(trace_sex)
    vartbl['post_pred_base'] = post_pred_base
    vartbl['post_pred_sex'] = post_pred_sex
Example #18
    draws=args.ndraws,
    tune=args.ntune,
    # backend='saved_gzb_bhsm_trace'
)

cot_uniform_bhsm = CotUniformBHSM(galaxies.values)

cot_uniform_trace = cot_uniform_bhsm.do_inference(
    draws=args.ndraws,
    tune=args.ntune,
    # backend='saved_gzb_bhsm_trace'
)

loo = pm.compare({
    bhsm.model: trace,
    cot_uniform_bhsm.model: cot_uniform_trace
},
                 ic='LOO')

print('\n', loo)

# save EVERYTHING
with open(args.output, "wb") as buff:
    pickle.dump(
        {
            'normal_model': bhsm,
            'normal_trace': trace,
            'cot_model': cot_uniform_bhsm,
            'cot_trace': cot_uniform_trace,
            'loo': loo,
            'n_samples': args.ndraws,
Example #19
def fitCompare(data, subject, n_tries=1, overwrite=False, progressbar=True):
    """
    Perform fitting of GLAM variants and
    WAIC model comparisons for a single subject:
    1) Multiplicative vs Additive
    2) Multiplicative vs No Bias
    3) Multiplicative vs Additive vs No Bias
    """

    print("Processing subject {}...".format(subject))

    # Subset data
    subject_data = data[data['subject'] == subject].copy()
    n_items = subject_data['n_items'].values[0]
    if n_items == 2:
        subject_data = subject_data.drop(['item_value_2', 'gaze_2'], axis=1)
    subject_data['subject'] = 0

    # model specifications
    model_names = ('GLAM', 'additive', 'nobias')
    drifts = ('multiplicative', 'additive', 'multiplicative')
    parameter_sets = (['v', 's', 'tau', 'gamma'], ['v', 's', 'tau',
                                                   'gamma'], ['v', 's', 'tau'])
    gamma_bounds = ((-10, 1), (-100, 100), (-10, 1))
    gamma_vals = (None, None, 1.0)

    # fit models
    converged_models = np.ones(len(model_names))
    models = len(model_names) * [None]
    for i, (model_name, drift, parameters, gamma_bound,
            gamma_val) in enumerate(
                zip(model_names, drifts, parameter_sets, gamma_bounds,
                    gamma_vals)):
        print('\tS{}: {}'.format(subject, model_name))
        model, is_converged = fit_indModel(subject_data,
                                           subject,
                                           drift=drift,
                                           parameters=parameters,
                                           gamma_bounds=gamma_bound,
                                           gamma_val=gamma_val,
                                           t0_val=0,
                                           model_name=model_name)
        models[i] = model
        converged_models[i] = int(is_converged)
        if not is_converged:
            break

    # re-sample all converged models, if any model did not converge
    if np.any(converged_models == 0):
        for i in np.where(converged_models == 1)[0]:
            print('\tRe-sampling S{}: {}'.format(subject, model_names[i]))
            model, is_converged = fit_indModel(subject_data,
                                               subject,
                                               drift=drifts[i],
                                               parameters=parameter_sets[i],
                                               gamma_bounds=gamma_bounds[i],
                                               gamma_val=gamma_vals[i],
                                               t0_val=0,
                                               model_name=model_names[i],
                                               n_tries_max=0)
            models[i] = model

    # un-pack models
    missing = [name for name, model in zip(model_names, models) if model is None]
    if missing:
        raise ValueError('Model(s) {} not sampled.'.format(missing))
    multiplicative, additive, nobias = models

    # Individual Model Comparisons
    # 1) Multiplicative vs Additive
    try:
        waic_df = pm.compare(
            {
                additive.model[0]: additive.trace[0],
                multiplicative.model[0]: multiplicative.trace[0]
            },
            ic='WAIC')
        path = os.path.join('results', 'model_comparison',
                            'additive_vs_multiplicative')
        make_sure_path_exists(path)
        make_sure_path_exists(path + '/plots/')
        waic_df.to_csv(
            os.path.join(
                path,
                'additive_vs_multiplicative_{}_waic.csv'.format(subject)))
        pm.compareplot(waic_df)
        plt.savefig(
            os.path.join(
                'results', 'model_comparison', 'additive_vs_multiplicative',
                'plots',
                'additive_vs_multiplicative_{}_waic.png'.format(subject)))
        plt.close()
    except:
        print('  /!\  Error in WAIC comparison for subject {}'.format(subject))

    # 2) Multiplicative vs No Bias
    try:
        waic_df = pm.compare(
            {
                multiplicative.model[0]: multiplicative.trace[0],
                nobias.model[0]: nobias.trace[0]
            },
            ic='WAIC')
        path = os.path.join('results', 'model_comparison',
                            'multiplicative_vs_nobias')
        make_sure_path_exists(path)
        make_sure_path_exists(path + '/plots/')
        waic_df.to_csv(
            os.path.join(
                path, 'multiplicative_vs_nobias_{}_waic.csv'.format(subject)))
        pm.compareplot(waic_df)
        plt.savefig(
            os.path.join(
                'results', 'model_comparison', 'multiplicative_vs_nobias',
                'plots',
                'multiplicative_vs_nobias_{}_waic.png'.format(subject)))
        plt.close()
    except:
        print('  /!\  Error in WAIC comparison for subject {}'.format(subject))

    # 3) Multiplicative vs Additive vs No Bias
    try:
        waic_df = pm.compare(
            {
                multiplicative.model[0]: multiplicative.trace[0],
                additive.model[0]: additive.trace[0],
                nobias.model[0]: nobias.trace[0]
            },
            ic='WAIC')
        path = os.path.join('results', 'model_comparison',
                            'additive_vs_multiplicative_vs_nobias')
        make_sure_path_exists(path)
        make_sure_path_exists(path + '/plots/')
        waic_df.to_csv(
            os.path.join(
                path,
                'additive_vs_multiplicative_vs_nobias_{}_waic.csv'.format(
                    subject)))
        pm.compareplot(waic_df)
        plt.savefig(
            os.path.join(
                'results', 'model_comparison',
                'additive_vs_multiplicative_vs_nobias', 'plots',
                'additive_vs_multiplicative_vs_nobias_{}_waic.png'.format(
                    subject)))
        plt.close()
    except:
        print('  /!\  Error in WAIC comparison for subject {}'.format(subject))

    return True
Example #20
File: bma.py Project: edupard/portfolio
    def get_weights(self, predictions_aapl, predictions_msft, predictions_bac,
                    observations_aapl):
        N_SAMPLES = 1000
        N_TUNES = 1000

        sigma_start = np.std(observations_aapl)
        alpha_start = 1
        beta_start = 0

        # predictions_shared = theano.shared(predictions_aapl)
        predictions = np.stack(
            [predictions_aapl, predictions_msft, predictions_bac])

        with pm.Model() as model:
            # use the data-based start value (sigma_start was previously unused)
            sigma = pm.HalfNormal('sigma', 0.1, testval=sigma_start)
            alpha = pm.Normal('alpha',
                              mu=1,
                              sd=1,
                              testval=alpha_start,
                              shape=3)
            beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start, shape=3)
            mu = alpha * predictions + beta
            p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
            trace_model = pm.sample(N_SAMPLES, tune=N_TUNES)

        with pm.Model() as model_aapl:
            sigma = pm.HalfNormal('sigma', 0.1, testval=sigma_start)
            alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
            beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
            mu = alpha * predictions_aapl + beta
            p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
            trace_model_aapl = pm.sample(N_SAMPLES, tune=N_TUNES)

        with pm.Model() as model_msft:
            sigma = pm.HalfNormal('sigma', 0.1, testval=sigma_start)
            alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
            beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
            mu = alpha * predictions_msft + beta
            p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
            trace_model_msft = pm.sample(N_SAMPLES, tune=N_TUNES)

        with pm.Model() as model_bac:
            sigma = pm.HalfNormal('sigma', 0.1, testval=sigma_start)
            alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
            beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
            mu = alpha * predictions_bac + beta
            p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
            trace_model_bac = pm.sample(N_SAMPLES, tune=N_TUNES)

        compare_1 = pm.compare(
            [trace_model_aapl, trace_model_msft, trace_model_bac],
            [model_aapl, model_msft, model_bac],
            method='pseudo-BMA')
        compare_2 = pm.compare(
            [trace_model_msft, trace_model_bac, trace_model_aapl],
            [model_msft, model_bac, model_aapl],
            method='pseudo-BMA')

        compare_3 = pm.compare(
            [trace_model_aapl, trace_model_msft, trace_model_bac],
            [model_aapl, model_msft, model_bac],
            method='BB-pseudo-BMA')

        compare_4 = pm.compare(
            [trace_model_aapl, trace_model_msft, trace_model_bac],
            [model_aapl, model_msft, model_bac],
            method='stacking')

        compare_5 = pm.compare([trace_model_msft, trace_model_bac],
                               [model_msft, model_bac],
                               method='pseudo-BMA')

        compare_6 = pm.compare([trace_model_aapl, trace_model_msft],
                               [model_aapl, model_msft],
                               method='BB-pseudo-BMA')

        compare_7 = pm.compare([trace_model_aapl, trace_model_msft],
                               [model_aapl, model_msft],
                               method='stacking')

        # pm.traceplot(trace_model)

        d = pd.read_csv('data/milk.csv', sep=';')
        d['neocortex'] = d['neocortex.perc'] / 100
        d.dropna(inplace=True)
        d.shape

        a_start = d['kcal.per.g'].mean()
        sigma_start = d['kcal.per.g'].std()

        mass_shared = theano.shared(np.log(d['mass'].values))
        neocortex_shared = theano.shared(d['neocortex'].values)

        with pm.Model() as m6_11:
            alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
            mu = alpha + 0 * neocortex_shared
            sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
            kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
            trace_m6_11 = pm.sample(1000, tune=1000)

        pm.traceplot(trace_m6_11)

        with pm.Model() as m6_12:
            alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
            beta = pm.Normal('beta', mu=0, sd=10)
            sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
            mu = alpha + beta * neocortex_shared
            kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
            trace_m6_12 = pm.sample(1000, tune=1000)

        with pm.Model() as m6_13:
            alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
            beta = pm.Normal('beta', mu=0, sd=10)
            sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
            mu = alpha + beta * mass_shared
            kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
            trace_m6_13 = pm.sample(1000, tune=1000)

        with pm.Model() as m6_14:
            alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
            beta = pm.Normal('beta', mu=0, sd=10, shape=2)
            sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
            mu = alpha + beta[0] * mass_shared + beta[1] * neocortex_shared
            kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
            trace_m6_14 = pm.sample(1000, tune=1000)

        pm.waic(trace_m6_14, m6_14)

        compare_df = pm.compare(
            [trace_m6_11, trace_m6_12, trace_m6_13, trace_m6_14],
            [m6_11, m6_12, m6_13, m6_14],
            method='pseudo-BMA')

        compare_df.loc[:, 'model'] = pd.Series(
            ['m6.11', 'm6.12', 'm6.13', 'm6.14'])
        compare_df = compare_df.set_index('model')
        compare_df

        pm.compareplot(compare_df)
Example #21
    def compare(self, trace, ic='waic', scale='deviance'):
        return pm.compare(trace, ic=ic, scale=scale)
Example #22
                rand_pars.append(pars)

            # Save the random samples to a npy file
            randfilename = osjoin(model_comparison_folder,
                                  f"{filename}_param_samples.npy")
            np.save(randfilename, np.array(rand_pars))

            # Model comparison via WAIC
            plaw_model.name = 'plaw'
            brok_plaw_model.name = 'brok_plaw'

            # df_comp_WAIC = pm.compare({plaw_model: trace,
            #                            brok_plaw_model: trace_brok},
            #                           ic='WAIC')
            df_comp_LOO = pm.compare({plaw_model: trace,
                                      brok_plaw_model: trace_brok},
                                     ic='LOO')

            plt.figure(figsize=(4.2, 4.2))

            ax = plt.subplot(211)
            # plt.title("Fit_params: {}".format(out[0]))
            ax.loglog(pspec.freqs.value, pspec.ps1D, 'k', zorder=-10)


            beam_amp = 10**(max([summ['mean'].logA, -20 if fitinfo_dict[gal][name]['fixB'] else summ['mean'].logB]) - 1.)

            ax.loglog(freqs,
                      fit_model_func(freqs,
                                     summ['mean']['logA'],
                                     summ['mean']['index'],
Example #23
with pm.Model() as m4:
    # unpooled model
    α = pm.Normal('α', 0, 0.1, shape=2)
    β = pm.Normal('β', 0, 0.3, )
    σ = pm.Exponential('σ', 1)
    μ = α[dfinal.cont_africa.values] + β * (dfinal.rugged_s.values - rbar)
    log_gdp_s_i = pm.Normal('log_gdp_s_i', μ, σ, observed=dfinal.log_gdp_s.values)

with m3:
    trace_m3 = pm.sample()
with m4:
    trace_m4 = pm.sample()
m3.name = 'm3'
m4.name = 'm4'
pm.compare({m3: trace_m3, m4: trace_m4})

pm.summary(trace_m4, alpha=0.11)


# Making the slope conditional
with pm.Model() as m5:
    α = pm.Normal('α', 0, 0.1, shape=2)
    β = pm.Normal('β', 0, 0.3, shape=2)
    σ = pm.Exponential('σ', 1)
    μ = α[dfinal.cont_africa.values] + β[dfinal.cont_africa.values] * (dfinal.rugged_s.values - rbar)
    log_gdp_s_i = pm.Normal('log_gdp_s_i', μ, σ, observed=dfinal.log_gdp_s.values)
    trace_m5 = pm.sample()
pm.summary(trace_m5, alpha=0.11).round(decimals=2)
m5.name = 'm5'
pm.compare({m3: trace_m3, m4: trace_m4, m5: trace_m5}, ic='LOO')
Example #24
plt.savefig('5partial_model.png', dpi=300, figsize=[14, 15])
pm.traceplot(chain3, varnames4)
plt.show()
# Plot the autocorrelation curves
pm.autocorrplot(chain3)
plt.show()

# plt.figure(figsize=(6, 14))
# pm.forestplot(chain3, varnames=['beta'])
# plt.show()

print(pm.dic(trace3, partial_model))

# ======================================================================
# Model comparison
# ======================================================================
Waic = pm.compare([traces_ols_glm, trace1,  trace3], [mdl_ols_glm, pooled_model,  partial_model], ic='WAIC')
# Waic = pm.compare([traces_ols_glm, trace1, trace2, trace3], [mdl_ols_glm, pooled_model, unpooled_model, partial_model], ic='WAIC')
print(Waic)



# # Plot the product curve for company A
# sig0 = pm.hpd(trace['theta'], alpha=0.6)[0]
#
# plt.figure()
# ax = sns.distplot(sig0)



Example #25
def compare_models(models, **kwargs):
    """
    Compares multiple fitted models.
    
    Parameters
    ----------
    models : list of glambox.GLAM
        List of fitted GLAM model instances.
    
    **kwargs : optional
        Additional keyword arguments to be passed to pymc3.compare
    
    Returns
    -------
    pandas.DataFrame
        DataFrame containing information criteria for each model.
    """

    # Check that more than one model is entered
    assert len(models) > 1, "Must enter at least two models."

    # Check model names, create some if there are none
    for m, model in enumerate(models):
        if model.name is None:
            model.name = 'model_{}'.format(m)

    # Check that all models have the same type:
    assert all([model.type == models[0].type for model in models
                ]), "Models have different types and cannot be compared."

    # Check that all models have the same number of PyMC3 models and traces:
    assert all(
        [len(model.trace) == len(models[0].trace) for model in models]
    ), "Model instances have different numbers of subjects and cannot be compared."

    if models[0].type == 'hierarchical':
        df = pm.compare(
            model_dict={model.model: model.trace[0]
                        for model in models},
            **kwargs)
        # read out column names
        cols = df.columns.tolist()
        # include model column
        df.index.name = 'model'
        df = df.reset_index()
        # reorder columns so that model comes first
        df = df[['model'] + cols]

    elif models[0].type == 'individual':
        df = []
        for s in range(len(models[0].trace)):
            compare_df_s = pm.compare(model_dict={
                model.model[s]: model.trace[s]
                for model in models
            },
                                      **kwargs)
            # read out column names
            cols = compare_df_s.columns.tolist()
            # include subject column
            compare_df_s['subject'] = s
            # include model column
            compare_df_s.index.name = 'model'
            compare_df_s = compare_df_s.reset_index()
            # reorder columns so that subject and model come first
            compare_df_s = compare_df_s[['subject', 'model'] + cols]
            df.append(compare_df_s)
        df = pd.concat(df).reset_index(drop=True)

    return df
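A short usage sketch, assuming glam1 and glam2 are fitted glambox.GLAM instances of the same type (the variable names are illustrative):

comparison = compare_models([glam1, glam2], ic='WAIC')
print(comparison)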
Example #26
                             chains=3,
                             random_seed=SEED,
                             nuts_kwargs=NUTS_KWARGS)

pm.traceplot(pooled_trace)
plt.savefig("pooled_trace.png")
plt.close()

pooled_model.name = "Pooled"
unpooled_model.name = "Unpooled"
hierarchical_model.name = "Hierarchical"

dfComp = pm.compare(
    {
        hierarchical_model: hierarchical_trace,
        pooled_model: pooled_trace,
        unpooled_model: unpooled_trace
    },
    ic="LOO")
print(dfComp)
pm.compareplot(dfComp)
plt.tight_layout()
plt.savefig("compare.png")
plt.close()

g = sns.FacetGrid(df, col="Year", col_wrap=5)
g = g.map(plt.scatter, "UnionMembership", "DemShare")
x = np.linspace(-2, 2, 100)
for i, ax in enumerate(g.axes.flat):
    p_state = hierarchical_trace[
        "a_year"][:, i] + hierarchical_trace["b_year"][:, i] * x[:, None]
Example #27
            ]
        )
    )
    .astype(np.float64)
    .round(2)
)

# %%
with m11_2:
    trace_11_2 = pm.sample(1000, tune=1000)

with m11_3:
    trace_11_3 = pm.sample(1000, tune=1000)

# %%
comp_df = pm.compare({m11_1: trace_11_1, m11_2: trace_11_2, m11_3: trace_11_3})

comp_df.loc[:, "model"] = pd.Series(["m11.1", "m11.2", "m11.3"])
comp_df = comp_df.set_index("model")
comp_df

# %%
pp_df = pd.DataFrame(
    np.array([[0, 0, 0], [0, 0, 1], [1, 0, 0], [1, 0, 1], [0, 1, 0], [0, 1, 1]]),
    columns=["action", "contact", "intention"],
)

# %%
pp_df

# %%
Example #28
                           alpha=alpha,
                           marker='o')
                # Plot generated exemplars
                ax.scatter(data_all[pid][20:, 0],
                           data_all[pid][20:, 1],
                           s=20,
                           color='red',
                           alpha=alpha,
                           marker='x')

            # Plot the ellipses
            plot_ellipse(ax, ms_post, ss_post)
            # Standardize axes
            lim = 2.5
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            ax.set_ylim(-lim, lim)
            ax.set_xlim(-lim, lim)
            ax.set_title(pid)

    # Save the figure for easy future access
    plt.savefig('real_fit%d.pdf' % ki)

## Model comparison
# Convert model and trace into dictionary pairs
dict_pairs = dict(zip(gmm_all, traces))
# Perform WAIC comparison
compare = pm.compare(dict_pairs, ic='WAIC')
# Print comparison
print(compare)
Example #29
                                                   param['beta3'][ip] * elec_Pca_char2[ip * 42:(ip * 42 + 6)] + \
                                                   + param['beta4'][ip] * xl * xl)
                     )
        ax.plot(xl, yl2, 'k', linewidth=2, alpha=.05)

    # ax = sns.violinplot(data=elec_faults2[ip*7:(ip+1)*7])
    ax.plot(xp, yp, marker='o', alpha=.8)
    plt.plot(xl, yl, 'k--', linewidth=2)
    plt.plot(xl, y2, 'r', linewidth=2)
    plt.axis([0.5, 7, -.1, 4.5])
    plt.title('Subject %s' % (ip + 1))

plt.tight_layout()
plt.show()

WAIC = pm.compare([trace_1, trace_2b], [model_1, model_2b], ic='WAIC')
print(WAIC)


# Reliability calculation: beta_mu must be divided by 100 to restore its scale
post_alpha1 = np.mean(chain_2b['alpha'])
post_beta_mu1 = np.mean(chain_2b['beta_mu'])/100

varnames1 = ['alpha', 'beta_mu']
aaa1 = pm.df_summary(trace_2b, varnames1)
bbb1 = pd.DataFrame(aaa1)

hpdd2_5 = bbb1['hpd_2.5']
hpdd97_5 = bbb1['hpd_97.5']
hpd2_5_alpha = hpdd2_5[:1].mean()
hpd97_5_alpha = hpdd97_5[:1].mean()
Example #30
                            alpha=alpha3,
                            beta=theta3C,
                            observed=ys_faultsC)  # observed data
    #     step1 = pm.Slice([Δ_a])
    start = pm.find_MAP()
    trace_3 = pm.sample(1000, start=start, njobs=1)

ax = pm.energyplot(trace_3)
bfmi3 = pm.bfmi(trace_3)
ax.set_title(f"BFMI = {bfmi3:.2f}")
plt.show()

pm.traceplot(trace_3)
plt.show()

WAIC3 = pm.compare([trace_1, trace_3], [model_1, model_3], ic='WAIC')
print('WAIC1: ', WAIC3)

# Leave-one-out Cross-validation
df_comp_LOO = pm.compare([trace_1, trace_3], [model_1, model_3], ic='LOO')
print(df_comp_LOO)

# Posterior analysis
varnames2 = ['theta3', 'theta3B', 'theta3C']
tmp3 = pm.df_summary(trace_3, varnames2)

MAP_tmp3 = tmp3['mean']


# Compute the mean squared error
def Rmse(predictions, targets):
Example #31
create_figure_timeseries(traces[0],
                         'tab:red',
                         plot_red_axis=True,
                         save_to=path_to_save + 'time.1',
                         add_more_later=False)
create_figure_timeseries(traces[1],
                         'tab:orange',
                         plot_red_axis=True,
                         save_to=path_to_save + 'time.2',
                         add_more_later=False)
create_figure_timeseries(traces[2],
                         'tab:green',
                         plot_red_axis=True,
                         save_to=path_to_save + 'time.3',
                         add_more_later=False)
loo = [pm.loo(e, scale='deviance', pointwise=True) for e in traces]
for e in reversed(loo):
    print("lo: %.2f %.2f %.2f" % (e['loo'], e['loo_se'], e['p_loo']))
models[0].name = 'one point'
models[1].name = 'two points'
models[2].name = 'three points'
compare = pm.compare(
    {
        models[0].name: traces[0],
        models[1].name: traces[1],
        models[2].name: traces[2]
    },
    ic='LOO',
    scale='deviance')
print(compare)