Example #1
 def compute_waic(self):
     if not isinstance(self.model, list):
         self.waic = pm.waic(trace=self.trace, model=self.model)
     else:
         self.waic = np.array([
             pm.waic(trace=trace, model=model)
             for (trace, model) in zip(self.trace, self.model)
         ])
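
A minimal usage sketch for the method above (hypothetical; assumes `m` is a fitted wrapper object whose `trace` and `model` attributes are lists after fitting several candidates, and a PyMC3 version where the `pm.waic` result exposes a `WAIC` attribute):

m.compute_waic()
for i, w in enumerate(m.waic):
    print('model {}: WAIC = {:.2f}'.format(i, w.WAIC))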
Example #2
    def get_metrics(self, kind=('mse', 'mae', 'waic', 'loo'), sample_size=5000):
        if self.predicted is None:
            self._predict_in_sample(sample_size=sample_size, use_median=False)

        records = {}
        for kind_ in kind:
            if kind_.lower() == 'mse':
                records['mse'] = np.mean(
                    np.square(self.response - self.predicted))
            elif kind_.lower() == 'mae':
                records['mae'] = np.mean(np.abs(self.response -
                                                self.predicted))
            elif kind_.lower() == 'waic':
                records['waic'] = pm.waic(self.trace, self.model).WAIC
            elif kind_.lower() == 'loo':
                records['loo'] = pm.loo(self.trace, self.model).LOO
            else:
                raise ValueError(f'{kind_} is not supported.')

        table_content = [['Metrics', 'Value']]
        for key, value in records.items():
            value = str(round(value, 4))
            table_content.append([key, value])

        header = 'Model Fitting Metrics Report'
        BaseModel.pretty_print(header, table_content, table_len=50)
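
A hedged usage sketch (hypothetical; assumes `m` is a fitted instance of the class this method belongs to):

m.get_metrics(kind=['mse', 'waic'], sample_size=2000)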
Example #3
    def compute_waic(self):
        """
        Compute WAIC for all appended
        PyMC3 models.

        Returns
        -------
        None
            Adds WAIC to GLAM model object.
        """
        if not isinstance(self.model, list):
            self.waic = pm.waic(trace=self.trace, model=self.model)
        else:
            self.waic = np.array([
                pm.waic(trace=trace, model=model)
                for (trace, model) in zip(self.trace, self.model)
            ])
Example #4
    def fit(self, sample_options):
        '''Note `sample_options` is a dictionary of options'''
        # sampling
        with self.model:
            self.trace = pm.sample(**sample_options)

        self.waic = pm.waic(self.trace, self.model)
        print(self.waic)
        self.metrics = {
            'log_loss': self.calc_log_loss(),
            'AUC': self.calc_AUC(),
            'WAIC': self.waic.WAIC,
            'roc_auc': self.calc_roc_auc()
        }
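
A minimal usage sketch (hypothetical instance name; the `sample_options` keys are ordinary `pm.sample` keyword arguments):

m.fit(sample_options={'draws': 2000, 'tune': 1000, 'chains': 2, 'cores': 2})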
Example #5
def posteriorChecks(model, trace):
    """
        Performs various posterior checks
        Posterior Predictive Checks:
            - Simulates replicating data under the fitted model and then comparing these to the observed data
            - This checks for systematic discrepancies between real and simulated data

        Widely-applicable Information Criterion (WAIC):
            - Fully Bayesian criterion for estimating out-of-sample expectation, using the computed log pointwise posterior predictive density (LPPD) and correcting for the effective number of parameters to adjust for overfitting.
            - This is primarilly for the comparison between different models

        Leave-one-out Cross-validation (LOO):
            - Estimate of the out-of-sample predictive fit. In cross-validation, the data are repeatedly partitioned into training and holdout sets, iteratively fitting the model with the former and evaluating the fit with the holdout data. PyMC's implementation of LOO is using Pareto-smoothed importance sampling, it provides and estimate of point-wise out-of-sample prediction accuracy

        Note: out-of-sample is data not used for the fit (ie: making a prediction)

    """
    # Posterior Predictive Checks -- generates 500 simulated replicates of the data
    # (essentially toy MC)
    model_ppc = pm.sample_ppc(trace, samples=500, model=model)

    # Widely-applicable Information Criterion (WAIC)
    model_waic = pm.waic(trace[len(trace) - 100:], model, progressbar=True)

    # Leave-one-out Cross-validation (LOO)
    model_loo = pm.loo(trace[len(trace) - 100:], model, progressbar=True)

    # ppc = pm.sample_ppc(trace[nBurn:], samples=500, model=model)
    # print(np.asarray(ppc['L'].shape), ppc.keys())
    # _, axppc = plt.subplots(figsize=(12, 6))
    # axppc.hist([n.mean() for n in ppc['L']], bins=19, alpha=0.5)
    # axppc.set(title='Posterior predictive for L', xlabel='L(x)', ylabel='Frequency');
    # plt.show()

    # df_comp_WAIC = pm.compare(models = [model, modelBasic], traces = [trace[nBurn:], traceBasic[nBurn]])
    # df_comp_WAIC.head()
    # pm.compareplot(df_comp_WAIC)
    # df_comp_LOO = pm.compare(models = [model, modelBasic], traces = [trace[nBurn:], traceBasic[nBurn]], ic='LOO')
    # df_comp_LOO.head()
    # pm.compareplot(df_comp_LOO)

    # LOO results
    # LOO  pLOO   dLOO weight      SE   dSE warning
    # 0  61479.8  5.68      0   0.94  798.63     0       1
    # 1    61502  4.95  22.12   0.06  798.47  10.2       1

    return model_ppc, model_waic, model_loo
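
For reference, the WAIC described in the docstring can be assembled by hand from the pointwise log-likelihoods, exactly as the test functions in the following examples do. A self-contained sketch (assumes `log_py` is an (n_samples, n_observations) array; `pm.waic` performs the equivalent computation internally):

def waic_by_hand(log_py):
    # log pointwise predictive density (LPPD)
    lppd = np.sum(np.log(np.mean(np.exp(log_py), axis=0)))
    # effective number of parameters
    p_waic = np.sum(np.var(log_py, axis=0))
    # WAIC on the deviance scale
    return -2 * (lppd - p_waic)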
Example #6
def test_waic():
    """Test widely available information criterion calculation"""
    x_obs = np.arange(6)

    with pm.Model() as model:
        p = pm.Beta('p', 1., 1., transform=None)
        x = pm.Binomial('x', 5, p, observed=x_obs)

        step = pm.Metropolis()
        trace = pm.sample(100, step)
        calculated = pm.waic(trace)

    log_py = st.binom.logpmf(np.atleast_2d(x_obs).T, 5, trace['p']).T

    lppd = np.sum(np.log(np.mean(np.exp(log_py), axis=0)))
    p_waic = np.sum(np.var(log_py, axis=0))
    actual = -2 * lppd + 2 * p_waic

    assert_almost_equal(calculated, actual, decimal=2)
Example #7
def test_waic():
    """Test widely available information criterion calculation"""
    x_obs = np.arange(6)

    with pm.Model() as model:
        p = pm.Beta('p', 1., 1., transform=None)
        x = pm.Binomial('x', 5, p, observed=x_obs)

        step = pm.Metropolis()
        trace = pm.sample(100, step)
        calculated = pm.waic(trace)

    log_py = st.binom.logpmf(np.atleast_2d(x_obs).T, 5, trace['p']).T
    
    lppd = np.sum(np.log(np.mean(np.exp(log_py), axis=0)))
    p_waic = np.sum(np.var(log_py, axis=0))
    actual = -2 * lppd + 2 * p_waic
    
    assert_almost_equal(calculated, actual, decimal=2)
Example #8
    def test_waic(self):
        """Test widely available information criterion calculation"""
        x_obs = np.arange(6)

        with pm.Model():
            p = pm.Beta('p', 1., 1., transform=None)
            pm.Binomial('x', 5, p, observed=x_obs)

            step = pm.Metropolis()
            trace = pm.sample(100, step)
            calculated_waic = pm.waic(trace)

        log_py = st.binom.logpmf(np.atleast_2d(x_obs).T, 5, trace['p']).T

        lppd_i = np.log(np.mean(np.exp(log_py), axis=0))
        vars_lpd = np.var(log_py, axis=0)
        waic_i = -2 * (lppd_i - vars_lpd)

        actual_waic_se = np.sqrt(len(waic_i) * np.var(waic_i))
        actual_waic = np.sum(waic_i)

        assert_almost_equal(calculated_waic.WAIC, actual_waic, decimal=2)
        assert_almost_equal(calculated_waic.WAIC_se, actual_waic_se, decimal=2)
Example #9
    def test_waic(self):
        """Test widely available information criterion calculation"""
        x_obs = np.arange(6)

        with pm.Model():
            p = pm.Beta('p', 1., 1., transform=None)
            pm.Binomial('x', 5, p, observed=x_obs)

            step = pm.Metropolis()
            trace = pm.sample(100, step)
            calculated_waic = pm.waic(trace)

        log_py = st.binom.logpmf(np.atleast_2d(x_obs).T, 5, trace['p']).T

        lppd_i = np.log(np.mean(np.exp(log_py), axis=0))
        vars_lpd = np.var(log_py, axis=0)
        waic_i = -2 * (lppd_i - vars_lpd)

        actual_waic_se = np.sqrt(len(waic_i) * np.var(waic_i))
        actual_waic = np.sum(waic_i)

        assert_almost_equal(calculated_waic.WAIC, actual_waic, decimal=2)
        assert_almost_equal(calculated_waic.WAIC_se, actual_waic_se, decimal=2)
Example #10
def stationary_posterior(annual_max):
    #    calibration_data=annual_max[:int(3*len(annual_max)/4)]
    calibration_data = annual_max
    #    calibration_data = pd.Series(annual_max['58790000'].values, index=annual_max.index)
    locm = calibration_data.mean()
    locs = calibration_data.std() / (np.sqrt(len(calibration_data)))
    scalem = calibration_data.std()
    scales = calibration_data.std() / (np.sqrt(2 *
                                               (len(calibration_data) - 1)))
    with pm.Model() as model:
        # Priors for unknown model parameters
        c = pm.Beta(
            'c', alpha=6, beta=9
        )  # c = x - 0.5: the shift in gev_logp is required because the Beta domain is (0, 1)
        loc = pm.Normal('loc', mu=locm, sd=locs)
        scale = pm.Normal('scale', mu=scalem, sd=scales)

        # Likelihood (sampling distribution) of the observations. The GEV is not implemented in PyMC3, so a custom log-likelihood is used.
        def gev_logp(value):
            scaled = (value - loc) / scale
            logp = -(tt.log(scale) + (((c - 0.5) + 1) / (c - 0.5) * tt.log1p(
                (c - 0.5) * scaled) + (1 + (c - 0.5) * scaled)**(-1 /
                                                                 (c - 0.5))))
            bound1 = loc - scale / (c - 0.5)
            bounds = tt.switch((c - 0.5) > 0, value > bound1, value < bound1)
            # `bound` is pymc3.distributions.dist_math.bound (import assumed)
            return bound(logp, bounds, c != 0)

        gev = pm.DensityDist('gev', gev_logp, observed=calibration_data)
        #        step = pm.Metropolis()
        trace = pm.sample(5000, chains=2, cores=1, progressbar=True)
    pm.traceplot(trace)
    # geweke_plot=pm.geweke(trace, 0.05, 0.5, 20)
    # gelman_and_rubin=pm.diagnostics.gelman_rubin(trace)
    waic = pm.waic(trace)
    posterior = pm.trace_to_dataframe(trace)
    summary = pm.summary(trace)
    return posterior, summary, waic
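
A hedged usage sketch for the function above (assumes `annual_max` is a pandas Series of annual-maximum values; `.WAIC` is the result attribute in PyMC3 releases of this era):

posterior, summary, waic = stationary_posterior(annual_max)
print(waic.WAIC)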
Example #11
    data = load_data(disease, prediction_region, county_info)
    data_train, target_train, data_test, target_test = split_data(data)
    tspan = (target_train.index[0], target_train.index[-1])
    waics = {}
    for (name, (use_age, use_eastwest)) in age_eastwest_by_name.items():
        if disease == "borreliosis":
            use_eastwest = False
        # load sample trace
        trace = load_trace(disease, use_age, use_eastwest)

        # load model
        model = load_model(disease, use_age, use_eastwest)

        with model:
            waics[name] = pm.waic(trace).WAIC

    # do model selection
    best_key = min(waics, key=waics.get)

    use_age, use_eastwest = age_eastwest_by_name[best_key]
    if disease == "borreliosis":
        use_eastwest = False
    best_model[disease] = {
        "name": best_key,
        "use_age": use_age,
        "use_eastwest": use_eastwest,
        "comparison": waics
    }

with open('../data/comparison.pkl', "wb") as f:
    pkl.dump(best_model, f)
Example #12
def runExperimentsBiased(ml=True, size=[100], components=[8], method=[7], classes=['rrlyr'],
                         fit_iterations=20000, id_col_='ID', name_class_col_='Class', biasedSplit=False, ModifiedPrior=False, alpha_=0.1,
                         onetoOne=True, DropEasy_=True, priors_='normal', oneToOne_=False, PCA_=True, modeltoFit='RL', kernel=False,
                         poli=3, n_hidden_=4, njobs=16, **kwargs):

    print('PCA: '+str(PCA_))
    marginal_likelihood = 0
    res = []
    dim = components[0]
    print(dim)
    for i in classes:
        print('Class: ', i)
        dataTrain = pd.read_csv('data/BIASEDFATS/Train_OGLE_'+i+'.csv')
        dataTest = pd.read_csv('data/BIASEDFATS/Test_OGLE_'+i+'.csv')
        time_ml_i = []

        dataTrain = ut.down_sampling(dataTrain)
        samples = dataTrain.shape[0]

        maxSample = size[0]
        if samples > maxSample:
            samples = maxSample

            dataTrain = dataTrain.sample(samples, random_state =0)
            print('after down_sampling: ')
            '''print('train: ')
            print(data_train.label.value_counts())
            print('test: ')
            print(data_test.label.value_counts())
            '''
        print('The dataset contains:', samples, 'samples')

        try:
            del dataTrain['Unnamed: 0']
            del dataTest['Unnamed: 0']
            dataTrain = dataTrain.loc[:, ~dataTrain.columns.str.contains('^Unnamed')]
            dataTest = dataTest.loc[:, ~dataTest.columns.str.contains('^Unnamed')]
            del dataTrain['ID']
            del dataTest['ID']

            yTrain = dataTrain['label']
            yTest = dataTest['label']

            del dataTrain['label']
            del dataTest['label']

            try:
                dataTrain =  dataTrain.drop(['Pred', 'Pred2', 'h', 'e', 'u'], axis = 1)
                dataTest =  dataTest.drop(['Pred', 'Pred2', 'h', 'e', 'u'], axis = 1)
                dataTrain = dataTrain.loc[:, dataTrain.var()!=0.0]
                dataTest = dataTest.loc[:, dataTest.var()!=0.0]
            except:
                print('---')
        except:
            print('---')

        mostImportantF = True
        if PCA_ == False:
            print('PCA False')
            if mostImportantF == True:
                c_comp = 0
                RelevantFeatures = ['PeriodLS','CAR_tau','CAR_mean', 'CAR_sigma','Meanvariance', 'Skew', 'PercentDifferenceFluxPercentile','Gskew',
                'Class_col', 'Psi_CS', 'Psi_eta','SlottedA_length', 'RCs']
                xTrain =  ut.most_important_features(dataTrain, RelevantFeatures)
                xTest  =  ut.most_important_features(dataTest, RelevantFeatures)
            else:
                xTrain = dataTrain
                xTest = dataTest
            print('Running without dimensionality reduction; the components argument is ignored')
            print('before: ', xTrain.head())
            xTrain=(xTrain-xTrain.mean())/xTrain.std()
            xTest=(xTest-xTest.mean())/xTest.std()
            print('after: ')
            print(xTrain.head())

            if kernel == True:
                xTest = ut.kernelPolinomial(np.asanyarray(xTest),poli)
                xTrain = ut.kernelPolinomial(np.asanyarray(xTrain),poli)

        else:
            c_comp = components[0]
            print('components: ', c_comp)
            dataTrain=(dataTrain-dataTrain.mean())/dataTrain.std()
            dataTest=(dataTest-dataTest.mean())/dataTest.std()
            xTrain, yTrain = ut.dim_reduction(dataTrain, yTrain, c_comp)
            xTest, yTest = ut.dim_reduction(dataTest, yTest, c_comp)
            if kernel == True:
                xTest = ut.kernelPolinomial(xTest,poli)
                xTrain = ut.kernelPolinomial(xTrain,poli)
            else:
                xTest = pd.DataFrame(data = xTest, columns = ['PC'+str(i) for i in range(c_comp)])
                xTrain = pd.DataFrame(data = xTrain, columns = ['PC'+str(i) for i in range(c_comp)])

        xTrain['Class'] = yTrain.values
        xTest['Class']  = yTest.values
        DataTest, DataTrain = xTest, xTrain
        del xTest
        del xTrain

        acc_kfold_Train = []
        f1_kfold_Train = []

        skf = StratifiedKFold(n_splits=int(5))
        skf.get_n_splits(DataTrain, yTrain)
        start_1 = timeit.default_timer()


        for train_index, test_index in skf.split(DataTrain, yTrain):
            X_train, X_test = DataTrain.iloc[train_index,:], DataTrain.iloc[test_index,:]
            y_train, y_test = yTrain.iloc[train_index], yTrain.iloc[test_index]
            print('y_train')
            print((y_train.head()))


            model = bm.LogisticRegressionBinomialPrior(X_train, var_label1=kwargs['class_1'], var_label2=kwargs['class_2'],
                                               biasedSplit = biasedSplit, onetoOne = onetoOne, priors = priors_,
                                               className = name_class_col_, PCA =PCA_)



            trace, model, map = bm.fitbayesianmodel(model, ytrain= y_train, method=method[0],
                                                    n_=int(fit_iterations/njobs), MAP = False,
                                                    jobs  = njobs, star = i, classifier =modeltoFit,
                                                    PCA = PCA_)

            r = ut.get_z(X_train, trace = trace, model=model, burn_in = 500)
            predictions_1_Train = (ut.logistic_function_(r).mean(axis=1)>0.5).astype(int)

            y_train  = 1*(y_train == 'class_a')
            accTrain = accuracy_score(y_train, predictions_1_Train, normalize=True)
            f1Train = f1_score(y_train, predictions_1_Train, pos_label = 1)
            cm = confusion_matrix(y_train, predictions_1_Train)
            print('Accuracy train: ', accTrain)
            print('f1 train: ', f1Train)
            acc_kfold_Train.append(accTrain)
            f1_kfold_Train.append(f1Train)

        accTrain = np.mean(acc_kfold_Train)
        f1Train = np.mean(f1_kfold_Train)
        print('Mean Accuracy train: ', accTrain)
        print('Mean f1 train: ', f1Train)
        stop_1 = timeit.default_timer()
        time_CV = stop_1 - start_1



        start_post = timeit.default_timer()
        model = bm.LogisticRegressionBinomialPrior(DataTrain,
                                            var_label1=kwargs['class_1'], var_label2=kwargs['class_2'],
                                           biasedSplit = biasedSplit, onetoOne = onetoOne, priors = priors_,
                                           className=name_class_col_, PCA=PCA_)
        trace, model, map = bm.fitbayesianmodel(model, ytrain= yTrain, method=method[0],
                                                n_=int(fit_iterations/njobs), MAP = False,
                                                jobs  = njobs, star = i, classifier =modeltoFit,
                                                PCA = PCA_)
        stop_post = timeit.default_timer()
        time_post = stop_post - start_post

        del DataTest['Class']
        r = ut.get_z(DataTest, trace = trace, model=model, burn_in = 500)
        predictions_1_Test = (ut.logistic_function_(r).mean(axis=1)>0.5).astype(int)
        yTest  = 1*(yTest == 'class_a')
        accTest = accuracy_score(yTest, predictions_1_Test, normalize=True)
        f1Test = f1_score(yTest, predictions_1_Test, pos_label = 1)
        print('Accuracy test: ', accTest)
        print('f1 test: ', f1Test)

        gelRub = pm.diagnostics.gelman_rubin(trace)
        print('gelRub: ', gelRub)
        try:
             if(ml == True):
                 start_2 = timeit.default_timer()
                 logml_dict = bs.Marginal_llk(trace, model=model, maxiter=100000)
                 print('Estimated Marginal log-Likelihood %.5f'%(logml_dict['logml']))
                 marginal_likelihood = logml_dict['logml']
                 stop_2 = timeit.default_timer()
                 time_ml = stop_2 - start_2
                 try:
                     print('WAIC Estimation')
                     RLB_waic  = pm.waic(trace, model)
                     waic = RLB_waic.WAIC
                     print(waic)
                 except:
                     waic = 0

                 listData = [c_comp, poli, method, time_post, time_ml, marginal_likelihood, waic, gelRub, accTrain, accTest,
                 f1Train, f1Test]
                 time_ml_i.append(listData)
             else:
                 time_ml_i.append([c_comp, 'null', time_post, 'null', 'null', 'null', 'null', gelRub, accTrain, accTest,
                 f1Train, f1Test])
        except:
            print('marginal likelihood could not be estimated')
        print('exporting model')
        df = pd.DataFrame(np.asanyarray(time_ml_i))
        print(df.head())
        df.to_csv('Results/summaryMCMC/0309/'+'dataAnalysis_Features_'+i+'_'+modeltoFit+'_'+str(c_comp)+'_'+str(dim)+'_'+str(kernel)+'_'+'.csv')
        print("return the last model and trace")
        res.append([marginal_likelihood, model, trace, map, i, gelRub, accTrain, accTest,
        f1Train, f1Test, time_CV, time_ml, time_post])
    return res
Example #13
beta_p_post = chain_p['beta'].mean(axis=0)
y_p_post = alpha_p_post + np.dot(beta_p_post, x_1s)

plt.plot(x_1s[0][idx], y_p_post[idx], label='Pol order {}'.format(order))

plt.scatter(x_1s[0], y_1s)
plt.xlabel('$x$', fontsize=16)
plt.ylabel('$y$', fontsize=16, rotation=0)
plt.legend()
plt.savefig('img605.png')


print(pm.dic(trace=trace_l, model=model_l))
print(pm.dic(trace=trace_p, model=model_p))

waic_l = pm.waic(trace=trace_l, model=model_l)
waic_p = pm.waic(trace=trace_p, model=model_p)
loo_l = pm.loo(trace=trace_l, model=model_l)
loo_p = pm.loo(trace=trace_p, model=model_p)

plt.figure()
plt.subplot(121)
for idx, ic in enumerate((waic_l, waic_p)):
	plt.errorbar(ic[0], idx, xerr=ic[1], fmt='bo')
plt.title('WAIC')
plt.yticks([0, 1], ['linear', 'quadratic'])
plt.ylim(-1, 2)

plt.subplot(122)
for idx, ic in enumerate((loo_l, loo_p)):
	plt.errorbar(ic[0], idx, xerr=ic[1], fmt='go')
Example #14
X_shared = [
    shared(np.asarray(X_train[parameters[i]].values))
    for i in range(len(parameters))
]
y_shared = shared(np.asarray(y_train))

n_tracts = len(X_train.GIDTR.values)
tract_idx = [i for i in range(n_tracts)]
idx_shared = shared(np.asarray(tract_idx))

try:
    model = make_model()

    trace = train_model(model)
except Exception as e:
    print("An exception was caught: ", e)
    raise  # without a fitted model and trace, the WAIC step below cannot run

h_waic = pm.waic(trace, model)
f = open("waic.txt", "w+")
f.write(str(h_waic))
f.close()

## Predict for train, test
predict_and_save(X_train, y_train, "train", trace, model)
predict_and_save(X_test, y_test, "test", trace, model)

## Predict for all states
for state_name in state_names:
    state_data = state_to_data[state_name]
    predict_and_save(state_data, state_data.MRR_2010, state_name, trace, model)
Example #15
    step1 = pm.Slice([tau1, a_0])
    trace2 = pm.sample(1000, tune=500, step=step1)
chain2 = trace2
varnames1 = [ 'a0', 'δ', 'sigma', 'tau1']
pm.plot_posterior(chain2, varnames1, kde_plot=True)
plt.show()

pm.energyplot(chain2)  # energy plot: the more the energy distributions overlap, the better the model
plt.show()
# plot the autocorrelation curves
varnames1 = [ 'a0', 'δ', 'sigma', 'tau1']
pm.autocorrplot(chain2, varnames1)
plt.show()
print(pm.df_summary(chain2, varnames1))

print(pm.waic(trace=trace2, model=partial_model))
# ======================================================================
# Posterior analysis:
# plot the posterior predictive against the original data for comparison
#
# ======================================================================
# Bx_.set_value([7,8] , [5,6])
with partial_model:
    pp_trace = pm.sample_ppc(trace2, 1000)

# pp_trace['Observed'].mean(axis=0)

fig, ax = plt.subplots(figsize=(8, 6))
# ax.plot(x_plot, spline(x_plot), c='k', label="True function")
# original data
# j, k1 = 0, 6
Example #16
    p3 = ax.plot(xx, HER_mean, marker='o', color='r', markerfacecolor='None', markersize=10, linewidth=0, label='Mean')
    p4 = ax.plot(xx, HER_hi, marker='_', color='k', markersize=10, linewidth=0, label='High')
    # Vertical lines closing up whiskers
    for i in range(num_obs):
        ax.plot(np.array([i,i]), np.array([HER_lo[i], HER_hi[i]]), marker=None, color='k')

    # Legend
    handles = [p1[0], p2[0], p3[0], p4[0]]
    labels = ['Data', 'Low', 'Mean', 'High']
    ax.legend(handles, labels)
    ax.grid()
    plt.show()

# *************************************************************************************************
# Compute WAIC for both models
waic_base = pm.waic(trace_base, model_base)
waic_sex = pm.waic(trace_sex, model_sex)
# Set model names
model_base.name = 'base'
model_sex.name = 'sex'
# Comparison of WAIC
comp_WAIC_base_v_sex = pm.compare({model_base: trace_base, model_sex: trace_sex})
display(comp_WAIC_base_v_sex)
pm.compareplot(comp_WAIC_base_v_sex)

# Generate the posterior predictive in both base and sex models
try:
    post_pred_base = vartbl['post_pred_base']
    post_pred_sex = vartbl['post_pred_sex']
    print('Loaded posterior predictive for base and sex models.')
except:
Example #17
    trace_BF_0 = pm.sample(5000)
chain_BF_0 = trace_BF_0[500:]
pm.traceplot(trace_BF_0)
plt.show()

with pm.Model() as model_BF_1:
    theta = pm.Beta('theta', 8, 4)
    y = pm.Bernoulli('y', theta, observed=y)

    trace_BF_1 = pm.sample(5000)
chain_BF_1 = trace_BF_1[500:]
pm.traceplot(chain_BF_1)
plt.show()

# the smaller the better
waic_0 = pm.waic(chain_BF_0, model_BF_0)
waic_1 = pm.waic(chain_BF_1, model_BF_1)

loo_0 = pm.loo(chain_BF_0, model_BF_0)
loo_1 = pm.loo(chain_BF_1, model_BF_1)

plt.figure(figsize=(8, 4))
plt.subplot(121)
for idx, ic in enumerate((waic_0, waic_1)):
    plt.errorbar(ic[0], idx, xerr=ic[1], fmt='bo')
plt.title('WAIC')
plt.yticks([0, 1], ['model_0', 'model_1'])
plt.ylim(-1, 2)

plt.subplot(122)
for idx, ic in enumerate((loo_0, loo_1)):
Example #18
pm.summary(trace)
az.summary(trace)
#pm.gelman_rubin(trace)
with m6_11:
    az.plot_trace(trace)
plt.show()
az.plot_autocorr(trace)
plt.show()
az.plot_density(trace)
plt.show()
az.plot_forest(trace)
plt.show()

# might need to multiply by -2 to compare with McElreath
with m6_11:
    print(pm.waic(trace))
    print(pm.loo(trace))


#m6_13 = pm.Model()
with pm.Model() as m6_13:
    alpha = pm.Uniform('alpha', 0, 5)
    bm = pm.Uniform('bm', -10, 10)
    log_sigma = pm.Uniform('log_sigma', -10, 10)
    mu = alpha + bm*d['lmass']
    y_obs = pm.Normal('y_obs', mu=mu, sigma=np.exp(log_sigma), observed=d['kcal.per.g'])
    trace = pm.sample(2000, return_inferencedata=True, chains=2)

with m6_13:
    print(pm.summary(trace))
    print(pm.waic(trace))
Example #19
pm.summary(trace_m4, alpha=0.11)


# Making the slope conditional
with pm.Model() as m5:
    α = pm.Normal('α', 0, 0.1, shape=2)
    β = pm.Normal('β', 0, 0.3, shape=2)
    σ = pm.Exponential('σ', 1)
    μ = α[dfinal.cont_africa.values] + β[dfinal.cont_africa.values] * (dfinal.rugged_s.values - rbar)
    log_gdp_s_i = pm.Normal('log_gdp_s_i', μ, σ, observed=dfinal.log_gdp_s.values)
    trace_m5 = pm.sample()
pm.summary(trace_m5, alpha=0.11).round(decimals=2)
m5.name = 'm5'
pm.compare({m3: trace_m3, m4: trace_m4, m5: trace_m5}, ic='LOO')
waic_list = pm.waic(trace_m5, model=m5, pointwise=True)

loo_list = pm.loo(trace_m5, model=m5, pointwise=True)

pl.plot(waic_list.WAIC_i, marker='.', ls='', color='k');
pl.plot(loo_list.LOO_i, marker='s', ls='', markeredgecolor='r');


dfinal.head()
# Plotting the interaction

%matplotlib inline
_, axs = pl.subplots(ncols=2, figsize=(8,4))
ttls = ['Non-African', 'African']
df_m5 = pm.trace_to_dataframe(trace_m5)
for i, (axi, ttl) in enumerate(zip(axs, ttls)):
Example #20
        label="wiFitting estimate")
# ax.plot(x_plot, XZ_meanC, label="true estimate")
# ax.plot(elec_year[116:], betaMAPC[:], marker='*', alpha=.8, label="Fitting estimate")
# ax.set_xlim(0, 1)
ax.legend()
plt.show()

# ================================================================================
ax = pm.energyplot(trace_2)
bfmi = pm.bfmi(trace_2)
ax.set_title(f"BFMI = {bfmi:.2f}")
plt.show()

WAIC1 = pm.compare([trace_1, trace_2], [model_1, model_2])
print('WAIC1: ', WAIC1)
WAIC = pm.waic(trace=trace_1, model=model_1)
DIC = pm.dic(trace=trace_1, model=model_1)
print(WAIC)
print('DIC: ', DIC)


# ================================================================================
# compute the root-mean-square error (RMSE)
def Rmse(predictions, targets):
    return np.sqrt(np.mean((predictions - targets)**2))


# compute the RMSE
ALL_faults = (elec_data.Fault.values / elec_data.Nums.values)  # array form: compute the fault rates
MAP_tmp = MAP_tmp / 1000
rmse2 = {}
Example #21
# two kinds of energy plots
energy = trace2['energy']
energy_diff = np.diff(energy)
sns.distplot(energy - energy.mean(), label='energy')
sns.distplot(energy_diff, label='energy diff')
plt.legend()
plt.show()
pm.energyplot(trace2)
plt.show()
map_estimate = pm.find_MAP(model=unpooled_model)
print(map_estimate)
# plot the autocorrelation curves
pm.autocorrplot(chain2, varnames2)
plt.show()
print(pm.waic(trace2, unpooled_model))

#
with unpooled_model:
    post_pred = pm.sample_ppc(trace2)
plt.figure(figsize=(6, 4.5), facecolor=(1, 1, 1))
plt.figure()
# ppc = post_pred['Observed']  # rearranging the data lets you draw a categorical plot
# ax = sns.violinplot(data=ppc)
# plt.show()

ax = sns.distplot(post_pred['Observed'].mean(axis=1))
# ax = sns.distplot(y_shared.mean(axis=1), label='Posterior predictive means')
# ax.axvline(post_pred['Observed'].mean(), color='b', ls='--', label='Post mean')
ax.axvline(elec_faults.mean(), color='r', ls='--')
ax.set_xlabel(u"故障率均值", fontsize=14, fontproperties=font)  # xlabel: mean fault rate
Example #22
                           alpha_sg, alpha_sl, gamma, amb_gain_est,
                           amb_loss_est)

        # Make sure we don't have zeros or ones
        p = beta_response_transform_t(p)  # remove zeros and ones

        # Likelihood
        likelihood = pm.Bernoulli('likelihood', p=p, observed=choices)

        # FIT MODEL USING ADVI
        with model:
            approx = pm.fit(method='advi', n=60000)
        trace = approx.sample(4000)

        # Get WAIC
        waic = pm.waic(trace, model, scale='deviance')

        # Sample from posterior
        ppc = pm.sample_posterior_predictive(trace,
                                             samples=2000,
                                             model=model,
                                             var_names=[
                                                 i.name
                                                 for i in model.deterministics
                                                 if 'estimated' in i.name
                                             ])

        # Extract parameters etc
        fitting_results = pm.summary(trace)

        fitting_results = fitting_results[
Example #23
    def get_weights(self, predictions_aapl, predictions_msft, predictions_bac,
                    observations_aapl):
        N_SAMPLES = 1000
        N_TUNES = 1000

        sigma_start = np.std(observations_aapl)
        alpha_start = 1
        beta_start = 0

        # predictions_shared = theano.shared(predictions_aapl)
        predictions = np.stack(
            [predictions_aapl, predictions_msft, predictions_bac])

        with pm.Model() as model:
            sigma = pm.HalfNormal('sigma', 0.1, testval=alpha_start)
            alpha = pm.Normal('alpha',
                              mu=1,
                              sd=1,
                              testval=alpha_start,
                              shape=3)
            beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start, shape=3)
            mu = alpha * predictions + beta
            p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
            trace_model = pm.sample(N_SAMPLES, tune=N_TUNES)

        with pm.Model() as model_aapl:
            sigma = pm.HalfNormal('sigma', 0.1, testval=alpha_start)
            alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
            beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
            mu = alpha * predictions_aapl + beta
            p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
            trace_model_aapl = pm.sample(N_SAMPLES, tune=N_TUNES)

        with pm.Model() as model_msft:
            sigma = pm.HalfNormal('sigma', 0.1, testval=alpha_start)
            alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
            beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
            mu = alpha * predictions_msft + beta
            p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
            trace_model_msft = pm.sample(N_SAMPLES, tune=N_TUNES)

        with pm.Model() as model_bac:
            sigma = pm.HalfNormal('sigma', 0.1, testval=alpha_start)
            alpha = pm.Normal('alpha', mu=1, sd=1, testval=alpha_start)
            beta = pm.Normal('beta', mu=0, sd=1, testval=beta_start)
            mu = alpha * predictions_bac + beta
            p = pm.Normal('p', mu=mu, sd=sigma, observed=observations_aapl)
            trace_model_bac = pm.sample(N_SAMPLES, tune=N_TUNES)

        compare_1 = pm.compare(
            [trace_model_aapl, trace_model_msft, trace_model_bac],
            [model_aapl, model_msft, model_bac],
            method='pseudo-BMA')
        compare_2 = pm.compare(
            [trace_model_msft, trace_model_bac, trace_model_aapl],
            [model_msft, model_bac, model_aapl],
            method='pseudo-BMA')

        compare_3 = pm.compare(
            [trace_model_aapl, trace_model_msft, trace_model_bac],
            [model_aapl, model_msft, model_bac],
            method='BB-pseudo-BMA')

        compare_4 = pm.compare(
            [trace_model_aapl, trace_model_msft, trace_model_bac],
            [model_aapl, model_msft, model_bac],
            method='stacking')

        compare_5 = pm.compare([trace_model_msft, trace_model_bac],
                               [model_msft, model_bac],
                               method='pseudo-BMA')

        compare_6 = pm.compare([trace_model_aapl, trace_model_msft],
                               [model_aapl, model_msft],
                               method='BB-pseudo-BMA')

        compare_7 = pm.compare([trace_model_aapl, trace_model_msft],
                               [model_aapl, model_msft],
                               method='stacking')

        # pm.traceplot(trace_model)

        d = pd.read_csv('data/milk.csv', sep=';')
        d['neocortex'] = d['neocortex.perc'] / 100
        d.dropna(inplace=True)
        d.shape

        a_start = d['kcal.per.g'].mean()
        sigma_start = d['kcal.per.g'].std()

        mass_shared = theano.shared(np.log(d['mass'].values))
        neocortex_shared = theano.shared(d['neocortex'].values)

        with pm.Model() as m6_11:
            alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
            mu = alpha + 0 * neocortex_shared
            sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
            kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
            trace_m6_11 = pm.sample(1000, tune=1000)

        pm.traceplot(trace_m6_11)

        with pm.Model() as m6_12:
            alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
            beta = pm.Normal('beta', mu=0, sd=10)
            sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
            mu = alpha + beta * neocortex_shared
            kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
            trace_m6_12 = pm.sample(1000, tune=1000)

        with pm.Model() as m6_13:
            alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
            beta = pm.Normal('beta', mu=0, sd=10)
            sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
            mu = alpha + beta * mass_shared
            kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
            trace_m6_13 = pm.sample(1000, tune=1000)

        with pm.Model() as m6_14:
            alpha = pm.Normal('alpha', mu=0, sd=10, testval=a_start)
            beta = pm.Normal('beta', mu=0, sd=10, shape=2)
            sigma = pm.HalfCauchy('sigma', beta=10, testval=sigma_start)
            mu = alpha + beta[0] * mass_shared + beta[1] * neocortex_shared
            kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=d['kcal.per.g'])
            trace_m6_14 = pm.sample(1000, tune=1000)

        pm.waic(trace_m6_14, m6_14)

        compare_df = pm.compare(
            [trace_m6_11, trace_m6_12, trace_m6_13, trace_m6_14],
            [m6_11, m6_12, m6_13, m6_14],
            method='pseudo-BMA')

        compare_df.loc[:, 'model'] = pd.Series(
            ['m6.11', 'm6.12', 'm6.13', 'm6.14'])
        compare_df = compare_df.set_index('model')
        compare_df

        pm.compareplot(compare_df)
Example #24
 def get_waic(self):
     return pm.waic(trace=self.trace_, model=self.model)
Example #25
    #     # Draw samples
    #     map_estimate = pm.find_MAP()
    #
    # map_gen_rec = true_params.append(pd.DataFrame([map_estimate[param_name].flatten() for param_name in param_names], index=param_names))
    # map_gen_rec.to_csv(save_dir + save_id + '_map_gen_rec.csv')
    # if not run_on_cluster:
    #     plot_gen_rec(param_names=param_names, gen_rec=map_gen_rec, save_name=save_dir + save_id + '_map_gen_rec_plot.png')
    #
# `with model:` restored (it was commented out above) so the sampling call parses
with model:
    MCMC_trace = pm.sample(n_samples,
                           tune=n_tune,
                           chains=n_chains,
                           cores=n_cores)  # , start=map_estimate

print("WAIC: {0}".format(pm.waic(MCMC_trace, model).WAIC))
MCMC_model_summary = pm.summary(MCMC_trace)
pd.DataFrame(MCMC_model_summary).to_csv(save_dir + save_id + '_summary.csv')
mcmc_params = np.full((len(param_names), n_subj), np.nan)
for i, param_name in enumerate(param_names):
    idxs = MCMC_model_summary.index.str.contains(param_name + '__')
    mcmc_params[i] = np.array(MCMC_model_summary.loc[idxs, 'mean'])
mcmc_params = pd.DataFrame(mcmc_params, index=param_names)
mcmc_gen_rec = true_params.append(mcmc_params)
mcmc_gen_rec.to_csv(save_dir + save_id + '_mcmc_gen_rec.csv')

if not run_on_cluster:
    pm.traceplot(MCMC_trace)
    plt.savefig(save_dir + save_id + '_traceplot.png')
    plot_gen_rec(param_names=param_names,
                 gen_rec=mcmc_gen_rec,
Example #26
disease = "covid19"
best_model = {}

print("Evaluating model for {}...".format(disease))

prediction_region = "germany"

#data = load_daily_data(disease, prediction_region, county_info)
#data_train, target_train, data_test, target_test = split_data(data)

#tspan = (target_train.index[0], target_train.index[-1])
waics = {}
# reintroduce combinations as we have the right set of models! // use_eastwest is dummy!
# for (name, (use_interaction, use_report_delay)) in ia_delay_by_name.items():
for (i, _) in enumerate(combinations):
    # load sample trace
    try:
        trace = load_trace_by_i(disease, i)
    except:
        print("Model nr. {} does not exist, skipping...\n".format(i))
        continue
    # load model
    model = load_model_by_i(disease, i)

    with model:
        waics[str(i)] = pm.waic(trace).WAIC

with open('../data/waics.pkl', "wb") as f:
    pkl.dump(waics, f)
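
A follow-up sketch (hypothetical) that reloads the saved dictionary and picks the model with the lowest WAIC, mirroring Example #11:

with open('../data/waics.pkl', "rb") as f:
    waics = pkl.load(f)
best_i = min(waics, key=waics.get)  # smaller WAIC is better
print("Best model:", best_i)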
Example #27
    trace_DUAK = vartbl['trace_DUAK']
    print(f'Loaded samples for the District-Urban-Age-Kids model in trace_DUAK.')
except:
    print(f'Sampling from District-Urban-Age-Kids model...')
    with model_DUAK:
        nuts_kwargs = {'target_accept': 0.90}
        trace_DUAK = pm.sample(draws=num_samples, tune=num_tune, nuts_kwargs=nuts_kwargs, chains=chains, cores=cores)
    vartbl['trace_DUAK'] = trace_DUAK
    save_vartbl(vartbl, fname)

# *************************************************************************************************
# B6 Use WAIC to compare your models. What are your conclusions?
# *************************************************************************************************

# Compute WAIC for each model under consideration
waic_fe = pm.waic(trace_fe, model_fe)
waic_ve = pm.waic(trace_ve, model_ve)
waic_DUA = pm.waic(trace_DUA, model_DUA)
waic_DUAK = pm.waic(trace_DUAK, model_DUAK)

# Set the names of these models
model_fe.name = 'FixedEffect'
model_ve.name = 'VariableEffect'
model_DUA.name = 'DistrictUrbanAge'
model_DUAK.name = 'DistrictUrbanAgeKids'

# Compare the models
df_model_comp = pm.compare({model_fe: trace_fe,
                            model_ve: trace_ve,
                            model_DUA: trace_DUA,
                            model_DUAK: trace_DUAK})