Example #1
    def test_glm_from_formula(self):
        with Model() as model:
            NAME = 'glm'
            GLM.from_formula('y ~ x', self.data_linear, name=NAME)
            start = find_MAP()
            step = Slice(model.vars)
            trace = sample(500, step=step, start=start, progressbar=False, random_seed=self.random_seed)

            assert round(abs(np.mean(trace['%s_Intercept' % NAME])-self.intercept), 1) == 0
            assert round(abs(np.mean(trace['%s_x' % NAME])-self.slope), 1) == 0
            assert round(abs(np.mean(trace['%s_sd' % NAME])-self.sd), 1) == 0
Example #2
    def test_glm_from_formula(self):
        with Model() as model:
            NAME = 'glm'
            GLM.from_formula('y ~ x', self.data_linear, name=NAME)
            start = find_MAP()
            step = Slice(model.vars)
            trace = sample(500, step=step, start=start, progressbar=False, random_seed=self.random_seed)

            self.assertAlmostEqual(np.mean(trace['%s_Intercept' % NAME]), self.intercept, 1)
            self.assertAlmostEqual(np.mean(trace['%s_x' % NAME]), self.slope, 1)
            self.assertAlmostEqual(np.mean(trace['%s_sd' % NAME]), self.sd, 1)
Example #3
    def test_glm_from_formula(self):
        with Model() as model:
            NAME = "glm"
            GLM.from_formula("y ~ x", self.data_linear, name=NAME)
            start = find_MAP()
            step = Slice(model.vars)
            trace = sample(
                500, tune=0, step=step, start=start, progressbar=False, random_seed=self.random_seed
            )

            assert round(abs(np.mean(trace["%s_Intercept" % NAME]) - self.intercept), 1) == 0
            assert round(abs(np.mean(trace["%s_x" % NAME]) - self.slope), 1) == 0
            assert round(abs(np.mean(trace["%s_sd" % NAME]) - self.sigma), 1) == 0
Example #4
    def test_glm(self):
        with Model() as model:
            vars_to_create = {"glm_sd", "glm_sd_log__", "glm_y", "glm_x0", "glm_Intercept"}
            GLM(self.data_linear["x"], self.data_linear["y"], name="glm")
            start = find_MAP()
            step = Slice(model.vars)
            trace = sample(
                500, tune=0, step=step, start=start, progressbar=False, random_seed=self.random_seed
            )
            assert round(abs(np.mean(trace["glm_Intercept"]) - self.intercept), 1) == 0
            assert round(abs(np.mean(trace["glm_x0"]) - self.slope), 1) == 0
            assert round(abs(np.mean(trace["glm_sd"]) - self.sigma), 1) == 0
            assert vars_to_create == set(model.named_vars.keys())
Example #5
    def test_glm(self):
        with Model() as model:
            vars_to_create = {
                'glm_sd_log__', 'glm_y', 'glm_x0', 'glm_Intercept'
            }
            GLM(self.data_linear['x'], self.data_linear['y'], name='glm')
            start = find_MAP()
            step = Slice(model.vars)
            trace = sample(500,
                           step=step,
                           start=start,
                           progressbar=False,
                           random_seed=self.random_seed)
            assert round(abs(np.mean(trace['glm_Intercept']) - self.intercept),
                         1) == 0
            assert round(abs(np.mean(trace['glm_x0']) - self.slope), 1) == 0
            assert round(abs(np.mean(trace['glm_sd']) - self.sd), 1) == 0
            assert vars_to_create == set(model.named_vars.keys())
Example #6
    def test_glm(self):
        with Model() as model:
            vars_to_create = {
                'glm_sd_log_',
                'glm_y',
                'glm_x0',
                'glm_Intercept'
            }
            GLM(
                self.data_linear['x'],
                self.data_linear['y'],
                name='glm'
            )
            start = find_MAP()
            step = Slice(model.vars)
            trace = sample(500, step=step, start=start, progressbar=False, random_seed=self.random_seed)
            self.assertAlmostEqual(np.mean(trace['glm_Intercept']), self.intercept, 1)
            self.assertAlmostEqual(np.mean(trace['glm_x0']), self.slope, 1)
            self.assertAlmostEqual(np.mean(trace['glm_sd']), self.sd, 1)
            self.assertSetEqual(vars_to_create, set(model.named_vars.keys()))
Example #7
    def test_strange_types(self):
        with Model():
            with pytest.raises(ValueError):
                GLM(1, self.data_linear["y"], name="lm")
Example #8
from pymc3 import Model, Normal, HalfCauchy, sample, stats
from pymc3.glm import GLM


def pooled_model(X, y):
    '''
    This function builds a pooled model in PyMC3. It will only work
    with four independent variables in the X matrix.

    INPUT: X - an np array containing a standardized variable matrix
               with four variables
           y - an np array containing the target values
    OUTPUT: pooled_model - a PyMC3 model object
            trace - a PyMC3 trace object
    '''

    data = dict(x1=X[:, 0], x2=X[:, 1], x3=X[:, 2], x4=X[:, 3], y=y)
    with Model() as pooled_model:
        # Specify the GLM and pass in the data. The resulting linear model,
        # its likelihood, and all its parameters are automatically added to
        # our model.
        GLM.from_formula('y ~ 1 + x1 + x2 + x3 + x4', data)
        # draw 1000 posterior samples using NUTS sampling
        trace = sample(1000, n_init=50000, tune=1000, njobs=1)

    return pooled_model, trace
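
# A minimal usage sketch, not from the original module: fit pooled_model on
# synthetic data. All names, shapes, and coefficient values below are
# illustrative assumptions.
def _demo_pooled_model():
    import numpy as np
    rng = np.random.RandomState(42)
    X_demo = rng.randn(200, 4)                                     # four standardized predictors
    y_demo = 1.5 + X_demo.dot([0.5, -0.3, 0.8, 0.1]) + rng.randn(200)
    model, trace = pooled_model(X_demo, y_demo)
    # the posterior mean should sit near the simulated coefficient
    print('x1 coefficient ~', trace['x1'].mean())                  # expect roughly 0.5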

def unpooled_model(X, y, level, n_levels):
    '''
    This function builds an unpooled model in PyMC3. It will only work
    with four independent variables in the X matrix.

    INPUT: X - an np array containing a standardized variable matrix
               with four variables
           y - an np array containing the target values
           level - an array with the level value for each row of the matrix
           n_levels - an INT indicating the number of unique level names
    OUTPUT: unpooled_model - a PyMC3 model object
            unpooled_trace - a PyMC3 trace object
    '''

    with Model() as unpooled_model:
        # one intercept per level, slopes shared across levels
        intercept = Normal('intercept', 0, sd=1e5, shape=n_levels)
        beta1 = Normal('beta1', 0, sd=1e5)
        beta2 = Normal('beta2', 0, sd=1e5)
        beta3 = Normal('beta3', 0, sd=1e5)
        beta4 = Normal('beta4', 0, sd=1e5)

        sigma = HalfCauchy('sigma', 5)

        theta = (intercept[level] + beta1 * X[:, 0] + beta2 * X[:, 1]
                 + beta3 * X[:, 2] + beta4 * X[:, 3])
        y_obs = Normal('y', theta, sd=sigma, observed=y)

    with unpooled_model:
        unpooled_trace = sample(1000, n_init=50000, tune=1000)

    return unpooled_model, unpooled_trace


def multi_model(X, y, level, n_levels):
    '''
    This function builds a multilevel (hierarchical) model in PyMC3. It will
    only work with four independent variables in the X matrix.

    INPUT: X - an np array containing a standardized variable matrix
               with four variables
           y - an np array containing the target values
           level - an array with the level value for each row of the matrix
           n_levels - an INT indicating the number of unique level names
    OUTPUT: multi_model - a PyMC3 model object
            multi_trace - a PyMC3 trace object
    '''

    with Model() as multi_model:
        # set intercept hyperpriors
        mu_intercept = Normal('mu_intercept', mu=0., sd=1e5)
        sigma_intercept = HalfCauchy('sigma_intercept', 5)

        # set beta1 hyperpriors
        mu_beta1 = Normal('mu_beta1', mu=0., sd=1e5)
        sigma_beta1 = HalfCauchy('sigma_beta1', 5)

        # set beta2 hyperpriors
        mu_beta2 = Normal('mu_beta2', mu=0., sd=1e5)
        sigma_beta2 = HalfCauchy('sigma_beta2', 5)

        # set beta3 hyperpriors
        mu_beta3 = Normal('mu_beta3', mu=0., sd=1e5)
        sigma_beta3 = HalfCauchy('sigma_beta3', 5)

        # set beta4 hyperpriors
        mu_beta4 = Normal('mu_beta4', mu=0., sd=1e5)
        sigma_beta4 = HalfCauchy('sigma_beta4', 5)

        # per-level intercept and slopes drawn from the shared hyperpriors
        intercept = Normal('intercept', mu=mu_intercept, sd=sigma_intercept, shape=n_levels)
        beta1 = Normal('beta1', mu=mu_beta1, sd=sigma_beta1, shape=n_levels)
        beta2 = Normal('beta2', mu=mu_beta2, sd=sigma_beta2, shape=n_levels)
        beta3 = Normal('beta3', mu=mu_beta3, sd=sigma_beta3, shape=n_levels)
        beta4 = Normal('beta4', mu=mu_beta4, sd=sigma_beta4, shape=n_levels)

        sigma = HalfCauchy('sigma', 5)

        HIV_like = (intercept[level] + beta1[level] * X[:, 0] + beta2[level] * X[:, 1]
                    + beta3[level] * X[:, 2] + beta4[level] * X[:, 3])
        y_obs = Normal('y', HIV_like, sd=sigma, observed=y)

    with multi_model:
        multi_trace = sample(1000, n_init=150000, tune=50000)

    return multi_model, multi_trace


def score_model(model, trace):
    '''Print the WAIC and LOO scores (with standard errors) for a model.'''

    waic_score = stats.waic(model=model, trace=trace)
    loo_score = stats.loo(model=model, trace=trace)

    print('WAIC for this model is {} ({})'.format(round(waic_score[0], 2),
                                                  round(waic_score[1], 2)))

    print('LOO for this model is {} ({})'.format(round(loo_score[0], 2),
                                                 round(loo_score[1], 2)))

if __name__ == '__main__':
    df = load.load_all_data(2015)
    df_no_zero_outlier = df[((df.HIVincidence > 0) & (df.HIVincidence < 130))].copy()
    us_states, n_states, state = make_levels(df_no_zero_outlier, 'STATEABBREVIATION')
    print(us_states, n_states, state)
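
    # Hedged continuation sketch, not from the original script: wire the three
    # model builders into score_model. The predictor column names 'x1'..'x4'
    # are placeholders; the real column names live in the project's data.
    X = df_no_zero_outlier[['x1', 'x2', 'x3', 'x4']].values
    y = df_no_zero_outlier['HIVincidence'].values

    model, trace = pooled_model(X, y)
    score_model(model, trace)
    for build in (unpooled_model, multi_model):
        model, trace = build(X, y, state, n_states)
        score_model(model, trace)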
Example #9
#ax3.set_ylabel("")
ax1.set_xticklabels(['Ketamine', 'Midazolam'], fontsize=14)
ax2.set_xticklabels(['Ketamine', 'Midazolam'], fontsize=14)
#ax3.set_xticklabels(['Ketamine', 'Midazolam'], fontsize=14)
fig.savefig("changeCorrelation.png", dpi=300, bbox_inches='tight')

# %% [markdown]
# ## Use PyMC3 to compare the difference in correlation

# %%
# Using PyMC3
import pymc3 as pm
from pymc3.glm import GLM

with pm.Model() as model_glm:
    GLM.from_formula('amg_hipp_change ~ groupIdx', dfCors)
    trace = pm.sample(draws=4000, tune=3000)

# %%
pm.summary(trace, credible_interval=.95).round(2)

# %%
# Using PyMC3 - compare anterior hippocampus and anterior vmPFC
with pm.Model() as model_glm2:
    GLM.from_formula('hippoAnt_vmpfcAnt_change ~ groupIdx', dfCors)
    trace2 = pm.sample(draws=4000, tune=2000)

# %%
pm.summary(trace2, credible_interval=.95).round(2)
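
# %% [markdown]
# A minimal follow-up sketch (not in the original notebook): the posterior
# probability that the group effect is positive, read directly off each trace.
# 'groupIdx' is the coefficient name patsy assigns to the formula term above.

# %%
print('P(groupIdx > 0), amg_hipp_change:', (trace['groupIdx'] > 0).mean())
print('P(groupIdx > 0), hippoAnt_vmpfcAnt_change:', (trace2['groupIdx'] > 0).mean())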

# %%
Example #10
import numpy as np
import pandas
import pylab as plt
from scipy.stats import uniform, norm

from pymc3 import Model, sample, summary, traceplot
from pymc3.glm import GLM

# Data
np.random.seed(1056)                          # set seed to replicate example
nobs= 250                                     # number of obs in model 
x1 = uniform.rvs(size=nobs)                   # random uniform variable

beta0 = 2.0                                   # intercept
beta1 = 3.0                                   # angular coefficient

xb = beta0 + beta1 * x1                       # linear predictor, xb
y = norm.rvs(loc=xb, scale=1.0, size=nobs)    # draw y around the linear predictor

                                              
# Fit
df = pandas.DataFrame({'x1': x1, 'y': y})     # assemble data into a DataFrame

with Model() as model_glm:
    GLM.from_formula('y ~ x1', df)
    trace = sample(5000)

# Output
summary(trace)

# show graphical output
traceplot(trace)
plt.show()
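
# A hedged sanity check, not in the original snippet: the posterior means
# should land near the simulated truth, beta0 = 2.0 and beta1 = 3.0. PyMC3's
# formula interface names these coefficients 'Intercept' and 'x1'.
print('Intercept ~', trace['Intercept'].mean())   # expect roughly 2.0
print('x1        ~', trace['x1'].mean())          # expect roughly 3.0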
Example #11
import os

import matplotlib.pyplot as plt
import pandas
import pymc3 as pm
from pymc3.glm import GLM
from pymc3.glm.families import Binomial


def get_mcmc_betas(train_Ys, train_Xs):
    """
    Fit a Bayesian logistic regression on columns 1-45 of train_Xs via MCMC.

    :param train_Ys: (array) Binary target values.
    :param train_Xs: (2D array) Feature matrix; columns 1 through 45 are used.
    :return mcmc_betas: (Series) Coefficients of intercept and betas.
    """
    print('train_size:', len(train_Xs))

    train_data = pandas.DataFrame({
        'Y': train_Ys,
        # columns 1 through 45 of the feature matrix, named X_1 .. X_45
        **{'X_%d' % i: train_Xs[:, i] for i in range(1, 46)}
    })

    with pm.Model():
        GLM.from_formula(
            'Y ~ ' + ' + '.join('X_%d' % i for i in range(1, 46)),
            train_data,
            family=Binomial())
        trace = pm.sample(cores=os.cpu_count())
        summary = pm.summary(trace)
        pm.traceplot(trace)
        plt.savefig('stats/posterior_distribution.png')
        plt.show()

    mcmc_betas = summary['mean']
    return mcmc_betas
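
# A minimal smoke test, not from the original module; the synthetic shapes
# simply satisfy the 1..45 column indexing that get_mcmc_betas expects
# (column 0 is unused).
if __name__ == '__main__':
    import numpy as np
    rng = np.random.RandomState(0)
    train_Xs = rng.randn(500, 46)
    train_Ys = (train_Xs[:, 1] + 0.5 * train_Xs[:, 2] + rng.randn(500) > 0).astype(int)
    mcmc_betas = get_mcmc_betas(train_Ys, train_Xs)
    print(mcmc_betas.head())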