Example #1
0
def main():
  """Fit a simple linear regression y ~ Normal(beta*X + alpha, sigma) with PyMC3.

  Samples the model, optionally saves a trace plot, prints the Gelman-Rubin
  statistic and the elapsed time, draws trace/forest plots, and round-trips
  the model and trace through pickle files in the working directory.

  Reads names that are not defined in this block: generate_sample,
  multicore, itenum, chainnum, progress, saveimage, merge_traces, t0, time,
  pkl, pm (presumably module-level globals/imports -- verify).
  """
  X, Y = generate_sample()

  # Priors: Normal(0, sd=20) on intercept and slope; Uniform with lower=0 on
  # the noise scale (no upper bound is passed, so the library default applies).
  with pm.Model() as model:
    alpha = pm.Normal('alpha', mu=0, sd=20)
    beta = pm.Normal('beta', mu=0, sd=20)
    sigma = pm.Uniform('sigma', lower=0)
    y = pm.Normal('y', mu=beta*X+alpha, sd=sigma, observed=Y)
    # The MAP estimate is handed to the NUTS step and, in the multicore
    # branch below, also used as the sampling start point.
    start = pm.find_MAP()
    step = pm.NUTS(state=start)

  with model:
    if (multicore):
      # One pm.sample call with njobs=chainnum, seeded 0..chainnum-1.
      trace = pm.sample(itenum, step, start=start,
        njobs=chainnum, random_seed=range(chainnum), progressbar=progress)
    else:
      # Run the chains one after another, then merge them into one trace.
      # NOTE(review): unlike the multicore branch, start=start is not passed
      # here -- confirm that is intended.
      ts = [pm.sample(itenum, step, chain=i, progressbar=progress)
            for i in range(chainnum)]
      trace = merge_traces(ts)

    if (saveimage):
      pm.traceplot(trace).savefig("simple_linear_trace.png")
    print "Rhat = {0}".format(pm.gelman_rubin(trace))

  # t0 is not set anywhere in this block; it must be defined by the caller's
  # scope before main() runs.
  t1 = time.clock()
  print "elapsed time = {0}".format(t1 - t0)

  # When the chains were run sequentially, continue with only the first
  # chain's trace rather than the merged one.
  if(not multicore):
  	trace=ts[0]
  with model:
  	pm.traceplot(trace,model.vars)

  pm.forestplot(trace)

  # Dump the model and trace to pickle files, then load them straight back.
  # NOTE(review): the files are opened in text mode ("w" / default "r");
  # under Python 3 pickle requires binary mode ("wb" / "rb") -- confirm the
  # target interpreter.
  with open("simplelinearregression_model.pkl","w") as fpw:
  	pkl.dump(model,fpw)
  with open("simplelinearregression_trace.pkl","w") as fpw:
  	pkl.dump(trace,fpw)
  with open("simplelinearregression_model.pkl") as fp:
  	model=pkl.load(fp)
  with open("simplelinearregression_trace.pkl") as fp:
  	trace=pkl.load(fp)
   
    print "MODEL BUILT! READY TO FIND MAP"
    start = pm.find_MAP()
    step = pm.Slice()
    # step = pm.NUTS(scaling=start)

    niter = 300
    trace = pm.sample(niter, step, start, progressbar=True)

    pm.traceplot(trace)
        # , vars=['muA'])
    # plt.savefig("data1.png")
    plt.show()

    pm.forestplot(trace)
    plt.show()

    # # print ppc['Y_obs']
    # print ppc['distributionA']
    # print ppc['distributionB']
    # tau = pm.Uniform('tau', lower=0, upper=1000)
    # lam = pm.Uniform('lam', lower = 0, upper = 1000)
    # alpha = pm.Uniform('alpha', lower = 0.0000000000000001, upper = 100)
    
    # p_weekend = float(len(b)) / (len(b) + len(c))
    # print "Got here"
    # # weekend = pm.Bernoulli('weekend', p_weekend, observed = weekend_observed)
    # print b[:3000]
    # print c[:3000]
    # startTimeWeekend = pm.Normal('a', mu = muA, sd = sigmaA, observed = b[:30000])
Example #3
0
 def posterior_forestplot(self, **kwargs):
     """Draw a forest plot of the stored posterior.

     All keyword arguments are forwarded unchanged to ``pm.forestplot``,
     and whatever that call returns is returned to the caller.
     """
     posterior = self.posterior_
     return pm.forestplot(posterior, **kwargs)
Example #4
0
def mixed_effects():
    """Fit a varying-intercept negative-binomial regression and save plots.

    The response is ``df['days_to_first_price_update']``. Each level of the
    grouping column (``classes``) gets its own intercept; every other
    covariate shares a single Normal(0, sd=20) coefficient. The model is
    sampled with Metropolis starting from the MAP estimate, after which
    trace plots, forest plots and a posterior-predictive figure are written
    to PNG files in the working directory.

    Reads names that are not defined in this block: ``df``, ``pm``, ``tt``,
    ``preprocessing``, ``plt``, ``np``, ``red``, ``entity_plotA`` and
    ``entity_plotB`` (presumably module-level globals/imports -- verify).

    Returns:
        None. All results are emitted as printed output and saved figures.
    """
    le = preprocessing.LabelEncoder()

    # Column whose levels define the per-group intercepts.
    classes = 'FF49_industry'

    print("Grouping by: {}".format(classes))

    # Integer-encode the grouping column; le.classes_ keeps the labels.
    class_idx = le.fit_transform(df[classes])
    n_classes = len(le.classes_)

    NSamples = 50000
    # Floor division keeps `burn` an int on Python 3 as well; a float here
    # would make the slice `trace[-burn::thin]` below raise TypeError.
    burn = NSamples // 10
    thin = 2

    # Variable names passed to pm.summary / pm.forestplot further down.
    covariates = [
        'Intercept',
        '#Syndicate Members',
        '#Lead Underwriters',
        'Underwriter Rank',
        'Amends Down',
        '#S1A Amendments',
        'Share Overhang',
        'log(1+Sales)',
        'log(Proceeds)',
        'CASI',
        'IPO Market Returns',
        'Industry Returns',
        'BAA Spread',
    ]

    y = df['days_to_first_price_update'].values

    with pm.Model() as model:

        # Parameters. The creation order is kept as-is because the plots
        # below slice trace.varnames by position.
        intercept = pm.Gamma('Intercept', alpha=.1, beta=.1, shape=n_classes)

        beta_underwriter_syndicate_size = pm.Normal('#Syndicate Members', mu=0, sd=20)
        beta_underwriter_num_leads = pm.Normal('#Lead Underwriters', mu=0, sd=20)
        beta_underwriter_rank_avg = pm.Normal('Underwriter Rank', mu=0, sd=20)
        beta_num_SEC_amendments = pm.Normal('#S1A Amendments', mu=0, sd=20)
        beta_amends_down = pm.Normal('Amends Down', mu=0, sd=20)
        beta_share_overhang = pm.Normal('Share Overhang', mu=0, sd=20)
        beta_log_sales = pm.Normal('log(1+Sales)', mu=0, sd=20)
        beta_log_proceeds = pm.Normal('log(Proceeds)', mu=0, sd=20)
        beta_CASI = pm.Normal('CASI', mu=0, sd=20)
        beta_BAA_spread = pm.Normal('BAA Spread', mu=0, sd=20)
        beta_M3_initial_returns = pm.Normal('IPO Market Returns', mu=0, sd=20)
        beta_M3_indust_rets = pm.Normal('Industry Returns', mu=0, sd=20)

        # Hyperparameter: dispersion of the negative-binomial likelihood.
        alpha = pm.Gamma('alpha', alpha=.1, beta=.1)

        # Mean of the negative-binomial likelihood: 1 + exp(linear
        # predictor), with the intercept looked up per observation by group.
        mu = 1 + tt.exp(
            intercept[class_idx]
            + beta_underwriter_syndicate_size * df.underwriter_syndicate_size
            + beta_underwriter_num_leads * df.underwriter_num_leads
            + beta_underwriter_rank_avg * df.underwriter_rank_avg
            + beta_amends_down * df['Amends Down']
            + beta_num_SEC_amendments * df.num_SEC_amendments
            + beta_share_overhang * df['Share Overhang']
            + beta_log_sales * df['log(1+Sales)']
            + beta_CASI * df['CASI']
            + beta_log_proceeds * df['log(Proceeds)']
            + beta_BAA_spread * df['BAA Spread']
            + beta_M3_initial_returns * df.M3_initial_returns
            + beta_M3_indust_rets * df.M3_indust_rets
        )

        # Dependent variable: observed node plus an unobserved node with the
        # same parameters, sampled alongside for posterior prediction.
        BoundedNegativeBinomial = pm.Bound(pm.NegativeBinomial, lower=1)
        y_est = BoundedNegativeBinomial('y_est', mu=mu, alpha=alpha, observed=y)
        y_pred = BoundedNegativeBinomial('y_pred', mu=mu, alpha=alpha, shape=y.shape)

        start = pm.find_MAP()
        step = pm.Metropolis(start=start)
        trace = pm.sample(NSamples, step, start=start, njobs=1, progressbar=True)

        # Keep the last `burn` draws, thinned by `thin`.
        # NOTE(review): this keeps only the final tenth of the chain rather
        # than discarding the first tenth -- confirm that is intended.
        trace = trace[-burn::thin]

    ## POSTERIOR PREDICTIVE CHECKS
    y_pred = trace.get_values('y_pred')
    pm.summary(trace, vars=covariates)

    # PARAMETER POSTERIORS
    anno_kwargs = {'xycoords': 'data', 'textcoords': 'offset points',
                   'rotation': 90, 'va': 'bottom', 'fontsize': 'large'}

    n0, n1, n2, n3 = 1, 5, 9, 14  # numbering for posterior plots

    # The summary table is computed once and reused by every figure below.
    summary = pm.df_summary(trace)
    posterior_means = summary['mean']
    mean_lines = {k: v['mean'] for k, v in summary.iterrows()}

    # NOTE(review): the first figure's panels begin with 'Intercept' while
    # the annotated means come from summary rows n0:n1 -- confirm the rows
    # line up with the panels.
    ax = pm.traceplot(trace, vars=['Intercept'] + trace.varnames[n0:n1],
                      lines=mean_lines)
    for i, mn in enumerate(posterior_means[n0:n1]):
        ax[i, 0].annotate('{:.3f}'.format(mn), xy=(mn, 0), xytext=(5, 10), color=red, **anno_kwargs)
    plt.savefig('figure1_mixed.png')

    ax2 = pm.traceplot(trace, trace.varnames[n1:n2], lines=mean_lines)
    for i, mn in enumerate(posterior_means[n1:n2]):
        ax2[i, 0].annotate('{:.3f}'.format(mn), xy=(mn, 0), xytext=(5, 10), color=red, **anno_kwargs)
    plt.savefig('figure2_mixed.png')

    ax3 = pm.traceplot(trace, trace.varnames[n2:n3], lines=mean_lines)
    for i, mn in enumerate(posterior_means[n2:n3]):
        ax3[i, 0].annotate('{:.3f}'.format(mn), xy=(mn, 0), xytext=(5, 10), color=red, **anno_kwargs)
    plt.savefig('figure3_mixed.png')

    _ = pm.forestplot(trace, vars=['Intercept'], ylabels=le.classes_)
    plt.savefig('forestplot_intercepts.png')
    _ = pm.forestplot(trace, vars=covariates[1:], ylabels=covariates[1:])
    plt.savefig('forestplot_mixed.png')

    def participant_y_pred(entity_name, burn=1000, ypred=y_pred):
        """Return posterior predictive for person"""
        ix = np.where(le.classes_ == entity_name)[0][0]
        return ypred[burn:, ix]

    days = 7

    # 2x2 grid: top row for 'Up', bottom row for 'Down'.
    fig = plt.figure(figsize=(16, 10))
    fig.add_subplot(221)
    entity_plotA('Up', days=days)
    fig.add_subplot(222)
    entity_plotB('Up')

    fig.add_subplot(223)
    entity_plotA('Down', days=days)
    fig.add_subplot(224)
    entity_plotB('Down')
    plt.savefig("figure4-postpreddist-updown")
Example #5
0
    # Linear combinations of parameters
    theta = invlogit(alpha + beta*dose)

    # Model likelihood
    deaths = Binomial('deaths', n=n, p=theta, observed=y)
  




with bioassay_model:

    # Draw 1000 samples within the model context; njobs=2 is passed to
    # sample() (presumably two parallel jobs -- confirm against the docs).
    trace = sample(1000, njobs=2)
    # Forest plot restricted to the 'alpha' and 'beta' parameters
    forestplot(trace, varnames=['alpha', 'beta'])
  



### MOTIVATING EXAMPLE -- LINEAR REGRESSION 
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global random number generator with a fixed value
np.random.seed(123)

# True parameter values: scalar alpha and sigma, and a two-element beta
alpha, sigma = 1, 1
beta = [1, 2.5]