Example #1
0
 def test_value_n_eff_rhat(self):
     """Check that the ``n_eff`` and ``Rhat`` columns of ``pm.summary``
     agree with ``pm.effective_n`` and ``pm.gelman_rubin`` for variables
     declared with 0, 1, 2 and 3 dimensions."""
     mu, tau = -2.1, 1.3
     with Model():
         Normal('x0', mu, tau, testval=floatX_array(.1))  # 0d
         Normal('x1', mu, tau, shape=2, testval=floatX_array([.1, .1]))  # 1d
         for name, shape in (('x2', (2, 2)), ('x3', (2, 2, 3))):  # 2d, 3d
             Normal(name, mu, tau, shape=shape,
                    testval=floatX_array(np.tile(.1, shape)))
         trace = pm.sample(100, step=pm.Metropolis())

     # (summary column, stand-alone diagnostic) pairs, checked in this order.
     checks = (('n_eff', pm.effective_n), ('Rhat', pm.gelman_rubin))
     for varname in trace.varnames:
         for column, diagnostic in checks:
             direct = diagnostic(trace, varnames=[varname])[varname]
             table = pm.summary(trace, varnames=[varname])
             from_summary = np.asarray(table[column]).reshape(direct.shape)
             npt.assert_equal(direct, from_summary)
Example #2
0
def run(n=5000):
    """Sample ``model_1`` with a slice sampler started at the MAP estimate.

    :param n: number of draws passed to ``pm.sample``. The value used to be
        ignored in favour of a hard-coded 5000; the default keeps that
        behaviour for callers that pass nothing.
    """
    with model_1:
        xstart = pm.find_MAP()
        xstep = pm.Slice()
        trace = pm.sample(n, xstep, xstart, random_seed=123, progressbar=True)

        pm.summary(trace)
Example #3
0
def test_summary_1d_variable_model():
    """Smoke test: ``pm.summary`` runs on a trace of a ``shape=2`` Normal."""
    mu_val, tau_val = -2.1, 1.3
    with Model() as model:
        Normal('x', mu_val, tau_val, shape=2, testval=[.1, .1])
        sampler = Metropolis(model.vars, np.diag([1.]), blocked=True)
        trace = pm.sample(100, step=sampler)
    pm.summary(trace)
Example #4
0
 def test_summary_0d_variable_model(self):
     """Smoke test: ``pm.summary`` runs on a trace of a Normal declared
     without a ``shape`` argument."""
     mu_val, tau_val = -2.1, 1.3
     with Model() as model:
         Normal('x', mu_val, tau_val, testval=.1)
         sampler = Metropolis(model.vars, np.diag([1.]), blocked=True)
         trace = pm.sample(100, step=sampler)
     pm.summary(trace)
Example #5
0
 def test_disaster_model_missing(self):
     """Smoke test: sample the masked disaster model and summarise the trace."""
     model = build_disaster_model(masked=True)
     with model:
         # Slice-sample the two log-transformed means; the remaining
         # variables get whatever step method pm.sample assigns.
         slice_step = pm.Slice([model.early_mean_log_, model.late_mean_log_])
         # Starting point for the stochastic nodes.
         initial = {'early_mean': 2., 'late_mean': 3.}
         tr = pm.sample(500, tune=50, start=initial, step=slice_step)
         pm.summary(tr)
Example #6
0
 def test_summary_2d_variable_model(self):
     """Smoke test: ``pm.summary`` runs on a trace of a ``(2, 2)`` Normal."""
     mu_val, tau_val = -2.1, 1.3
     grid = (2, 2)
     with Model() as model:
         Normal('x', mu_val, tau_val, shape=grid,
                testval=floatX_array(np.tile(.1, grid)))
         sampler = Metropolis(model.vars, np.diag([1.]), blocked=True)
         trace = pm.sample(100, step=sampler)
     pm.summary(trace)
Example #7
0
    def still_broken(self):
        """Run a short ATMCMC sampling pass on the model from
        ``self.build_model()`` and summarise the resulting trace."""
        ATMIP_test = self.build_model()
        with ATMIP_test:
            likelihood = ATMIP_test.deterministics[0].name
            step = pm.ATMCMC(n_chains=500, tune_interval=25,
                             likelihood_name=likelihood)
            trace = pm.ATMIP_sample(n_steps=50, step=step, njobs=1,
                                    progressbar=True, trace=self.trace_dir)

        pm.summary(trace)
Example #8
0
    def test_save_and_load_work_correctly(self):
        """Fit, save and reload the model, then check the reloaded copy.

        The reloaded model must report the same ``num_cats`` / ``num_pred``,
        the same trace summary, and predicted probabilities equal to one
        decimal place.
        """
        # Local import so the block does not depend on a file-level alias.
        import pandas as pd

        print("")
        self.test_HLR.fit(self.X_train, self.Y_train, self.cat_train)
        probs1 = self.test_HLR.predict_proba(self.X_test, self.cat_test)
        self.test_HLR.save(self.test_dir)

        HLR2 = HLR()

        HLR2.load(self.test_dir)

        self.assertEqual(self.test_HLR.num_cats, HLR2.num_cats)
        self.assertEqual(self.test_HLR.num_pred, HLR2.num_pred)
        # assertEqual evaluates ``df1 == df2`` in a boolean context, which is
        # ambiguous for DataFrames; compare the summaries frame-wise instead.
        pd.testing.assert_frame_equal(summary(self.test_HLR.advi_trace),
                                      summary(HLR2.advi_trace))

        probs2 = HLR2.predict_proba(self.X_test, self.cat_test)

        np.testing.assert_almost_equal(probs2, probs1, decimal=1)
Example #9
0
def acfplot_withsummary(axs, lc, trace, summary_kwargs=None, acf_kwargs=None):
    """Draw the ACF plot on ``axs[0]`` and a rounded trace-summary table
    (with an extra ``mode`` column) on ``axs[1]``.

    :param axs: indexable of at least two axes
    :param lc: object forwarded to ``plotacf``
    :param trace: trace to summarise
    :param summary_kwargs: optional keyword arguments for ``pm.summary``
        (previously accepted but silently ignored)
    :param acf_kwargs: optional keyword arguments for ``plotacf``
    :returns: ``axs``
    """
    # None sentinels avoid sharing one mutable default dict across calls.
    summary_kwargs = {} if summary_kwargs is None else summary_kwargs
    acf_kwargs = {} if acf_kwargs is None else acf_kwargs

    summary = pm.summary(trace, **summary_kwargs)
    summary['mode'] = list(utils.modes(trace).values())
    plotacf(axs[0], lc, **acf_kwargs)
    # The second axes only hosts the table, so hide its ticks.
    axs[1].xaxis.set_visible(False)
    axs[1].yaxis.set_visible(False)
    table(axs[1], summary.round(3))
    pl.tight_layout()
    return axs
Example #10
0
 def _predict_scores_fixed(self, X, **kwargs):
     """Score ``X`` linearly using the posterior-mean weights, plus the
     posterior-mean intercept when the summary contains one."""
     posterior_means = dict(pm.summary(self.trace_)["mean"])
     weights = np.array([
         posterior_means["weights[{}]".format(idx)]
         for idx in range(self.n_object_features_fit_)
     ])
     if "intercept" in posterior_means:
         intercept = 0.0 + posterior_means["intercept"]
     else:
         intercept = 0.0
     return np.dot(X, weights) + intercept
Example #11
0
 def _predict_scores_fixed(self, X, **kwargs):
     """Score ``X`` linearly using the posterior-mean weights, plus the
     posterior-mean intercept when the summary contains one."""
     posterior_means = dict(pm.summary(self.trace)['mean'])
     weights = np.array([
         posterior_means['weights__{}'.format(idx)]
         for idx in range(self.n_object_features)
     ])
     if 'intercept' in posterior_means:
         intercept = 0.0 + posterior_means['intercept']
     else:
         intercept = 0.0
     return np.dot(X, weights) + intercept
Example #12
0
    def test_save_and_load_work_correctly(self):
        """Fit, save and reload the sparse GP regressor; the reloaded copy
        must expose the same attributes, trace summary and (to one decimal
        place) score."""
        print('')
        original = self.test_SGPR
        original.fit(self.X_train, self.y_train)
        score1 = original.score(self.X_test, self.y_test)
        original.save(self.test_dir)

        SGPR2 = SparseGaussianProcessRegressor()
        SGPR2.load(self.test_dir)

        for attr in ('inference_type', 'num_pred', 'num_training_samples'):
            self.assertEqual(getattr(self.test_SGPR, attr),
                             getattr(SGPR2, attr))
        pd.testing.assert_frame_equal(summary(self.test_SGPR.trace),
                                      summary(SGPR2.trace))

        score2 = SGPR2.score(self.X_test, self.y_test)
        self.assertAlmostEqual(score1, score2, 1)
Example #13
0
 def _predict_scores_fixed(self, X, **kwargs):
     """Build a (features x mixtures) weight matrix from posterior means,
     apply it to ``X``, softmax over axis 1 and average over axis 2."""
     posterior_means = dict(pm.summary(self.trace)['mean'])
     n_features, n_mixtures = self.n_object_features, self.n_mixtures
     weights = np.zeros((n_features, n_mixtures))
     for i in range(n_features):
         for k in range(n_mixtures):
             weights[i][k] = posterior_means['weights__{}_{}'.format(i, k)]
     scores = npu.softmax(np.dot(X, weights), axis=1)
     return np.mean(scores, axis=2)
Example #14
0
def create_smry(trc, labels, vname=None):
    ''' Conv fn: create trace summary for sorted forestplot

    :param trc: trace to summarise
    :param labels: replacement row labels, paired in order with the summary
        index (the function used to read an outer ``feature_labels`` name
        and ignore this argument)
    :param vname: variable names passed to ``pm.summary``; defaults to
        ``['w']``
    :returns: summary DataFrame with relabelled index and a ``ypos`` column
        running linearly from 1 down to 0
    '''
    if vname is None:
        vname = ['w']  # built per call instead of a shared mutable default
    dfsm = pm.summary(trc, varnames=vname)
    dfsm.rename(index=dict(zip(dfsm.index, labels)), inplace=True)
    dfsm['ypos'] = np.linspace(1, 0, len(dfsm))
    return dfsm
    def test_save_and_load_work_correctly(self):
        """Save and reload the fitted model; the reloaded copy must expose
        the same attributes, trace summary and (to one decimal place)
        predicted probabilities."""
        probs1 = self.test_HLR.predict_proba(self.X_test, self.cat_test)
        self.test_HLR.save(self.test_dir)

        HLR2 = HierarchicalLogisticRegression()
        HLR2.load(self.test_dir)

        for attr in ('num_cats', 'num_pred', 'num_training_samples'):
            self.assertEqual(getattr(self.test_HLR, attr),
                             getattr(HLR2, attr))
        pd.testing.assert_frame_equal(summary(self.test_HLR.trace),
                                      summary(HLR2.trace))

        probs2 = HLR2.predict_proba(self.X_test, self.cat_test)
        np.testing.assert_almost_equal(probs2, probs1, decimal=1)
Example #16
0
def cornerplot(lc, trace, catalog, **kwargs):
    """Draw a corner plot of the trace samples, marking the posterior means
    as truths and annotating the figure with ``catalog`` and ``lc.id``.

    Extra keyword arguments are forwarded to ``corner.corner``; the object
    it returns is handed back to the caller.
    """
    posterior_means = pm.summary(trace)['mean']
    frame = pm.trace_to_dataframe(trace)
    figure = corner.corner(frame, truths=posterior_means, **kwargs)
    title = "{0} {1}".format(catalog, lc.id)
    pl.annotate(title, xy=(0.4, 0.95), xycoords="figure fraction",
                fontsize=30)
    return figure
    def test_save_and_load_work_correctly(self):
        """Fit, save and reload the linear regression, then check the copy.

        The reloaded model must report the same ``inference_type``,
        ``num_pred`` and ``num_training_samples``, the same trace summary,
        and a score equal to one decimal place.
        """
        # Local import so the block does not depend on a file-level alias.
        import pandas as pd

        print('')
        self.test_LR.fit(self.X_train, self.Y_train)
        score1 = self.test_LR.score(self.X_test, self.Y_test)
        self.test_LR.save(self.test_dir)

        LR2 = LinearRegression()

        LR2.load(self.test_dir)

        self.assertEqual(self.test_LR.inference_type, LR2.inference_type)
        self.assertEqual(self.test_LR.num_pred, LR2.num_pred)
        self.assertEqual(self.test_LR.num_training_samples, LR2.num_training_samples)
        # assertEqual evaluates ``df1 == df2`` in a boolean context, which is
        # ambiguous for DataFrames; compare the summaries frame-wise instead.
        pd.testing.assert_frame_equal(summary(self.test_LR.trace),
                                      summary(LR2.trace))

        score2 = LR2.score(self.X_test, self.Y_test)

        np.testing.assert_almost_equal(score1, score2, decimal=1)
Example #18
0
def solve_vi(X, Y, initial=None, batch_size=100):
    """Fit a marginal GP with an ExpQuad kernel by ADVI and return the
    posterior means of its parameters.

    :param X: inputs; the kernel is built with ``input_dim=2`` and
        ``active_dims=[0, 1]``
    :param Y: observations
    :param initial: ADVI start point; when falsy, ``pm.find_MAP()`` is used
    :param batch_size: currently unused (mini-batching is not enabled)
    :returns: dict mapping each row label of ``pm.summary`` to its ``mean``
    """
    X_t = th.shared(X)
    Y_t = th.shared(Y)

    # Prior scales are tied to the spread of the data.
    dx = np.max(X) - np.min(X)
    dy = np.max(Y) - np.min(Y)

    with pm.Model() as model:
        sigma_K = pm.HalfNormal('sigma_K', sd=dy / 3.)
        l_space = pm.HalfNormal('l_space', sd=dx / 3., testval=1.)
        cov_func = sigma_K**2 * pm.gp.cov.ExpQuad(
            2, active_dims=[0, 1], ls=l_space)
        gp = pm.gp.Marginal(cov_func=cov_func)
        eps = pm.Uniform('eps', 0.0, np.std(Y))
        y1 = gp.marginal_likelihood('y1', X_t, Y_t, eps)
        initial = initial or pm.find_MAP()
        approx = pm.fit(
            1000,
            start=initial,
            method='advi',
            callbacks=[
                pm.callbacks.CheckParametersConvergence(tolerance=1e-4)
            ])
        df = approx.sample(10000)
        # Summarise once; the original rebuilt the full summary twice.
        posterior_means = pm.summary(df)['mean']
        p = {k: posterior_means[k] for k in posterior_means.keys()}

    return p
Example #19
0
    def print_summary(self, save_file = None):
        """Print the summary of ``self.trace``; when ``save_file`` is given,
        also render the summary as a table on a frameless axes and save the
        figure to that path."""
        trace_summary = pm.summary(self.trace)
        print(trace_summary)
        if save_file is None:
            return

        ax = plt.subplot(111, frame_on=False)
        # Hide both axes so only the table is drawn.
        for axis in (ax.xaxis, ax.yaxis):
            axis.set_visible(False)
        table(ax, trace_summary, loc='upper right')
        plt.savefig(save_file)
def plot_traces(traces, retain=1000):
    """Trace-plot the last ``retain`` samples, drawing each summary row's
    mean as a reference line and annotating it on the left-hand panels.

    :param traces: trace supporting slicing and ``.varnames``
    :param retain: number of trailing samples to keep
    """
    recent = traces[-retain:]
    # One summary serves both the reference lines and the annotations; the
    # original recomputed it for each use.
    recent_summary = pm.summary(recent)

    ax = pm.traceplot(recent,
                      figsize=(12, len(traces.varnames) * 1.5),
                      lines={
                          k: v['mean']
                          for k, v in recent_summary.iterrows()
                      })

    for i, mn in enumerate(recent_summary['mean']):
        ax[i, 0].annotate('{:.2f}'.format(mn),
                          xy=(mn, 0),
                          xycoords='data',
                          xytext=(5, 10),
                          textcoords='offset points',
                          rotation=90,
                          va='bottom',
                          fontsize='large',
                          color='#AA0022')
Example #21
0
def model_ggl(locations, samples, centers, cc):
    """Fit the three scale parameters of a separable model and print the
    squared error of its posterior-mean reconstruction.

    The expected outcome is ``cc * gpdf(x) * gpdf(y) * lpdf(theta)`` with
    the centres fixed to ``centers`` and the scales ``s1``, ``s2``, ``s3``
    sampled. Nothing is returned; the summed squared error is printed.
    """
    center_x, center_y, center_theta = centers[0], centers[1], centers[2]

    basic_model = pm.Model()
    with basic_model:
        # Priors on the unknown scales (centres are fixed inputs).
        s1 = pm.HalfNormal('s1', sd=20)
        # NOTE(review): s2 is an unconstrained Normal while s1/s3 are
        # HalfNormal -- confirm this asymmetry is intended.
        s2 = pm.Normal('s2', sd=20)
        s3 = pm.HalfNormal('s3', sd=20)

        density_x = gpdf(locations[0], center_x, s1)
        density_y = gpdf(locations[1], center_y, s2)
        density_theta = lpdf(locations[2], center_theta, s3)

        sigma = pm.HalfNormal('sigma', sd=1)

        # Expected value of the outcome.
        mu = cc * density_x * density_y * density_theta

        # Likelihood of the observations.
        Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=samples)
        trace = pm.sample(5000, njobs=4)

    pm.summary(trace)

    # Rebuild the prediction from the posterior means of the scales.
    S1 = np.mean(trace['s1'])
    S2 = np.mean(trace['s2'])
    S3 = np.mean(trace['s3'])

    p_x = gpdf(locations[0], center_x, S1).eval()
    p_y = gpdf(locations[1], center_y, S2).eval()
    p_theta = lpdf(locations[2], center_theta, S3).eval()
    prediction = cc * p_x * p_y * p_theta
    Err = np.sum((samples - prediction)**2)
    print(Err)
Example #22
0
def main():
    """Run the profiler on the data set and write the trace summary and
    the timings to CSV files under ``results//tables``."""
    X, y = get_data()
    times, traces = profiler(X, y, max_iters=10)
    traces_summary = pm.summary(traces)

    traces_summary.to_csv('results//tables//pymc3_traces_summary.csv')
    # ``columns`` must be list-like; a bare string is rejected by pandas.
    pd.DataFrame(times,
                 columns=['timing']).to_csv('results//tables//pymc3_results.csv',
                                            index=False)
    return None
Example #23
0
    def test_save_and_load_work_correctly(self):
        """Fit with ADVI, save and reload the Student's-t process regressor;
        the reloaded copy must expose the same attributes and trace summary,
        and a score equal at zero decimals."""
        print('')
        fitted = self.advi_stpr
        fitted.fit(self.X_train, self.y_train, inference_args={"n": 25000})
        score1 = fitted.score(self.X_test, self.y_test)
        fitted.save(self.test_dir)

        stpr2 = StudentsTProcessRegressor()
        stpr2.load(self.test_dir)

        for attr in ('inference_type', 'num_pred', 'num_training_samples'):
            npt.assert_equal(getattr(self.advi_stpr, attr),
                             getattr(stpr2, attr))
        pdt.assert_frame_equal(summary(self.advi_stpr.trace),
                               summary(stpr2.trace))

        score2 = stpr2.score(self.X_test, self.y_test)
        npt.assert_almost_equal(score1, score2, 0)
def apply(input):
    """Run the simulation for ``input``, store the trace, and return the
    trace summary serialised as JSON.

    ``input["target_output"]`` names the output object; it is appended to
    the fixed data URI prefix and also passed to ``write_output``.
    """
    client = Algorithmia.client()
    frame = parse_dataframe(input, client)
    trace = run_simulation(frame)
    # For now the trace goes to an Algorithmia data file and only the
    # summary is returned.
    target = input["target_output"]
    output_file_uri = "s3+fantasygm://fantasygm-trace-out/v1/" + target
    # TODO: need a list of varnames for converting the multitrace to dataframe
    write_output(trace, output_file_uri, input["target_output"], client)
    summary_frame = pm.summary(trace)
    return summary_frame.to_json()
Example #25
0
def bms(L, **sample_kwargs):
    """Compute exceedance probabilities (xp) and expected relative
    frequencies (r) from an array of model log-evidences.

    Args:
        L (numpy.ndarray): Model log-evidences (higher is better fit),
            shaped (K models; N subjects).

        **sample_kwargs: Extra arguments for the pymc sample function.
            Currently `cores=1` seems to be necessary.

    Returns:
        dict: Keys ``summary``, ``xp`` and ``r``.

    Reference:
        Stephan, K. E., Penny, W. D., Daunizeau, J., Moran, R. J., & Friston, K. J. (2009). Bayesian model selection for group studies. Neuroimage, 46(4), 1004-1017.
    """
    K, N = L.shape

    with pm.Model() as model:

        def lookup_L(L, N):
            """Pick, for each of the N subjects, the log-evidence of the
            model currently assigned to that subject by ``m``."""
            # ``m`` is resolved from the enclosing scope when this is called.
            subject_index = tt.cast(tt.arange(N), dtype="int32")
            model_index = tt.cast(m, dtype="int32")
            return L[model_index, subject_index]

        # Priors
        alpha = pm.Uniform("alpha", 0, N, shape=K, testval=np.ones(K))

        # Model frequencies and per-subject model labels
        r = pm.Dirichlet("r", a=alpha, testval=np.ones(K) / K)
        m = pm.Categorical("m", p=r, shape=N, testval=0)

        # Log evidence of the labelled models
        ll = pm.DensityDist("ll", logp=lookup_L, observed=dict(L=L, N=N))

        trace = pm.sample(**sample_kwargs)

    # Assemble the results
    r_samples = trace.get_values("r")
    best_per_draw = r_samples.max(axis=1)
    xp = np.array(
        [np.mean(r_samples[:, k] == best_per_draw) for k in range(K)])
    r_unscaled = np.array([np.mean(r_samples[:, k]) for k in range(K)])

    result = {
        "summary": pm.summary(trace, var_names=["alpha", "r"]),
        "xp": xp,
        "r": r_unscaled / r_unscaled.sum(),
    }
    return result
Example #26
0
    def __init__(self,X_train,y_train,n_hidden,lam=1):
        """Build an RBF network model, sample it with NUTS and dump results.

        :param X_train: training inputs, iterated row by row
        :param y_train: observed targets
        :param n_hidden: number of RBF centres ``C``; ``w`` has one extra
            entry for the bias term
        :param lam: factor applied to the squared distance in the RBF

        Side effects: prints diagnostics, writes one ``<varname>.txt`` file
        per trace variable, and shows the trace plot.
        """
        n_train = y_train.shape[0]
        n_dim = X_train.shape[1]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(X_train.shape)
        with pm.Model() as rbfnn:
            C = pm.Normal('C',mu=0,sd=10,shape=(n_hidden))
            w = pm.Normal('w',mu=0,sd=10,shape=(n_hidden+1))

            y_out=[]
            for x in X_train:
                # 1-d shortcut: no sum over input dimensions.
                rbf_out =  T.exp(-lam*(C-x)**2)
                # Append a constant 1 so the last weight acts as the bias.
                rbf_out_biased = \
                        T.concatenate([ rbf_out, T.alloc(1,1) ], 0)
                y_out.append(T.dot(w,rbf_out_biased))

            y = pm.Normal('y',mu=y_out,sd=0.01,observed=y_train)

            start = pm.find_MAP(fmin=scipy.optimize.fmin_l_bfgs_b)
            print(start)
            # Short warm-up run, then a long run rescaled and restarted from
            # the last warm-up sample.
            step = pm.NUTS(scaling=start)
            trace = pm.sample(2000, step, progressbar=False)
            step = pm.NUTS(scaling=trace[-1])
            trace = pm.sample(20000,step,start=trace[-1])

            print(summary(trace, vars=['C', 'w']))

            for v in trace.varnames:
                for d in trace.get_values(v, combine=False, squeeze=False):
                    d=np.squeeze(d)
                    # NOTE(review): the file is reopened in "w+" mode for
                    # every chain, so only the last chain's values remain.
                    with open(str(v)+".txt","w+") as thefile:
                        for item in d:
                            thefile.write("{}\n".format(item))

            traceplot(trace)
            plt.show()
def excel_posterior(trace, filename):
    """Export the posterior of the project variables to Excel.

    Re-reads the project from ``filename`` to rebuild the variable names,
    writes their summary to ``<filename>Output.xlsx``, plots the posteriors,
    and writes the full trace to ``<filename>Trace.xlsx``.
    """
    # The project data is read again to recover activity count and names.
    prj = project_reader(filename)
    projectDefinition = prj[1]
    WP_NAMES = np.array(prj[1][:, 0])
    WP_NUMBER = prj[1][:, 0].shape[0]

    # A risk variable exists for each non-zero entry in columns 1 and 2.
    RISK_names = [
        projectDefinition[x][0] + "_Risk_%d" % (y + 1)
        for x in range(WP_NUMBER)
        for y in range(2)
        if projectDefinition[x][y + 1] != 0
    ]

    # Per-work-package variables, grouped by prefix in this order.
    templates = ("PV_%s", "Partial_PV_%s", "EV_%s", "COMPLETION_%s",
                 "SPI_%s", "CPI_%s")
    per_package_names = [
        template % WP_NAMES[x]
        for template in templates
        for x in range(WP_NUMBER)
    ]
    Index_names = ["SPI_PROJECT", "CPI_PROJECT", "ETC", "EAC", "TEAC"]
    all_names = RISK_names + per_package_names + Index_names

    outputName = filename + "Output.xlsx"
    traceName = filename + "Trace.xlsx"
    summary_frame = pm.summary(
        trace,
        varnames=all_names,
        stat_funcs=[trace_mean, trace_sd, trace_quantiles])
    summary_frame.to_excel(outputName, sheet_name="Summary")
    pm.plot_posterior(trace, varnames=all_names)
    pm.trace_to_dataframe(trace).to_excel(traceName, sheet_name="Trace")
 def _predict_scores_fixed(self, X, **kwargs):
     """Compute probabilities from the posterior-mean ``weights`` and
     ``lambda_k`` via ``self._get_probabilities_np``."""
     posterior_means = dict(pm.summary(self.trace)["mean"])

     def collect(template, count):
         # Gather ``count`` indexed posterior means into an array.
         return np.array(
             [posterior_means[template.format(i)] for i in range(count)])

     weights = collect("weights[{}]", self.n_object_features)
     lambda_k = collect("lambda_k[{}]", self.n_nests)
     return self._get_probabilities_np(np.dot(X, weights), lambda_k)
Example #29
0
    def run(self):
        """Fit the product/demand model with mini-batch ADVI and write the
        mean of the last 100 sampled ``cat`` values, indexed by product, to
        this task's output path."""
        coloredlogs.install()
        logging.info('Fetching some data')
        with dask.set_options(get=dask.multiprocessing.get):
            train_glob = '/tmp/split_data/{}/train/*.csv'.format(
                self.rand_round)
            data = dask.dataframe.read_csv(train_glob)
            total_size = data.week_num.count().compute()
            nose.tools.assert_greater(total_size, 100, 'Not enought data!')

            unique_products = data['product_id'].unique().compute()
            unique_products = unique_products.astype(np.uint16)
            sample = data.head()
        logging.info('Got it!')

        # Shared variables start from the head sample; ADVI swaps in the
        # mini-batches through ``minibatch_tensors``.
        product_codes = sample.product_id.astype(
            'category', categories=unique_products).cat.codes.values
        product_id_var = theano.shared(value=product_codes,
                                       name='product_id_var')
        adjusted_demand_var = theano.shared(
            value=sample.adjusted_demand.values, name='adjusted_demand_var')

        n_products = unique_products.shape[0]
        model = pm.Model()
        with model:
            product_category = pm.Uniform('cat', 0, 1, shape=(n_products, 5))
            product_vecs = pm.Normal('vecs', 0, 100, shape=5)
            adjusted_demand_variance = pm.HalfNormal('demand_variance', 10)
            product_pred = T.dot(product_category[product_id_var],
                                 product_vecs)

            adjusted_demand = pm.Normal('adjusted_demand',
                                        product_pred,
                                        adjusted_demand_variance,
                                        observed=adjusted_demand_var)

            minibatches = map(self.expand_batch,
                              self.minibatches(unique_products))

            v_params = pm.variational.advi_minibatch(
                n=100,
                minibatch_tensors=[product_id_var, adjusted_demand_var],
                minibatch_RVs=[adjusted_demand],
                minibatches=minibatches,
                total_size=total_size,
                n_mcsamples=5,
                verbose=True)
            trace = pm.variational.sample_vp(v_params, draws=500)
            print(pm.summary(trace))

        res = trace[-100:]['cat'].mean(0)
        target = self.output()
        target.makedirs()
        frame = pandas.DataFrame(res, index=unique_products.values)
        frame.to_msgpack(self.output().path)
Example #30
0
    def run_mcmc(self, spec_method='flexible'):
        """Specify a two-predictor linear regression, sample it and report.

        :param spec_method: ``'flexible'`` builds priors and likelihood by
            hand; ``'patsy_glm'`` builds the model from the formula
            ``y ~ x1 + x2``
        :raises ValueError: for any other ``spec_method``
        """
        with pm.Model() as mdl:
            if spec_method == 'flexible':
                # priors
                self.logger.info('specifying priors')
                intercept = pm.Normal('intercept', mu=0., sd=1000.)
                x1_coef = pm.Normal('x1_coef', mu=0., sd=1000.)
                x2_coef = pm.Normal('x2_coef', mu=0., sd=1000.)
                residual_std = pm.Gamma('residual_std', mu=1., sd=1000.,
                                        testval=1.)

                # likelihood
                self.logger.info('specifying likelihood')
                x1_term = x1_coef * self.dataset['X'][:, 0]
                x2_term = x2_coef * self.dataset['X'][:, 1]
                mu = (intercept + x1_term + x2_term)
                likelihood = pm.Normal('y', mu=mu, sd=residual_std,
                                       observed=self.dataset['y'])

            elif spec_method == 'patsy_glm':
                data_dict = dict(
                    y=self.dataset['y'],
                    x1=self.dataset['X'][:, 0],
                    x2=self.dataset['X'][:, 1],
                )

                self.logger.info('specifying model using patsy glm method')
                pm.glm.GLM.from_formula('y ~ x1 + x2', data_dict)

            else:
                message = 'unrecognised spec_method {}'.format(spec_method)
                raise ValueError(message)

            # Sample with the automatically chosen sampler; the ``tune``
            # argument takes care of burn-in.
            self.logger.info('running mcmc')
            trace = pm.sample(6000, njobs=1, tune=1000)

            # Report results without thinning or extra burn-in.
            n_burnin_samples = 0
            rounded = pm.summary(trace, start=n_burnin_samples).round(2)
            msg = ('summary of marginal posteriors (no thinning):\n{}'.format(
                rounded))
            self.logger.info(msg)
            pm.traceplot(trace, skip_first=n_burnin_samples)
            plt.show()

            self._show_custom_plots(
                trace=trace,
                params=['intercept', 'x1_coef', 'x2_coef', 'residual_std'],
                burnin=n_burnin_samples)
Example #31
0
def bayesTest(mocktable, outname):
    """Estimate cell-type mixing proportions for three samples.

    Reads a tab-separated expression table whose header line starts with
    ``Gene``, then models each of ``Sample1``..``Sample3`` as a
    Dirichlet-weighted mix of the ``Neurons``, ``Astrocytes`` and
    ``Oligodendrocytes`` columns. For each sample the mean, median and most
    common sampled value of the first weight are printed.

    :param mocktable: path to the expression table
    :param outname: currently unused
    :raises KeyError: if one of the six required columns is missing
    """
    import pymc3 as pymc
    from collections import Counter

    idx = {}
    expr_vector = {}
    # ``with`` closes the file; the original left the handle open.
    with open(mocktable) as handle:
        for line in handle:
            fields = line.strip().split('\t')
            if line.startswith('Gene'):
                idx.update((name, i) for i, name in enumerate(fields)
                           if name != 'Gene')
            else:
                for sample in idx:
                    expr_vector.setdefault(sample, []).append(
                        float(fields[idx[sample]]))

    neuro = expr_vector['Neurons']
    astro = expr_vector['Astrocytes']
    oligo = expr_vector['Oligodendrocytes']
    samples = [expr_vector[name]
               for name in ('Sample1', 'Sample2', 'Sample3')]

    for observed in samples:
        with pymc.Model():
            beta = pymc.Dirichlet('beta', a=np.array([1.0, 1.0, 1.0]))
            sigma = pymc.HalfNormal('sigma', sd=1)
            y_est = beta[0] * neuro + beta[1] * astro + beta[2] * oligo
            pymc.Normal('y', mu=y_est, sd=sigma, observed=observed)
            trace = pymc.sample(1000, random_seed=123, progressbar=True)
            pymc.summary(trace)
            # trace['beta'] has one column per cell type; only the first
            # (neuron) column is reported.
            neurons = trace['beta'][:, 0]
            n_avg = np.mean(neurons)
            n_med = np.median(neurons)
            n_mode = Counter(neurons).most_common(1)[0][0]
            # One pre-formatted argument prints the same on Python 2 and 3.
            print('{} {} {}'.format(n_avg, n_med, n_mode))
Example #32
0
def compare_parameters_individual(model,
                                  parameters,
                                  comparisons=None):
    """Tabulate per-subject posterior differences between condition pairs.

    :param model: object exposing ``data`` (with a ``subject`` column),
        ``design`` (per-parameter dict with ``dependence`` and
        ``conditions``) and ``trace`` (indexable by subject)
    :param parameters: parameter names to compare
    :param comparisons: iterable of ``(condition_a, condition_b)`` pairs;
        ``None`` means no comparisons
    :returns: DataFrame with one row per subject, parameter and comparison,
        sorted by subject. Rows carry ``mean``, ``hpd_2.5``, ``hpd_97.5``
        and ``p>0`` for the difference a - b, or a ``warning`` when the
        comparison cannot be made. With nothing to compare, an empty frame
        with columns ``subject``, ``parameter``, ``comparison`` is returned
        (the original raised ``ValueError`` from ``pd.concat``).
    """
    if comparisons is None:
        comparisons = []

    subjects = model.data['subject'].unique().astype(int)

    frames = []
    for parameter in parameters:
        for comparison in comparisons:
            comparison_string = '{}-{}'.format(*comparison)
            df_pc = pd.DataFrame(dict(subject=subjects, parameter=parameter, comparison=comparison_string),
                                 index=subjects)

            if model.design[parameter]['dependence'] is None:
                df_pc['warning'] = 'Parameter has no dependencies.'
            else:
                conditions = model.design[parameter]['conditions']
                if comparison[0] in conditions and comparison[1] in conditions:
                    # Per-subject sample-wise difference between conditions.
                    differences = np.array([(model.trace[i].get_values(parameter + '_' + comparison[0]) -
                                             model.trace[i].get_values(parameter + '_' + comparison[1]))
                                            for i in subjects])[:, :, 0, 0]

                    hpdlower, hpdupper = hpd(differences.T, alpha=0.05).T
                    df_pc['mean'] = np.mean(differences, axis=1)
                    df_pc['hpd_2.5'] = hpdlower
                    df_pc['hpd_97.5'] = hpdupper
                    df_pc['p>0'] = np.mean(differences > 0, axis=1)
                else:
                    df_pc['warning'] = 'At least one condition is missing.'

            frames.append(df_pc)

    if not frames:
        # pd.concat rejects an empty list; return an empty, typed frame.
        return pd.DataFrame(columns=['subject', 'parameter', 'comparison'])

    return pd.concat(frames, sort=False).sort_values('subject').reset_index(drop=True)
Example #33
0
def main(StartYear, EndYear, n_draw, model):
    """Sample the chosen model on the training data and pickle the trace.

    :param StartYear: first year passed to ``ReadData``
    :param EndYear: last year passed to ``ReadData``
    :param n_draw: number of draws for ``pm.sample``
    :param model: ``'exponential'`` or ``'hidden_vol'``
    :raises NotImplementedError: for any other ``model``
    """
    training_data_df = ReadData(StartYear, EndYear).train

    if model == 'exponential':
        build = exponential_model
    elif model == 'hidden_vol':
        build = hidden_vol_model
    else:
        raise NotImplementedError
    model_obj = build(training_data_df)

    # One sampling job per available CPU.
    n_cpus = multiprocessing.cpu_count()
    print('[INFO {}] starts sampling on {} CPUs.'.format(now(), n_cpus))
    with model_obj:
        trace = pm.sample(draws=n_draw, njobs=n_cpus)
    pm.summary(trace)

    output_file = '{}_model_trace.pkl'.format(model)
    with open(output_file, 'wb') as sink:
        pickle.dump(trace, sink)
Example #34
0
def run():
    """Simulate sneeze counts for the four alcohol / medication groups,
    fit a Poisson regression with an interaction term, and print the
    exponentiated posterior means and HPD bounds."""
    plt.rcParams['figure.figsize'] = 14, 6
    np.random.seed(0)
    print('Running on PyMC3 v{}'.format(pm.__version__))

    # Poisson rates, in the order the groups are generated.
    theta_noalcohol_meds = 1    # no alcohol, took an antihist
    theta_alcohol_meds = 3      # alcohol, took an antihist
    theta_noalcohol_nomeds = 6  # no alcohol, no antihist
    theta_alcohol_nomeds = 36   # alcohol, no antihist
    thetas = (theta_noalcohol_meds, theta_alcohol_meds,
              theta_noalcohol_nomeds, theta_alcohol_nomeds)

    # q simulated observations per group
    q = 1000
    counts = [np.random.poisson(theta, q) for theta in thetas]
    df = pd.DataFrame({
        'nsneeze': np.concatenate(counts),
        'alcohol': np.repeat([False, True, False, True], q),
        'nomeds': np.repeat([False, False, True, True], q),
    })

    g = sns.catplot(x='nsneeze', row='nomeds', col='alcohol', data=df,
                    kind='count', size=4, aspect=1.5)
    fml = 'nsneeze ~ alcohol + antihist + alcohol:antihist'  # full patsy formulation
    fml = 'nsneeze ~ alcohol * nomeds'  # lazy, alternative patsy formulation
    design = pt.dmatrices(fml, df, return_type='dataframe', NA_action='raise')
    mx_en, mx_ex = design
    pd.concat((mx_ex.head(3), mx_ex.tail(3)))

    with pm.Model() as mdl_fish:
        # Weakly informative Normal priors on the coefficients.
        b0 = pm.Normal('b0_intercept', mu=0, sigma=10)
        b1 = pm.Normal('b1_alcohol[T.True]', mu=0, sigma=10)
        b2 = pm.Normal('b2_nomeds[T.True]', mu=0, sigma=10)
        b3 = pm.Normal('b3_alcohol[T.True]:nomeds[T.True]', mu=0, sigma=10)

        # Linear predictor; the exp link is applied in the likelihood.
        alcohol_term = b1 * mx_ex['alcohol[T.True]']
        nomeds_term = b2 * mx_ex['nomeds[T.True]']
        interaction_term = b3 * mx_ex['alcohol[T.True]:nomeds[T.True]']
        theta = (b0 + alcohol_term + nomeds_term + interaction_term)

        # Poisson likelihood
        y = pm.Poisson('y', mu=np.exp(theta), observed=mx_en['nsneeze'].values)
        trc_fish = pm.sample(1000, tune=1000, cores=4)

    rvs_fish = [rv.name for rv in strip_derived_rvs(mdl_fish.unobserved_RVs)]
    plot_traces_pymc(trc_fish, varnames=rvs_fish)
    columns = ['mean', 'hpd_2.5', 'hpd_97.5']
    print(np.exp(pm.summary(trc_fish, varnames=rvs_fish)[columns]))
    plt.show()
Example #35
0
def summary(trace, **kwargs):
    """Summarise a PyMC3 trace with an additional posterior-mode statistic.

    :param trace: PyMC3 trace object
    :param kwargs: keyword args forwarded to the PyMC3 trace summary function
    :returns: PyMC3 trace summary in a pandas DataFrame
    """
    def _mode_stat(samples):
        # The Series is named 'mode' so the statistic can be identified
        # in the summary output.
        return pd.Series(posterior_mode(samples), name='mode')

    extra_stats = [_mode_stat]
    return pm.summary(trace, extend=True, stat_funcs=extra_stats, **kwargs)
def estimate_statistic_mcmc(data):
    """Fit a Normal model to ``data`` by MCMC and summarise the posterior.

    The mean ``mu`` gets a ``Normal(0, 5)`` prior.  The standard deviation
    ``std`` gets a ``Normal(1, 3)`` prior bounded below at zero.

    :param data: observed values handed to the likelihood as ``observed``
    :returns: the result of ``pm.summary`` for a 1000-draw trace
    """
    # A standard deviation must be positive.  The original unbounded Normal
    # prior allowed negative values for the likelihood's ``sd``, so the prior
    # is restricted to the positive half-line.
    positive_normal = pm.Bound(pm.Normal, lower=0.0)

    with pm.Model():
        mu = pm.Normal('mu', mu=0, sd=5)
        std = positive_normal('std', mu=1, sd=3)
        pm.Normal('obs', mu=mu, sd=std, observed=data)
        trace = pm.sample(1000)
    return pm.summary(trace)
Example #37
0
    def sample(self, draws=1000, tune=1000, chains=4, **kwargs):
        """Sample ``self.model`` from its MAP point and return a summary.

        The trace is stored on ``self.trace``.  Extra keyword arguments are
        forwarded to ``pm.sample``.  The returned summary is restricted to
        the variables named in ``reported``.
        """
        reported = ["period", "lighttime", "tref", "varpi", "eccen"]

        with self.model:
            map_start = pm.find_MAP()
            self.trace = pm.sample(
                draws=draws, tune=tune, chains=chains, start=map_start,
                **kwargs)

        return pm.summary(self.trace, varnames=reported)
Example #38
0
    def test_save_and_load_work_correctly(self):
        """Fit, save and reload the model, then compare state and predictions."""
        print("")
        self.test_HLM.fit(self.X_train, self.cat_train, self.Y_train)
        probs_before_save = self.test_HLM.predict_proba(self.X_test,
                                                        self.cat_test)
        # A second prediction is made but its result is never compared below.
        probs_repeat = self.test_HLM.predict_proba(self.X_test, self.cat_test)
        self.test_HLM.save(self.test_dir)

        reloaded = HLM()
        reloaded.load(self.test_dir)

        # Bookkeeping attributes must survive the save/load round trip.
        for attr in ("num_cats", "num_pred"):
            self.assertEqual(getattr(self.test_HLM, attr),
                             getattr(reloaded, attr))
        self.assertEqual(summary(self.test_HLM.advi_trace),
                         summary(reloaded.advi_trace))

        original_means = self.test_HLM.v_params.means
        reloaded_means = reloaded.v_params.means
        for key in original_means.keys():
            np.testing.assert_equal(original_means[key], reloaded_means[key])

        probs_after_load = reloaded.predict_proba(self.X_test, self.cat_test)
        np.testing.assert_almost_equal(probs_after_load, probs_before_save,
                                       decimal=1)
Example #39
0
def run(n=1500):
    """Sample model ``m``, plot ``mu_hat`` and print ranking diagnostics.

    Passing ``n='short'`` draws only 50 samples.
    """
    draws = 50 if n == 'short' else n

    with m:
        trace = pm.sample(draws)

    pm.traceplot(trace, varnames=['mu_hat'])

    print('Example observed data: ')
    print(y[:30, :].T)
    print('The true ranking is: ')
    print(yreal.flatten())
    print('The Latent mean is: ')
    # A leading 0 is prepended to the posterior means of mu_hat.
    mu_hat_means = pm.summary(trace, varnames=['mu_hat'])['mean'].values
    latentmu = np.hstack(([0], mu_hat_means))
    print(np.round(latentmu, 2))
    print('The estimated ranking is: ')
    print(np.argsort(latentmu))
Example #40
0
    log_like2 = - 0.5 * n * tt.log(2 * np.pi) \
                - 0.5 * tt.log(dsigma) \
                - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2)
    return tt.log(w1 * tt.exp(log_like1) + w2 * tt.exp(log_like2))

# Build the test model: an n-dimensional Uniform variable on [-2, 2] whose
# density contribution comes from the two_gaussians() expression.
with pm.Model() as ATMIP_test:
    X = pm.Uniform('X',
                   shape=n,
                   lower=-2. * np.ones_like(mu1),
                   upper=2. * np.ones_like(mu1),
                   testval=-1. * np.ones_like(mu1),
                   transform=None)
    like = pm.Deterministic('like', two_gaussians(X))
    # NOTE(review): the Potential reuses the name 'like' already given to the
    # Deterministic above - confirm the PyMC3 version in use accepts
    # duplicate variable names.
    llk = pm.Potential('like', like)

# The step method is told which variable holds the likelihood by passing the
# name of the model's first deterministic.
with ATMIP_test:
    step = atmcmc.ATMCMC(n_chains=n_chains, tune_interval=tune_interval,
                         likelihood_name=ATMIP_test.deterministics[0].name)

# Run the sampler; `test_folder` is passed as the trace argument.
trcs = atmcmc.ATMIP_sample(
                        n_steps=n_steps,
                        step=step,
                        njobs=njobs,
                        progressbar=True,
                        trace=test_folder,
                        model=ATMIP_test)

# Summarise and plot the resulting traces.
pm.summary(trcs)
Pltr = pm.traceplot(trcs, combined=True)
plt.show(Pltr[0][0])
Example #41
0
                trace = mc.sample(nsamples, step=step, start=start, njobs=self.njobs, trace=backend)
        return trace




# Demo: sample a 10-parameter problem with MCMCSampler and plot the result.
if __name__ == "__main__":
    def real_func():
        """Return 10 points of x on [0.01, 1] plus Gaussian noise scaled by 0.01."""
        x = np.linspace(0.01, 1.0, 10)
        f = x + np.random.randn(len(x))*0.01
        return f
        
    def model_func(beta):
        """Return ``beta`` unchanged (identity model)."""
        # NOTE(review): x is computed but never used.
        x = np.linspace(0.01, 1.0, 10)
        f = beta
        return f

    data = real_func()
    # Diagonal matrices built as identity / value**2; is_cov=False is passed
    # to the sampler below.
    tau_obs = np.eye(10)/.01**2
    tau_prior = np.eye(10)/1.0**2
    beta_prior = np.ones_like(data)*1.0
    # linspace values perturbed by Gaussian noise scaled by 0.1
    beta_map = np.linspace(0.01, 1.0, 10) + np.random.randn(10)*0.1
    sampler = MCMCSampler(model_func, data, tau_obs, beta_prior, tau_prior, beta_map, is_cov=False, method=None)
    trace = sampler.sample(2000)
    mc.summary(trace)
    mc.traceplot(trace)
    plt.figure()
    # NOTE(review): the curve labelled 'ACTUAL' is beta_map (the noisy
    # linspace above), not the noise-free values - confirm this is intended.
    plt.plot(beta_map, label='ACTUAL')
    # Mean of the 'beta' samples along the first axis of the trace.
    plt.plot(np.mean(trace['beta'][:,:], axis=0), label='MCMC')
    plt.show()
Example #42
0
}
"""


def get_garch_model():
    """Build and return the PyMC3 model for the eight hard-coded observations.

    The model has priors ``alpha1``, ``beta1`` and ``mu`` (one entry per
    observation) and a Normal likelihood ``obs`` on ``r`` whose standard
    deviation is the square root of the GARCH-style variance expression.
    """
    def _as_float(values):
        return np.array(values, dtype=np.float64)

    r = _as_float([28, 8, -3, 7, -1, 1, 18, 12])
    sigma1 = _as_float([15, 10, 16, 11, 9, 11, 10, 18])
    alpha0 = _as_float([10, 10, 16, 8, 9, 11, 12, 18])
    shape = r.shape

    with Model() as garch:
        alpha1 = Uniform('alpha1', 0., 1., shape=shape)
        # Upper limit of beta1 depends on alpha1.
        beta1 = Uniform('beta1', 0., 1 - alpha1, shape=shape)
        mu = Normal('mu', mu=0., sd=100., shape=shape)
        variance = (alpha0 + alpha1 * tt.pow(r - mu, 2) +
                    beta1 * tt.pow(sigma1, 2))
        theta = tt.sqrt(variance)
        Normal('obs', mu, sd=theta, observed=r)
    return garch


def run(n=1000):
    """Sample the GARCH model and return the trace.

    ``n="short"`` is shorthand for 50 draws; tuning always uses 1000 steps.
    """
    draws = 50 if n == "short" else n
    with get_garch_model():
        return sample(draws, tune=1000)


if __name__ == '__main__':
    # Sample with the default number of draws and pass the trace to summary().
    summary(run())
Example #43
0
def runModel():
    """Build the supernova hierarchical model, sample it and summarise it.

    Observations come from ``simulateData()``.  The model contains global
    cosmology, calibration, rate and luminosity nodes, followed by one set
    of luminosity / counts nodes per transient (plus a renormalization node
    for transients with an observed type).  After a MAP fit, 500 samples are
    drawn into the SQLite backend ``trace.sqlite`` and ``summary`` is called
    on the trace.  Nothing is returned.
    """

    observation = simulateData()
    nTrans = len(observation['spectype'])

    # Create the pymc3 model and fill it with the distributions and parameters
    # of the model
    basic_model = Model()

    with basic_model:

        r"""
        Cosmology Node.

        The FlatwCDM cosmology.

        pdf(Om0, w0)

        We need the flexibility to switch in and out different cosmological models.  The function
        that describes luminosity distance is specific to the model: the parameters and function
        should be packaged together.

        Parameters
        ----------
        Om0:    Omega_M
        w0:     constant equation of state w
        """

        Om0 = Lognormal('Om0', mu=numpy.log(0.28), tau=1/.1/.1)
        w0 = Normal('w0', mu=-1, sd=0.05)

        """
        Calibration Node.

        Global zeropoints for each band.

        pdf(Z)

        The transmission function of the bands will be used later.  The transmission and zeropoints
        should be packaged together. More complicated parameterizations of calibration are expected.

        Parameters
        -----------
        Z:  zeropoint (in mag) for the bands

        """
        n_bands = 1
        zeropoints = Normal('zeropoints', mu=0, sd=.02, shape=n_bands)

        """
        SN Ia Rate Node.

        rate_Ia_r = constant

        For SN cosmology the relative rates between different populations are sufficient.  Rates of
        all types are relative to the snIa rate, so snIa rate is taken to be 1.

        Parameters
        -----------
        rate_Ia_r =1    : the relative rates are relative to type Ia. Fixed.

        """

        rate_Ia_r = 1.

        """
        SN II Rate Node.

        The rate of SNe II relative to SNIa.

        pdf(rate_II_r)

        Along with the rate parameters is a rate model.

        There should be equivalent nodes for all other transient types being modeled.

        Parameters
        ----------

        rate_II_r     : relative rate of SNe II compared to SNe Ia.

        """

        rate_II_r = Uniform('rate_II_r', lower=0.25, upper=4)

        """
        SN Ia luminosity Node.  (actually working in log-L)

        pdf(logL_snIa, sigma_snIa)

        For the moment consider the SN to be phase-independent with no internal parameters.  Eventually
        this will represent time-evolving SED, e.g. SALT2.


        Parameters
        ----------

        logL_snIa   :       SN Ia mean log-luminosity
        sigma_snIa :        intrinsic dispersion (mag)

        """
        logL_snIa = Normal('logL_snIa', mu=numpy.log(1), sd=0.02)
        sigma_snIa = Lognormal('sigma_snIa', mu=numpy.log(0.1), tau=1./0.1/0.1)

        """
        SN II luminosity Node.  (actually working in log-L)

        pdf(logL_snII, sigma_snII)

        Parameters
        ----------

        logL_snII   :       SN II mean log-luminosity
        sigma_snII :        intrinsic dispersion (mag)

        """
        logL_snII = Normal('logL_snII', mu=numpy.log(0.5), sd=0.02)
        sigma_snII = Lognormal('sigma_snII', mu=numpy.log(0.4), tau=1./0.1/0.1)

        """
        Enter the plate that considers one supernova at a time
        """

        # range() instead of xrange(): xrange does not exist on Python 3,
        # while range() works on both Python 2 and 3.
        for i in range(nTrans):

            """
            Type Probability Node.

            Probabilities of being a type of object.  For now only SN Ia, and SN II.

            Dependencies
            -------------

            rate_Ia_r   :   Type Ia rate
            rate_II_r   :   Type II rate
            host galaxy :   Not implemented now but eventually depends on host properties

            Parameters
            ----------

            prob :          probability of the object being a type Ia.  Fixed.
            """

            prob = rate_Ia_r/(rate_Ia_r+rate_II_r)

            """
            Type Node.

            Not explicitly considered in our model.
            """

            """
            Observed Type Node and Luminosity Node.

            pdf(Obs type, Luminosity | Type prob, logL_snIa, logL_snII)

            There are two possibilities:

            1. There is an observed type assumed to be perfect.

                pdf(Obs type | Type) = delta(Obs type - Type)

                then

                pdf(Obs type, Luminosity | Type prob, logL_snIa, logL_snII)
                    = sum_i pdf(Obs type| Type_i) *
                        pdf(Luminosity | Type_i, logL_snIa, logL_snII) *
                        pdf(Type_i | Type prob)
                    = pdf(Luminosity | Type=Obs type, logL_snIa, logL_snII) *
                        pdf(Type=Obs type | Type prob)

                The class LogLuminosityGivenSpectype is responsible for providing this pdf

            2. There is no observed type.

                pdf(Luminosity | Type prob, logL_snIa, logL_snII)
                    = sum_i pdf(Luminosity | Type_i, logL_snIa, logL_snII) *
                        pdf(Type_i | Type prob)

                The class LuminosityMarginalizedOverType is responsible for providing this pdf

            Dependencies
            ------------

            prob        :
            logL_snIa   :
            sigma_snIa  :
            logL_snII   :
            sigma_snII  :

            Parameters
            ----------

            obstype         :   observed type, SN Ia=0, SNII=1 Marginalized over
            Luminosity      :

            """
            # spectype == -1 marks a transient without an observed type.
            if observation['spectype'][i] == -1:
                logluminosity = LogLuminosityMarginalizedOverType(
                    'logluminosity'+str(i),
                    mus=[logL_snIa, logL_snII],
                    sds=[numpy.log(10)/2.5*sigma_snIa, numpy.log(10)/2.5*sigma_snII],
                    p=prob,
                    testval=1.)
            else:
                if observation['spectype'][i] == 0:
                    usemu = logL_snIa
                    usesd = numpy.log(10)/2.5*sigma_snIa
                    usep = prob
                else:
                    usemu = logL_snII
                    usesd = numpy.log(10)/2.5*sigma_snII
                    usep = 1-prob

                logluminosity = LogLuminosityGivenSpectype(
                    'logluminosity'+str(i),
                    mu=usemu, sd=usesd, p=usep)

            luminosity = T.exp(logluminosity)

            """
            Redshift Node.

            Not considered explicitly in our model.

            """

            """
            Observed Redshift, Counts Node.

            pdf(observed redshift, Counts | Luminosity, Redshift, Cosmology, Calibration)
                = pdf(observed redshift| Redshift) *
                    pdf(Counts | Luminosity, Redshift, Cosmology, Calibration)

            The pdf of the observed redshift is assumed to be a sum of delta functions, perfectly
            measured redshift of the supernova or redshifts of potential galaxy hosts.

            pdf(observed redshift | Redshift) = sum_i p_i delta(observer redshift_i - Redshift)

            where p_i is the probability of observer redshift_i being the correct redshift.

            so

            pdf(observed redshift, Counts | Luminosity, Redshift, Cosmology, Calibration)
                = sum_i p_i pdf(Counts | Luminosity, Redshift=observer_redshift_i, Cosmology, Calibration)

            The class CountsWithThreshold handles this pdf

            Dependencies
            ------------

            luminosity  :   luminosity
            redshift    :   host redshift
            cosmology   :   cosmology
            Calibration :   calibration

            Parameters
            -----------

            observed_redshift   Marginalized over
            counts

            """

            # One luminosity distance and flux per candidate redshift.
            lds = []
            fluxes = []
            for z_ in observation['specz'][i]:
                # ld = 0.5/h0*(z_+T.sqr(z_))* \
                #     (1+ 1//T.sqrt((1+z_)**3 * (Om0 + (1-Om0)*(1+z_)**(3*w0))))
                ld = luminosity_distance(z_, Om0, w0)
                lds.append(ld)
                fluxes.append(luminosity/4/numpy.pi/ld**2)

            counts = Counts('counts'+str(i), fluxes=fluxes,
                            pzs=observation['zprob'][i], Z=zeropoints,
                            observed=observation['counts'][i])

            # The renormalization node is only added for transients that
            # have an observed type.
            if observation['spectype'][i] != -1:
                normalization = SampleRenormalization(
                    'normalization'+str(i), threshold=1e-9,
                    logL_snIa=logL_snIa, sigma_snIa=sigma_snIa,
                    logL_snII=logL_snII, sigma_snII=sigma_snII,
                    luminosity_distances=lds, Z=zeropoints,
                    pzs=observation['zprob'][i], prob=prob, observed=1)

    from pymc3 import find_MAP, NUTS, sample, summary
    from scipy import optimize
    with basic_model:

        backend = SQLite('trace.sqlite')

        # obtain starting values via MAP
        start = find_MAP(fmin=optimize.fmin_bfgs, disp=True)

        # draw 500 posterior samples
        trace = sample(500, start=start, trace=backend)

        summary(trace)
Example #44
0
 def posterior_summary(self, **kwargs):
     """Return ``pm.summary`` of ``self.posterior_``, forwarding ``kwargs``."""
     posterior = self.posterior_
     return pm.summary(posterior, **kwargs)
Example #45
0
def mixed_effects():
    """Fit a grouped negative-binomial model of days to first price update.

    Reads the module-level ``df``.  Intercepts are grouped by the column
    named in ``classes``; all other coefficients are shared.  The model is
    sampled with Metropolis from a MAP start, and only the last tenth of the
    draws (thinned by ``thin``) is kept.  A summary is printed and trace,
    forest and posterior-predictive figures are saved to disk.
    """

    le = preprocessing.LabelEncoder()
    # Convert categorical variables to integer
    # participants_idx = le.fit_transform(messages['prev_sender'])

    classes = 'FF49_industry'
    # classes = 'underwriter_tier'
    # classes = 'amends'

    print("Grouping by: {}".format(classes))

    FF49_industry = le.fit_transform(df['FF49_industry'])
    class_idx = le.fit_transform(df[classes])
    n_classes = len(le.classes_)

    NSamples = 50000
    # Floor division keeps `burn` an int: on Python 3 `NSamples/10` is a
    # float, which cannot be used in the slice `trace[-burn::thin]` below.
    burn = NSamples // 10
    thin = 2

    covariates = [
            'Intercept',
            '#Syndicate Members',
            '#Lead Underwriters',
            'Underwriter Rank',
            # 'FF49 Industry',
            'Amends Down',
            '#S1A Amendments',
            'Share Overhang',
            'log(1+Sales)',
            'log(Proceeds)',
            'CASI',
            # 'media_1st_pricing',
            # 'VC',
            'IPO Market Returns',
            'Industry Returns',
            'BAA Spread',
            ]

    y = df['days_to_first_price_update'].values
    # y = np.ma.masked_values(list(df.days_to_first_price_update), value=-999)

    with pm.Model() as model:

        # Parameters:
        intercept = pm.Gamma('Intercept', alpha=.1, beta=.1, shape=n_classes)

        beta_underwriter_syndicate_size = pm.Normal('#Syndicate Members', mu=0, sd=20)
        beta_underwriter_num_leads = pm.Normal('#Lead Underwriters', mu=0, sd=20)
        beta_underwriter_rank_avg = pm.Normal('Underwriter Rank', mu=0, sd=20)
        beta_num_SEC_amendments = pm.Normal('#S1A Amendments', mu=0, sd=20)
        # beta_FF49_industry = pm.Normal('FF49 Industry', mu=0, sd=20)
        beta_amends_down = pm.Normal('Amends Down', mu=0, sd=20)
        beta_share_overhang = pm.Normal('Share Overhang', mu=0, sd=20)
        beta_log_sales = pm.Normal('log(1+Sales)', mu=0, sd=20)
        beta_log_proceeds = pm.Normal('log(Proceeds)', mu=0, sd=20)
        beta_CASI = pm.Normal('CASI', mu=0, sd=20)
        # beta_media_1st_pricing = pm.Normal('media_1st_pricing', mu=0, sd=20)
        # beta_VC = pm.Normal('VC', mu=0, sd=20)
        beta_BAA_spread = pm.Normal('BAA Spread', mu=0, sd=20)
        beta_M3_initial_returns = pm.Normal('IPO Market Returns', mu=0, sd=20)
        beta_M3_indust_rets = pm.Normal('Industry Returns', mu=0, sd=20)

        # Hyperparameters
        ## alpha: hyperparameters for neg-binom distribution
        alpha = pm.Gamma('alpha', alpha=.1, beta=.1)

        # Mean of the count model: 1 + exp(linear predictor), with the
        # intercept selected per observation by its class index.
        mu = 1 + tt.exp(
                intercept[class_idx]
                + beta_underwriter_syndicate_size * df.underwriter_syndicate_size
                + beta_underwriter_num_leads * df.underwriter_num_leads
                + beta_underwriter_rank_avg * df.underwriter_rank_avg
                # + beta_FF49_industry * FF49_industry
                + beta_amends_down * df['Amends Down']
                + beta_num_SEC_amendments * df.num_SEC_amendments
                + beta_share_overhang * df['Share Overhang']
                + beta_log_sales * df['log(1+Sales)']
                + beta_CASI * df['CASI']
                + beta_log_proceeds * df['log(Proceeds)']
                # + beta_media_1st_pricing * df.media_1st_pricing
                # + beta_VC * df.VC
                + beta_BAA_spread * df['BAA Spread']
                + beta_M3_initial_returns * df.M3_initial_returns
                + beta_M3_indust_rets * df.M3_indust_rets
                    )

        # Dependent Variable
        BoundedNegativeBinomial = pm.Bound(pm.NegativeBinomial, lower=1)
        y_est = BoundedNegativeBinomial('y_est', mu=mu, alpha=alpha, observed=y)
        y_pred = BoundedNegativeBinomial('y_pred', mu=mu, alpha=alpha, shape=y.shape)
        # y_est = pm.NegativeBinomial('y_est', mu=mu, alpha=alpha, observed=y)
        # y_pred = pm.NegativeBinomial('y_pred', mu=mu, alpha=alpha, shape=y.shape)
        # y_est = pm.Poisson('y_est', mu=mu, observed=data)
        # y_pred = pm.Poisson('y_pred', mu=mu, shape=data.shape)

        start = pm.find_MAP()
        step = pm.Metropolis(start=start)
        # step = pm.NUTS()
        # backend = pm.backends.Text('test')
        # trace = pm.sample(NSamples, step, start=start, chain=1, njobs=2, progressbar=True, trace=backend)
        trace = pm.sample(NSamples, step, start=start, njobs=1, progressbar=True)

        # Keep the full trace under another name, then retain only the last
        # `burn` draws, taking every `thin`-th sample.
        trace2 = trace
        trace = trace[-burn::thin]

        # waic = pm.waic(trace)
        # dic = pm.dic(trace)

    # with pm.Model() as model:
    #     trace_loaded = pm.backends.sqlite.load('FF49_industry.sqlite')
        # y_pred.dump('FF49_industry_missing/y_pred')

    ## POSTERIOR PREDICTIVE CHECKS
    y_pred = trace.get_values('y_pred')
    pm.summary(trace, vars=covariates)

    # PARAMETER POSTERIORS
    anno_kwargs = {'xycoords': 'data', 'textcoords': 'offset points',
                    'rotation': 90, 'va': 'bottom', 'fontsize': 'large'}
    anno_kwargs2 = {'xycoords': 'data', 'textcoords': 'offset points',
                    'rotation': 0, 'va': 'bottom', 'fontsize': 'large'}

    n0, n1, n2, n3 = 1, 5, 9, 14 # numbering for posterior plots
    # intercepts
    # mn = pm.df_summary(trace)['mean']['Intercept_log__0']
    # ax[0,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(0,15), color=blue, **anno_kwargs2)
    # mn = pm.df_summary(trace)['mean']['Intercept_log__1']
    # ax[0,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(0,15), color=purple, **anno_kwargs2)
    # coeffs
    # mn = pm.df_summary(trace)['mean'][2]
    # ax[1,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5, 10), color=red, **anno_kwargs)
    # mn = pm.df_summary(trace)['mean'][3]
    # ax[2,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5,10), color=red, **anno_kwargs)
    # mn = pm.df_summary(trace)['mean'][4]
    # ax[3,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5,10), color=red, **anno_kwargs)
    # plt.savefig('figure1_mixed.png')

    # The trace does not change from here on, so the summary table is
    # computed once and reused for every plot instead of being recomputed
    # for each traceplot and annotation loop.
    summary_df = pm.df_summary(trace)
    mean_lines = {k: v['mean'] for k, v in summary_df.iterrows()}
    posterior_means = summary_df['mean']

    ax = pm.traceplot(trace, vars=['Intercept']+trace.varnames[n0:n1],
            lines=mean_lines
            )

    for i, mn in enumerate(posterior_means[n0:n1]): # +1 because up and down intercept
        ax[i,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5,10), color=red, **anno_kwargs)
    plt.savefig('figure1_mixed.png')

    ax2 = pm.traceplot(trace, trace.varnames[n1:n2],
            lines=mean_lines
            )
    for i, mn in enumerate(posterior_means[n1:n2]): # +1 because up and down intercept
        ax2[i,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5,10), color=red, **anno_kwargs)
    plt.savefig('figure2_mixed.png')

    ax3 = pm.traceplot(trace, trace.varnames[n2:n3],
            lines=mean_lines
            )
    for i, mn in enumerate(posterior_means[n2:n3]): # +1 because up and down intercept
        ax3[i,0].annotate('{:.3f}'.format(mn), xy=(mn,0), xytext=(5,10), color=red, **anno_kwargs)
    plt.savefig('figure3_mixed.png')

    # _ = plt.figure(figsize=(5, 6))
    _ = pm.forestplot(trace, vars=['Intercept'], ylabels=le.classes_)
    plt.savefig('forestplot_intercepts.png')
    _ = pm.forestplot(trace, vars=covariates[1:], ylabels=covariates[1:])
    plt.savefig('forestplot_mixed.png')

    # pm.traceplot(trace, vars=['alpha', 'y_pred'])

    # def participant_y_pred(entity_name, burn=1000, hierarchical_trace=trace):
    #     """Return posterior predictive for person"""
    #     ix = np.where(le.classes_ == entity_name)[0][0]
    #     return hierarchical_trace['y_pred'][burn:, ix]

    def participant_y_pred(entity_name, burn=1000, ypred=y_pred):
        """Return posterior predictive for person"""
        ix = np.where(le.classes_ == entity_name)[0][0]
        return ypred[burn:, ix]

    days = 7

    # 2x2 grid: one row per entity ('Up', 'Down'), one column per plot type.
    fig = plt.figure(figsize=(16,10))
    fig.add_subplot(221)
    entity_plotA('Up', days=days)
    fig.add_subplot(222)
    entity_plotB('Up')

    fig.add_subplot(223)
    entity_plotA('Down', days=days)
    fig.add_subplot(224)
    entity_plotB('Down')
    plt.savefig("figure4-postpreddist-updown")
Example #46
0
def run(n=5000):
    """Draw ``n`` samples from ``model_1`` and call ``pm.summary`` on them."""
    with model_1:
        samples = pm.sample(n)
        pm.summary(samples)
Example #47
0
def get_garch_model():
    """Build and return the PyMC3 model for the eight hard-coded observations.

    ``alpha1`` has a standard Normal prior, ``beta1`` a wide Normal prior
    bounded above by ``1 - alpha1``, and ``mu`` a wide Normal prior.  The
    likelihood ``obs`` is Normal on ``r`` with standard deviation ``theta``.
    """
    r = np.array([28, 8, -3, 7, -1, 1, 18, 12])
    sigma1 = np.array([15, 10, 16, 11, 9, 11, 10, 18])
    alpha0 = np.array([10, 10, 16, 8, 9, 11, 12, 18])
    shape = r.shape

    zero_mean = np.zeros(shape=shape)
    unit_sd = np.ones(shape=shape)
    wide_sd = 1e6 * np.ones(shape=shape)

    with Model() as garch:
        alpha1 = Normal('alpha1', mu=zero_mean, sd=unit_sd, shape=shape)
        # The upper bound of beta1 depends on alpha1.
        BoundedNormal = Bound(Normal, upper=(1 - alpha1))
        beta1 = BoundedNormal('beta1', mu=zero_mean, sd=wide_sd, shape=shape)
        mu = Normal('mu', mu=zero_mean, sd=wide_sd, shape=shape)
        variance = (alpha0 + alpha1 * tt.pow(r - mu, 2) +
                    beta1 * tt.pow(sigma1, 2))
        theta = tt.sqrt(variance)
        Normal('obs', mu, sd=theta, observed=r)
    return garch


def run(n=1000):
    """Sample the GARCH model and return the trace.

    ``n="short"`` is shorthand for 50 draws; ``n_init`` is fixed at 10000.
    """
    draws = 50 if n == "short" else n
    with get_garch_model():
        return sample(draws, n_init=10000)


if __name__ == '__main__':
    # Sample with the default number of draws and print the trace summary.
    print(summary(run()))
Example #48
0
                                                 refueling))
    
    rough_weather = pmd.Bernoulli('Rough Weather', p=0.35)
    
    flight_time = pmc.Exponential('Flight Time', lam=0.5 - (0.1 * rough_weather))
    arrival_traffic_delay = pmc.Wald('Arrival Traffic Delay', mu=0.1, lam=0.2)
    
    arrival_time = pm.Deterministic('Arrival time', 
                                    departure_time + 
                                    flight_time + 
                                    arrival_traffic_delay)
# Number of posterior draws requested from the sampler.
nb_samples = 500
with model:
    # A fixed random_seed is passed to pm.sample.
    samples = pm.sample(draws=nb_samples, random_seed=1000)

# The summary result is not assigned or printed here.
pm.summary(samples)

## Bayesian binomial-beta
%matplotlib inline
import pymc3 as pm
import seaborn as sb
from theano import config
config.warn.round=False

n = 1000
obs_v1 = 680
obs_v2 = 700

with pm.Model() as model: # context management
    # define priors
    prior_v1 = pm.Beta('prior_v1', alpha=2, beta=2)
# Linear regression of Y on X1 and X2 with Normal noise.
Hans_Model = pm.Model()
with Hans_Model:
    # Define prior
    alpha = pm.Normal('alpha_est', mu=0, sd=10)
    beta = pm.Normal('beta_est', mu=0, sd=10, shape=2)
    sigma = pm.HalfNormal('sigma_est', sd=1)

    # Model parameter
    mu = alpha + beta[0]*X1 + beta[1]*X2

    # Likelihood
    Y_rv = pm.Normal('Y_rv', mu=mu, sd=sigma, observed=Y)


# Model fitting
with Hans_Model:
    # step = pm.Metropolis(vars=[alpha,beta,sigma])
    # Start the Slice sampler from the MAP point found with Powell's method.
    param_MAP = pm.find_MAP(fmin=sp.optimize.fmin_powell)
    Method = pm.Slice(vars=[alpha, beta, sigma])
    trace = pm.sample(Niter, step=Method, start=param_MAP)

pm.traceplot(trace)

# Single-argument print(...) behaves the same on Python 2 and Python 3; the
# former print statement is a SyntaxError on Python 3.
print(pm.summary(trace))

plt.show()
#
# plt.plot(trace['alpha_est'])
# print pm.summary(trace)
# plt.show()
Example #50
0
import theano.tensor as T
from load_data import load_australian_credit, load_german_credit, load_heart, load_pima_indian
import pymc3 as pm
import numpy as np
from pymc3 import summary
from pymc3 import traceplot

# NOTE(review): the variables are named german* but the Australian credit
# data set is what is loaded here.
germanData, germanLabel = load_australian_credit()
# germanData, germanLabel = load_pima_indian()
# standardize each predictor column to mean 0 and std 1
g_mean = np.mean(germanData, axis=0)
g_std = np.std(germanData, axis=0)
germanData = (germanData - g_mean) / g_std


# Bayesian logistic regression: Normal priors on intercept and coefficients,
# Bernoulli likelihood on the labels.
with pm.Model() as model:
    alpha = pm.Normal("alpha_pymc3", mu=0.0, tau=1e-2)
    beta = pm.Normal("beta_pymc3", mu=0.0, tau=1e-2, shape=14)  # for australian data, it has 14 predictors
    y_hat_prob = 1.0 / (1.0 + T.exp(-(T.sum(beta * germanData, axis=1) + alpha)))
    yhat = pm.Bernoulli("yhat", y_hat_prob, observed=germanLabel)
    trace = pm.sample(10000, pm.NUTS())

trace1 = trace[5000:]  # get rid of the burn-in samples
summary(trace1)
traceplot(trace1)

alpha_mean = np.mean(trace1["alpha_pymc3"])
beta_mean = np.mean(trace1["beta_pymc3"], axis=0)
# 15 parameters in total: 1 intercept + 14 coefficients.
param_mean = (np.sum(alpha_mean) + np.sum(beta_mean)) / 15.0
# Single-argument print(...) works on Python 2 and 3 and emits the same text
# as the former `print "...", value` statement (which inserted one space
# between its two items).
print(" the overall mean of the parameters:  " + str(param_mean))
Example #51
0
# Show the MAP estimate computed earlier in the script.
print(map_estimate)


from pymc3 import NUTS, sample
from pymc3 import traceplot

with basic_model:

    # obtain starting values via MAP
    start = find_MAP(fmin=optimize.fmin_powell)

    # instantiate sampler
    step = NUTS(scaling=start)

    # draw 2000 posterior samples
    trace = sample(2000, step, start=start)
    # Last five 'alpha' samples; the value is discarded when run as a script.
    trace['alpha'][-5:]
    traceplot(trace)
    plt.show()




from pymc3 import summary
# The summary result is not assigned or printed here.
summary(trace)

# NOTE(review): n and p are not used in the lines visible here.
n = 500
p = 0.3
with Model():
	# Inspect the type PyMC3 returns for a free random variable.
	x = Normal('alpha', mu=0, sd=10)
	# print(...) with one argument works on both Python 2 and Python 3; the
	# former print statement is a SyntaxError on Python 3.
	print(type(x))
    with mdl_ols:

        ## find MAP using Powell, seems to be more robust
        t1 = time.time()
        start_MAP = pm.find_MAP(fmin=optimize.fmin_powell)
        t2 = time.time()
        print("Found MAP, took %f seconds" % (t2 - t1))

        ## take samples
        t1 = time.time()
        traces_ols = pm.sample(2000, start=start_MAP, step=pm.NUTS(), progressbar=True)
        print()
        t2 = time.time()
        print("Done sampling, took %f seconds" % (t2 - t1))

    pm.summary(traces_ols)
    ## plot the samples and the marginal distributions
    _ = pm.traceplot(
        traces_ols,
        figsize=(12, len(traces_ols.varnames) * 1.5),
        lines={k: v["mean"] for k, v in pm.df_summary(traces_ols).iterrows()},
    )
    plt.show()


do_tstudent = False

if do_tstudent:

    print("Robust Student-t analysis...")
    else:
        fit_results = np.array([out.values['decay']*delta_t,
                            np.sqrt(out.covar[0,0])*delta_t,
                            out.values['amplitude'],
                            np.sqrt(out.covar[1,1])])
        print(out.fit_report(min_correl=0.25))

    trace = sm.run(x=data,
                    aB=alpha_B,
                    bB=beta_B,
                    aA=alpha_A,
                    bA=beta_A,
                    delta_t=delta_t,
                    N=N)

    pm.summary(trace)

    traceB_results = np.percentile(trace['B'],(2.5,25,50,75,97.5))
    traceB_results = np.concatenate((traceB_results, [np.std(trace['B'])], [np.mean(trace['B'])]))

    traceA_results=np.percentile(trace['A'],(2.5,25,50,75,97.5))
    traceA_results = np.concatenate((traceA_results, [np.std(trace['A'])], [np.mean(trace['A'])]))

    results = np.concatenate((data_results, fit_results, traceB_results, traceA_results))

    print(results)

    if result_array is None:
        result_array = results
    else:
        result_array = np.vstack((result_array, results))
Example #54
0
import pymc3 as pm
import seaborn as sn
import matplotlib.pyplot as plt

# Four independent variables with no observed data; each is sampled from the
# distribution it is declared with.
with pm.Model() as model:
    uniform = pm.Uniform('uniform', lower=0, upper=1)
    normal = pm.Normal('normal', mu=0, sd=1)
    beta = pm.Beta('beta', alpha=0.5, beta=0.5)
    exponential = pm.Exponential('exponential', 1.0)

    trace = pm.sample(2000)

# Print the summary table rounded to two decimals.
print(pm.summary(trace).round(2))

pm.traceplot(trace)
plt.show()
	N_samples = [30, 30, 30]  # total number of each groups
	G_samples = [18, 18, 18]  # record of the number of good-quality samples

	group_idx = np.repeat(np.arange(len(N_samples)), N_samples)
	data = []
	for i in range(0, len(N_samples)):
		data.extend(np.repeat([1, 0], [G_samples[i], N_samples[i]-G_samples[i]]))
	print(group_idx, data)

	base_name = os.path.basename(__file__)[:-3]
	with pm.Model() as model_h,\
			matplotlib.backends.backend_pdf.PdfPages('%s.pdf' % base_name) as pdf_all:
		# prior
		alpha = pm.HalfCauchy('alpha', beta=10)
		beta = pm.HalfCauchy('beta', beta=10)
		theta = pm.Beta('theta', alpha, beta, shape=len(N_samples))

		# likehood
		y = pm.Bernoulli('y', p=theta[group_idx], observed=data)

		trace = pm.sample(2000, njobs=1)

		chain = trace[200:]
		fig = plt.figure()
		pm.traceplot(chain)
		pdf_all.savefig()
		
		# mean, standard deviation, and the HPD intervals
		print(pm.summary(trace))

Example #56
0
    step1 = pm.NUTS([Pb, mub, sb, b, m])
    step2 = pm.BinaryMetropolis([qi], tune_interval=100)
    step = [step1, step2]

    samples = pm.sample(niter, start=start_MAP, step=[step1, step2], progressbar=True)


## Declare a point as an outlier if its qi is 0 in more than 99% of the MCMC samples
# `cutoff` is a percentile in percent: the 1st percentile of (1 - qi) per
# point, taken over the post-burn-in samples (axis 0).
# NOTE(review): assumes qi only takes the values 0 and 1, so that the
# percentile is non-zero only when qi == 0 in about 99% of samples - confirm.
cutoff = 1
outlier = np.percentile(1 - samples[burnin:]["qi"], cutoff, axis=0)
outlier = outlier.astype(bool)
# the variable 'outlier' is an array of size N with True for outlier points and False for inlier points
# the points that are identified as outlier can change from run to run, especially if niter is small

## print a summary of the results
pm.summary(samples[burnin:])


## plot the samples and the marginal distributions
## using the built-in PyMC3 functions
pm.traceplot(samples[burnin:])
plt.show()


## in the previous plots and results you will also see
## the parameters s_b_log and Pb_interval
## these are created automatically by PyMC3
## but we don't need to worry about them


# the following two definitions of the function 'lm' are equivalent