Example no. 1
 def show_posteriors(self, kde=True):
     if hasattr(self, 'burned_trace'):
         pm.plots.traceplot(trace=self.burned_trace, varnames=["std", "beta", "alpha"])
         pm.plot_posterior(trace=self.burned_trace, varnames=["std", "beta", "alpha"], kde_plot=kde)
         pm.plots.autocorrplot(trace=self.burned_trace, varnames=["std", "beta", "alpha"])
     else:
         print("You must sample from the posteriors first.")
Example no. 2
def effect_difference(effect1, effect2, name1, name2, CI=95.0, show=True):
    diff = effect1 - effect2
    label = str(name1) + ' - ' + str(name2)
    plt.figure(figsize=(4, 3))
    plt.locator_params(nbins=4)
    plt.hist(diff, bins=100, label=label)
    plt.legend()
    if show: plt.show()

    fig = plt.figure(figsize=(3, 3))
    plt.locator_params(nbins=4)
    ax = fig.gca()
    pm.plot_posterior(effect1 - effect2,
                      varnames=[name1, name2],
                      ref_val=0,
                      color='#87ceeb',
                      ax=ax)
    ax.set_title(label)
    if show: plt.show()

    low_p = (100.0 - CI) / 2.0
    high_p = low_p + CI
    print(label,
          str(CI) + ' CI:', np.percentile(diff, low_p),
          np.percentile(diff, high_p), 'Pr > 0:', (diff > 0).mean())
    return fig
Example no. 3
def do_differences(recalculate=False):
    trace_cols = ["duration", "dist_err", "x_err", "y_err", "rms_x", "rms_y"]
    trace_coeffs = [[(5, 50), 50]] + [[(0, 2), 0.5]]*5
    trace_cols = ["path_length", "move_l", "move_r", "move_x", "move_b",
                  "move_f", "move_y"]
    trace_coeffs = [[(0, 10), 10], [(0, 5), 3], [(0, 5), 3], [(0, 10), 6],
                    [(0, 5), 1.5], [(0, 5), 7], [(0, 10), 9]]

    if recalculate:
        traces = analyze_differences(analyses, trace_cols, trace_coeffs)
    else:
        traces = {col: load_best_result(col) for col in trace_cols}
    for col, best_result in traces.items():
        trace = best_result.trace
        with figure(f"mean_std_{col}"):
            ax = pm.plot_posterior(trace[100:],
                                   varnames=[r"group1_mean", r"group2_mean",
                                             r"group1_std", "group2_std"],
                                   kde_plot=True, color="C0")
            for a in (1, 3):
                ax[a].lines[0].set_color("C1")

        with figure(f"difference_{col}"):
            pm.plot_posterior(trace[1000:],
                              varnames=["difference of means", "effect size"],
                              ref_val=0, kde_plot=True, color="C2")
Example no. 4
def summarize(best_result, kde=True, plot=True):
    trace, model = best_result
    if plot:
        ax = pm.plot_posterior(trace[100:],
                               varnames=[
                                   r"group1_mean", r"group2_mean",
                                   r"group1_std", "group2_std", r"ν_minus_one"
                               ],
                               kde_plot=kde,
                               color="C0")
        if kde:
            for a in (1, 3):
                ax[a].lines[0].set_color("C1")
        plt.figure()
        pm.plot_posterior(trace[1000:],
                          varnames=[
                              "difference of means", "difference of stds",
                              "effect size"
                          ],
                          ref_val=0,
                          kde_plot=True,
                          color="C2")
        plt.figure()
        pm.forestplot(trace[1000:], varnames=[v.name for v in model.vars[:2]])
        plt.figure()
        pm.forestplot(trace[1000:], varnames=[v.name for v in model.vars[2:]])

    pm.summary(
        trace[1000:],
        varnames=["difference of means", "difference of stds", "effect size"])
Example no. 5
def main():

    with pm.Model() as model:
        # Using a strong prior whose mean is pulled towards zero rather than towards 1
        prior = pm.Beta('prior', 0.5, 3)

        output = pm.Binomial('output', n=100, observed=50, p=prior)

        step = pm.Metropolis()
        trace = pm.sample(1000, step=step)
        pm.traceplot(trace)

    pm.plot_posterior(trace, figsize=(5, 5), kde_plot=True,
                      rope=[0.45, 0.55])  # ROPE is an interval that you define
    # around a value you expect. You can check whether the ROPE falls
    # within the HPD or not. If it does, the expected value lies within
    # the HPD, and increasing the sample size may make the mean estimate
    # sharper.

    # gelman rubin
    pm.gelman_rubin(trace)

    # forestplot
    pm.forestplot(trace, varnames=['prior'])

    # summary [look at mc error here. This is the std error, should be low]
    pm.df_summary(trace)

    #autocorrelation
    pm.autocorrplot(trace)

    # effective size
    pm.effective_n(trace)['prior']
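The ROPE-versus-HPD check described in the comments above can also be done numerically. A minimal sketch, assuming `trace` is the trace produced in main() (e.g. if main() returned it); the [0.45, 0.55] ROPE is the same hypothetical interval used above:

rope_low, rope_high = 0.45, 0.55
hpd_low, hpd_high = pm.hpd(trace['prior'])  # default HPD width depends on the PyMC3 version
overlaps = (rope_low <= hpd_high) and (rope_high >= hpd_low)
print('HPD:', hpd_low, hpd_high, '| ROPE overlaps HPD:', overlaps)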
Example no. 6
def main():
    # Hyperparameters
    n_flips = 125
    n_coins = 10
    n_draws = 5000
    n_init_steps = 10000
    n_burn_in_steps = 1000

    # Create Causal Distribution
    causal_probs = np.random.uniform(size=n_coins)

    # Create Observations
    X = np.array([
        np.random.choice(2, p=[1 - p_, p_], size=n_flips)
        for i, p_ in enumerate(causal_probs)
    ]).T

    # Create Model
    with pm.Model() as model:
        ps = pm.Beta('probs', alpha=1, beta=1, shape=n_coins)
        components = pm.Bernoulli.dist(p=ps, shape=n_coins)
        w = pm.Dirichlet('w', a=np.ones(n_coins))
        mix = pm.Mixture('mix', w=w, comp_dists=components, observed=X)

    # Train Model
    with model:
        trace = pm.sample(n_draws, n_init=n_init_steps, tune=n_burn_in_steps)

    # Display Results
    pm.plot_trace(trace, var_names=['w', 'probs'])
    plt.show()
    pm.plot_posterior(trace, var_names=['w', 'probs'])
    plt.show()
Example no. 7
def display_posterior(trace, filename):
    prj = project_reader(filename)
    WP_NAMES = np.array(prj[1][:, 0])
    WP_NUMBER = prj[1][:, 0].shape[0]
    PV_names = list()
    PVpartial_names = list()
    EV_names = list()
    COMP_names = list()
    SPI_names = list()
    CPI_names = list()
    Index_names = ["SPI_PROJECT", "CPI_PROJECT", "ETC", "EAC", "TEAC"]

    RISK_names = list()
    projectDefinition = prj[1]

    for x in range(WP_NUMBER):
        for y in range(2):
            if (projectDefinition[x][y + 1] != 0):
                rname = projectDefinition[x][0] + "_Risk_%d" % (y + 1)
                RISK_names.append(rname)

    for x in range(WP_NUMBER):
        PV_names.append("PV_%s" % WP_NAMES[x])
        PVpartial_names.append("Partial_PV_%s" % WP_NAMES[x])
        EV_names.append("EV_%s" % WP_NAMES[x])
        COMP_names.append("COMPLETION_%s" % WP_NAMES[x])
        SPI_names.append("SPI_%s" % WP_NAMES[x])
        CPI_names.append("CPI_%s" % WP_NAMES[x])
    all_names = RISK_names + PV_names + PVpartial_names + EV_names + COMP_names + SPI_names + CPI_names + Index_names

    print(
        pm.summary(trace,
                   varnames=all_names,
                   stat_funcs=[trace_mean, trace_sd, trace_quantiles]))
    pm.plot_posterior(trace, varnames=all_names)
Example no. 8
def save_mat_fig(trace_array, gtype="traceplot"):
    """
    save the traceplot array to an image source
    :param trace_array:
    :return:
    """
    # fig = Figure()
    if gtype == "traceplot":
        print("ENTER save_traceplot")
        pm.traceplot(trace_array, figsize=(12,12))
        print("PLT.GCF()")
        fig = plt.gcf()
        print("FIGURE")
        buf = io.BytesIO()
        print("BUFF")
        fig.savefig(buf, format="png")
        data = base64.b64encode(buf.getbuffer()).decode("utf8")
    elif gtype == "posterior":
        print("ENTER plot_posterior")
        pm.plot_posterior(trace_array, figsize=(12,12))
        fig = plt.gcf()
        buf = io.BytesIO()
        fig.savefig(buf, format="png")
        data = base64.b64encode(buf.getbuffer()).decode("utf8")
    else:
        data = []
    return "data:image/png;base64,{}".format(data)
Example no. 9
def plot_traces(traces, names, show=True):

    fig_1 = plt.figure()
    plt.locator_params(nbins=4)
    for (t, n) in zip(traces, names):
        plt.hist(t, bins=100, label=n)
    plt.legend()
    if show: plt.show()

    N = len(traces) + 1
    figs = []
    for (t, n, i) in zip(traces, names, range(1, N)):
        fig = plt.figure(figsize=(3, 3))
        plt.locator_params(nbins=4)
        ax = fig.gca()
        # ax = fig_2.add_subplot(1, N, i)
        pm.plot_posterior(t, varnames=[n], color='#87ceeb', ax=ax)
        ax.set_title(n)
        ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        ax.locator_params(axis='x', nbins=4)
        ax.locator_params(axis='y', nbins=1)
        figs.append((n, fig))
    if show: plt.show()

    return fig_1, figs
Example no. 10
def main(n, observed):
    '''
    parameters
    --------
    n : int
        number of trials
    observed: int
         observed number of successes
    '''
    with pm.Model() as exam_model:
        # Weak prior (Beta(0.5, 0.5))
        prior = pm.Beta('prior', 0.5, 0.5)

        # Bernoulli trials modeled using a binomial distribution
        obs = pm.Binomial('obs', n=n, p=prior, observed=observed)

        # plot model design
        pm.model_to_graphviz(exam_model)

        # Use metropolis hasting for sampling
        step = pm.Metropolis()

        # draw MCMC samples to approximate the posterior
        trace = pm.sample(5000, step)

        # plot posterior
        pm.plot_posterior(trace)

        # calculate gelman rubin stats
        pm.gelman_rubin(trace)
Example no. 11
def fit_model(model):
    start = time.time()
    with model:
        step = pm.NUTS()
        trace = pm.sample(2000, step=step, chains=1, cores=1, tune=1000)
        print(time.time() - start)
        pm.plot_posterior(trace)
        pm.traceplot(trace)
Example no. 12
def update_bayesian_modeling(mean_upd, var_upd, alpha_upd, beta_upd, inv_a_upd,
                             inv_b_upd, iv_upd, strategy, stock_price,
                             strike_price, risk_free, time):
    with pm.Model() as update_model:
        prior = pm.InverseGamma('bv', inv_a_upd, inv_b_upd)

        likelihood = pm.InverseGamma('like',
                                     inv_a_upd,
                                     inv_b_upd,
                                     observed=iv_upd)

    with update_model:
        # step = pm.Metropolis()

        v_trace_update = pm.sample(10000, tune=1000)
        #print(v_trace['bv'][:])
        trace_update = v_trace_update['bv'][:]
        #print(trace)

    pm.traceplot(v_trace_update)
    plt.show()

    pm.autocorrplot(v_trace_update)
    plt.show()

    pm.plot_posterior(v_trace_update[100:],
                      color='#87ceeb',
                      point_estimate='mean')
    plt.show()

    s = pm.summary(v_trace_update).round(2)
    print("\n Summary")
    print(s)

    a = np.random.choice(trace_update, 10000, replace=True)
    ar = []
    for i in range(9999):
        t = a[i] / 100
        ar.append(t)
    #print("Bayesian Volatility Values", ar)

    op = []
    for i in range(9999):
        temp = BS_price(strategy, stock_price, strike_price, risk_free, ar[i],
                        time)
        op.append(temp)
    #print("Bayesian Option Prices", op)

    plt.hist(ar, bins=50)
    plt.title("Volatility")
    plt.ylabel("Frequency")
    plt.show()

    plt.hist(op, bins=50)
    plt.title("Option Price")
    plt.ylabel("Frequency")
    plt.show()
    return trace_update
Example no. 13
def plot_ADVI_posterior(NNInput, ADVIApprox):

    if (NNInput.PlotShow):
        fig = plt.figure()
        pymc3.plot_posterior(ADVIApprox.sample(NNInput.NApproxSamplesADVI),
                             color='LightSeaGreen')
        plt.show()
        FigPath = NNInput.PathToOutputFldr + '/ADVI_Posterior.png'
        fig.savefig(FigPath)
Example no. 14
def plot_posterior(trace, *args, **kwargs):
    """Plot posterior of random variables.

  Default behaviour is to plot all random variables.
  Variables can be selectively plotted by mentioning them in `var_names` argument.
  """
    # convert vector-valued variables into multiple scalars
    trace, vectors = disentangle_trace(trace)
    # resolve variable names
    var_names = resolve_var_names(kwargs.get('var_names'), trace, vectors)
    if var_names is not None:
        kwargs['var_names'] = var_names
    pm.plot_posterior(trace, *args, **kwargs)  # delegates to `pymc3.plot_posterior`
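As the docstring notes, variables can be selected with `var_names`. A hypothetical call, assuming a fitted `trace` whose vector variable `w` was split into scalars `w_0`, `w_1`, ... by disentangle_trace():

plot_posterior(trace, var_names=['w_0', 'w_1'])  # plots only the selected scalars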
Example no. 15
def model_pymc(std_prior_lower=100.0, std_prior_upper=1000.0):
    with pm.Model() as model:

        group_sa_mean = pm.Normal('SA_mean', prior_1.mean(), sd=prior_1.std())
        group_me_mean = pm.Normal('ME_mean', prior_2.mean(), sd=prior_2.std())

#     std_prior_lower = 100.0
#     std_prior_upper = 1000.0 # changed to more accurately reflect the standard deviations here

    with model:

        group_sa_std = pm.Uniform('SA_std',
                                  lower=std_prior_lower,
                                  upper=std_prior_upper)
        group_me_std = pm.Uniform('ME_std',
                                  lower=std_prior_lower,
                                  upper=std_prior_upper)

        group_sa = pm.Normal('SA_acts',
                             mu=group_sa_mean,
                             sd=group_sa_std,
                             observed=sa_80s)
        group_me = pm.Normal('ME_acts',
                             mu=group_me_mean,
                             sd=group_me_std,
                             observed=me_00s)

        diff_of_means = pm.Deterministic('difference of means',
                                         group_sa_mean - group_me_mean)
        diff_of_stds = pm.Deterministic('difference of stds',
                                        group_sa_std - group_me_std)
        effect_size = pm.Deterministic(
            'effect size', diff_of_means / np.sqrt(
                (group_sa_std**2 + group_me_std**2) / 2))

    with model:
        trace = pm.sample(25000, njobs=4)

    pm.plot_posterior(trace[3000:],
                      varnames=['SA_mean', 'ME_mean', 'SA_std', 'ME_std'],
                      color='#F4953B')

    pm.plot_posterior(
        trace[3000:],
        varnames=['difference of means', 'difference of stds', 'effect size'],
        ref_val=0,
        color='#87ceeb')
    plt.show()
Example no. 16
def summary(trace, varnames=None, plot_convergence_stats=False):
    pm.plot_posterior(trace, varnames=varnames)
    plt.show()
    pm.plots.traceplot(trace, varnames=varnames)
    plt.show()
    pm.plots.forestplot(trace, varnames=varnames)
    plt.show()
    if plot_convergence_stats:
        pm.plots.energyplot(trace)
        plt.show()
        print(
            'Gelman-Rubin ',
            max(
                np.max(gr_values)
                for gr_values in pm.gelman_rubin(trace).values()))
    return pm.summary(trace, varnames=varnames)
Example no. 17
def dice_bias():
    y = np.asarray([20,  21, 17, 19, 17, 30])
    k = len(y)
    p = 1/k
    n = y.sum()

    with pm.Model() as dice_model:
        
        # initializes the Dirichlet distribution with a uniform prior:
        a = np.ones(k) 
        
        theta = pm.Dirichlet("theta", a=a)
        
        # Since theta[5] will hold the posterior probability 
        # of rolling a 6 we'll compare this to the 
        # reference value p = 1/6 to determine the amount of bias
        # in the die 
        six_bias = pm.Deterministic("six_bias", theta[k-1] - p)
        
        results = pm.Multinomial("results", n=n, p=theta, observed=y)
        dice_trace = pm.sample(draws=1000) 
        pm.traceplot(dice_trace, combined=True, lines={"theta": p})

    
    axes = pm.plot_posterior(dice_trace, 
                          varnames=["theta"], 
                          ref_val=np.round(p, 3))
    for i, ax in enumerate(axes):
        ax.set_title(f"{i+1}")

    six_bias = dice_trace["six_bias"]
    six_bias_perc = len(six_bias[six_bias>0])/len(six_bias)
    plt.show() 
    print(f'P(Six is biased) = {six_bias_perc:.2%}')
Example no. 18
def excel_posterior(trace, filename):

    #Need to read the data again to set activity number and names
    prj = project_reader(filename)
    WP_NAMES = np.array(prj[1][:, 0])
    WP_NUMBER = prj[1][:, 0].shape[0]

    PV_names = list()
    PVpartial_names = list()
    EV_names = list()
    COMP_names = list()
    SPI_names = list()
    CPI_names = list()
    Index_names = ["SPI_PROJECT", "CPI_PROJECT", "ETC", "EAC", "TEAC"]

    RISK_names = list()
    projectDefinition = prj[1]

    for x in range(WP_NUMBER):
        for y in range(2):
            if (projectDefinition[x][y + 1] != 0):
                rname = projectDefinition[x][0] + "_Risk_%d" % (y + 1)
                RISK_names.append(rname)

    for x in range(WP_NUMBER):
        PV_names.append("PV_%s" % WP_NAMES[x])
        PVpartial_names.append("Partial_PV_%s" % WP_NAMES[x])
        EV_names.append("EV_%s" % WP_NAMES[x])
        COMP_names.append("COMPLETION_%s" % WP_NAMES[x])
        SPI_names.append("SPI_%s" % WP_NAMES[x])
        CPI_names.append("CPI_%s" % WP_NAMES[x])
    all_names = RISK_names + PV_names + PVpartial_names + EV_names + COMP_names + SPI_names + CPI_names + Index_names

    outputName = filename + "Output.xlsx"
    traceName = filename + "Trace.xlsx"
    pm.summary(trace,
               varnames=all_names,
               stat_funcs=[trace_mean, trace_sd,
                           trace_quantiles]).to_excel(outputName,
                                                      sheet_name="Summary")
    pm.plot_posterior(trace, varnames=all_names)
    pm.trace_to_dataframe(trace).to_excel(traceName, sheet_name="Trace")
Example no. 19
def plot_difference_of_means(trace, **kwargs):
    """
    Plots a difference of means graph

    @param trace a trace object
    @param **kwargs keyword arguments for matplotlib
    @returns a plot axes with the graph plotted
    """
    ps1 = pm.plot_posterior(trace, varnames=['difference of means'],
                            ref_val=0,
                            color='#87ceeb', **kwargs)
    return ps1
Example no. 20
File: model.py Project: jancr/ppv
 def _plot_posterior(self, betaj, save_path=None, axes_size=4, shape=None,
                     credible_interval=0.94, color_bad=True, beta0=None):
     fig, axes = self._predictor_canvas(self.predictors, axes_size, 1, shape=shape)
     if beta0 is None:
         beta0 = betaj[0]
         betaj = betaj[1:]
     pm.plot_posterior(beta0.values, point_estimate='mode', ax=axes[0, 0], color=color)
     axes[0, 0].set_xlabel(r'$\beta_0$ (Intercept)', fontdict=f_dict)
     axes[0, 0].set_title('', fontdict=f_dict)
     columns = self.predictors.columns
     for i, (ax, feature) in enumerate(zip(axes.flatten()[1:], columns)):
         pm.plot_posterior(betaj[feature].values, point_estimate='mode', 
                         credible_interval=credible_interval, ax=ax, color=color)
         ax.set_title('', fontdict=f_dict)
         ax.set_xlabel(r'$\beta_{{{}}}$ ({})'.format(i + 1, ' '.join(feature)),
                     fontdict=f_dict)
         if color_bad and not self.is_credible(self.trace['zbetaj'][:, i], credible_interval):
             ax.patch.set_facecolor('#FFCCCB')
             ax.patch.set_visible(True)
     if save_path is not None:
         fig.savefig(save_path, transparent=True)
     return fig
Example no. 21
    def applyBayesianSampling(self, final_col_list):
        formula = 'G3 ~ ' + ' + '.join([i for i in final_col_list])
        print('formula : ', formula)

        # Context for the model
        with pm.Model() as normal_model:
            # Setting the likelihood as a normal distribution
            family = pm.glm.families.Normal()

            # Creating the model using the family, data and formula
            pm.GLM.from_formula(formula, data=self.X_train, family=family)
            # Perform Markov Chain Monte Carlo sampling
            normal_trace = pm.sample(draws=3000, chains=2, tune=1200, cores=-1)

            print(pm.summary(normal_trace))
            pm.traceplot(normal_trace)
            plt.show()
            pm.plot_posterior(normal_trace)
            plt.show()
            blr_formula = self.bayesianLRFormula(normal_trace)
            print('blr_formula :', blr_formula)

        return normal_trace, blr_formula
def cmt_example():
    obs = {
        'n1_Hp_Rp': 519,
        'n1_Hp': 10473,
        'P_Hp': 0.1561185895315763,
        'n_Hp_Rp': 42,
        'n_Hn_Rp': 2,
        'n_Hp_Rn': 687,
        'n_Hn_Rn': 3624
    }

    trace = sample_heuristic_precision(obs, {'draws': 10000, 'tune': 5000})

    pm.plot_posterior(trace, credible_interval=0.94)
    pm.plot_posterior(trace, credible_interval=0.99)

    help(pm.plot_posterior)

    pm.traceplot(trace)
    pm.forestplot(trace)

    q_samples = trace['q']
    np.average(q_samples < 0.03)
Example no. 23
    def plot_posterior(self, varnames=None, ref_val=None):
        """Generate informative plots form the trace.
        
           Parameters
           ----------
           varnames : iterable of str or None, optional
               The model variables to generate plots for (default None).
               If None, defaults to all variables.
           ref_val: int or float or None, optional
               The value to use as reference on the plots (default None).
               Generally only relevant for posteriors on differences of means 
               and standard deviations. For example, if ref_val = 0, a bar will
               be placed on the posterior plot at a point corresponding to
               zero difference in parameters. If this bar lies within the 95% HPD,
               then it is likely that there is no significant difference between 
               the parameters.
        """

        varnames = varnames or self.model_variables
        pm.plot_posterior(self.trace,
                          varnames=varnames,
                          ref_val=ref_val,
                          color='#8BCAF1')
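A hypothetical call illustrating the ref_val behaviour described in the docstring (the `fit` instance and the variable name are assumptions):

fit.plot_posterior(varnames=['difference of means'], ref_val=0)  # reference bar at zero difference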
Example no. 24
 def plot_posterior(self,
                    varnames=["estimate"],
                    ref_val=np.log2(3),
                    color="LightSeaGreen",
                    rope=[-.4, .4],
                    xlim=[-.5, 2],
                    suffix=""):
     plt.rcParams.update({'font.size': 15, 'figure.figsize': (7, 5)})
     pm.plot_posterior(self.trace,
                       varnames=varnames,
                       ref_val=ref_val,
                       color=color,
                       rope=rope)
     plt.title("log2FC Posterior for {}".format(self.p.split(";")[0]))
     plt.xlim(xlim)
     plt.ylabel("Probability")
     plt.savefig(os.path.join(self.plot_dir, "posteriors", "eps",
                              "{}{}.eps".format(self.p, suffix)),
                 format='eps',
                 dpi=900)
     plt.savefig(
         os.path.join(self.plot_dir, "posteriors", "png",
                      "{}{}.png".format(self.p, suffix)))
     plt.close()
def getAngelRate(data, n_sample=10000, n_chain=3, ax=None):
    # Organize the data
    data_0 = data.query('campaign != 1')
    data_1 = data.query('campaign == 1')
    d = np.array([[
        sum(data_0['angel'] == 0),
        sum(data_0['angel'] == 1),
        sum(data_0['angel'] == 2)
    ],
                  [
                      sum(data_1['angel'] == 0),
                      sum(data_1['angel'] == 1),
                      sum(data_1['angel'] == 2)
                  ]])
    weight = np.array([[1.0, 1.0, 1.0], [1.0, 0.0, 2.0]])
    # Parameter estimation
    with pm.Model() as model:
        alpha = [1., 1., 1.]  # hyper-parameter of DirichletDist.
        pi = pm.Dirichlet('pi', a=np.array(alpha))
        for i in np.arange(d.shape[0]):
            piw = pi * weight[i]
            m = pm.Multinomial('m_%s' % (i),
                               n=np.sum(d[i]),
                               p=piw,
                               observed=d[i])
        trace = pm.sample(n_sample, chains=n_chain)
    np.savetxt('trace_pi.csv', trace['pi'], delimiter=',')
    # Silver
    hpd_l, hpd_u = pm.hpd(trace['pi'][:, 1])
    print('Silver : 95% HPD : {}-{}'.format(hpd_l, hpd_u))
    print('Silver ExpectedValue : {}'.format(trace['pi'][:, 1].mean()))
    # Gold
    hpd_l, hpd_u = pm.hpd(trace['pi'][:, 2])
    print('Gold : 95% HPD : {}-{}'.format(hpd_l, hpd_u))
    print('Gold ExpectedValue : {}'.format(trace['pi'][:, 2].mean()))
    # save fig
    if ax is not None:
        pm.plot_posterior(trace['pi'][:, 0], ax=ax[0])
        pm.plot_posterior(trace['pi'][:, 1], ax=ax[1])
        pm.plot_posterior(trace['pi'][:, 2], ax=ax[2])
        ax[0].set_title('Nothing')
        ax[1].set_title('SilverAngel')
        ax[2].set_title('GoldAngel')
    return trace
Example no. 26
def run(n=1500):
    if n == 'short':
        n = 50

    print('Model with no censored data (omniscient)')
    with omniscient_model:
        trace = pm.sample(n)
        pm.plot_posterior(trace[-1000:], varnames=['mu', 'sigma'])
        plt.show()

    print('Imputed censored model')
    with imputed_censored_model:
        trace = pm.sample(n)
        pm.plot_posterior(trace[-1000:], varnames=['mu', 'sigma'])
        plt.show()

    print('Unimputed censored model')
    with unimputed_censored_model:
        trace = pm.sample(n)
        pm.plot_posterior(trace[-1000:], varnames=['mu', 'sigma'])
        plt.show()
## Plot KDE and sampled values for each parameter.
#pm.traceplot(trace)

model_idx_sample = trace['model_index']
pM1 = sum(model_idx_sample == 0) / len(model_idx_sample)
pM2 = 1 - pM1

plt.figure(figsize=(15, 15))
plt.subplot2grid((3,3), (0,0), colspan=3)
plt.plot(model_idx_sample, label='p(DiffMu|D) = {:.3f} ; p(SameMu|D) = {:.3f}'.format(pM1, pM2))
plt.xlabel('Step in Markov Chain')
plt.legend(loc='upper right', framealpha=0.75)

count = 0
position = [(1,0), (1,1), (1,2), (2,0), (2,1), (2,2)]
for i in range(0, 4):
    mui_sample = trace['mu1'][:,i][model_idx_sample == 0]
    for j in range(i+1, 4):
        muj_sample = trace['mu1'][:,j][model_idx_sample == 0]
        ax = plt.subplot2grid((3,3), position[count])
        pm.plot_posterior(mui_sample-muj_sample,
                          ref_val=0, ax=ax)
        plt.title(r'$\mu_{} - \mu_{}$'.format(i+1, j+1))
        plt.xlim(-0.3, 0.3)
        count += 1


plt.tight_layout()
plt.savefig('Figure_12.5.png')
plt.show()
Example no. 29
## Print summary for each trace
#pm.summary(trace)

## Check for mixing and autocorrelation
#pm.autocorrplot(trace, vars =[mu, tau])

## Plot KDE and sampled values for each parameter.
#pm.traceplot(trace)

## Extract chains
muG_sample = trace['muG']
tauG_sample = trace['tauG']
m_sample = trace['m']
d_sample = trace['d']

# Plot the hyperdistributions:
_, ax = plt.subplots(1, 4, figsize=(20, 5))
pm.plot_posterior(muG_sample, bins=30, ax=ax[0])
ax[0].set_xlabel(r'$\mu_g$', fontsize=16)
pm.plot_posterior(tauG_sample, bins=30, ax=ax[1])
ax[1].set_xlabel(r'$\tau_g$', fontsize=16)
pm.plot_posterior(m_sample, bins=30, ax=ax[2])
ax[2].set_xlabel('m', fontsize=16)
pm.plot_posterior(d_sample, bins=30, ax=ax[3])
ax[3].set_xlabel('d', fontsize=16)

plt.tight_layout()
plt.savefig('Figure_15.9.png')
plt.show()
Example no. 30
with model1:
    theta = pm.Dirichlet("theta", a=alpha, shape=(D, K))
    phi = pm.Dirichlet("phi", a=beta, shape=(K, V))
    doc = pm.DensityDist('doc', log_lda(theta, phi), observed=data)
with model1:
    inference = pm.ADVI()
    approx = pm.fit(
        n=10000,
        method=inference,
        callbacks=[pm.callbacks.CheckParametersConvergence(diff='absolute')])

#inference
tr1 = approx.sample(draws=1000)
pm.plots.traceplot(tr1)
pm.plot_posterior(tr1, color='LightSeaGreen')

plt.plot(approx.hist)
'''
With MCMC
'''

with model:
    theta = pm.Dirichlet("thetas", a=alpha, shape=(D, K))
    phi = pm.Dirichlet("phis", a=beta, shape=(K, V))
    z = pm.Categorical("zx", p=theta, shape=(W, D))
    w = pm.Categorical("wx",
                       p=t.reshape(phi[z.T], (D * W, V)),
                       observed=data.reshape(D * W))
with model:
    tr = pm.sample(1000, chains=1)
Example no. 31
def bayesian_t(df, val_col, grp_col='regulated',
               sig_fac=2, unif_l=0, unif_u=20,
               exp_mn=30, 
               plot_trace=False, plot_ppc=False,
               plot_vars=False, plot_diffs=True,
               steps=2000, mcmc='metropolis'):
    """ Simple Bayesian test for differences between two groups.
    
    Args:
        df         Dataframe. Must have a column containing values
                   and a categorical 'regulated' column that is [0, 1]
                   to define the two groups
        val_col    Name of the values column
        grp_col    Name of the categorical column defining the groups
        sig_fac    Factor applied to std. dev. of pooled data to define
                   prior std. dev. for group means
        unif_l     Lower bound for uniform prior on std. dev. of group
                   means
        unif_u     Upper bound for uniform prior on std. dev. of group
                   means
        exp_mn     Mean of exponential prior for v in Student-T 
                   distribution
        plot_trace Whether to plot the MCMC traces
        plot_ppc   Whether to perform and plot the Posterior Predictive
                   Check 
        plot_vars  Whether to plot posteriors for variables
        plot_diffs Whether to plot posteriors for differences
        steps      Number of steps to take in MCMC chains
        mcmc       Sampler to use: ['metropolis', 'slice', 'nuts']
    
    Returns:
        Creates plots showing the distribution of differences in 
        means and variances, plus optional diagnostics. Returns the 
        MCMC trace
    """
    import numpy as np
    import pymc3 as pm
    import pandas as pd
    import seaborn as sn
    import matplotlib.pyplot as plt

    # Get overall means and s.d.
    mean_all = df[val_col].mean()
    std_all = df[val_col].std()

    # Group data
    grpd = df.groupby(grp_col)
    
    # Separate groups
    reg_data = grpd.get_group(1)[val_col].values
    ureg_data = grpd.get_group(0)[val_col].values   

    # Setup model
    with pm.Model() as model:
        # Priors for means of Student-T dists
        reg_mean = pm.Normal('regulated_mean', mu=mean_all, sd=std_all*sig_fac)
        ureg_mean = pm.Normal('unregulated_mean', mu=mean_all, sd=std_all*sig_fac)

        # Priors for std. dev. of Student-T dists
        reg_std = pm.Uniform('regulated_std', lower=unif_l, upper=unif_u)
        ureg_std = pm.Uniform('unregulated_std', lower=unif_l, upper=unif_u)

        # Prior for v of Student-T dists
        nu = pm.Exponential('v_minus_one', 1./29.) + 1

        # Define Student-T dists
        # PyMC3 uses precision = 1 / (sd^2) to define dists rather than std. dev.
        reg_lam = reg_std**-2
        ureg_lam = ureg_std**-2

        reg = pm.StudentT('regulated', nu=nu, mu=reg_mean, lam=reg_lam, observed=reg_data)
        ureg = pm.StudentT('unregulated', nu=nu, mu=ureg_mean, lam=ureg_lam, observed=ureg_data)

        # Quantities of interest (difference of means and std. devs.)
        diff_of_means = pm.Deterministic('difference_of_means', reg_mean - ureg_mean)
        diff_of_stds = pm.Deterministic('difference_of_stds', reg_std - ureg_std)
        
        # Run sampler to approximate posterior
        if mcmc == 'metropolis':
            trace = pm.sample(steps, step=pm.Metropolis())
        elif mcmc == 'slice':
            trace = pm.sample(steps, step=pm.Slice())
        elif mcmc == 'nuts':
            trace = pm.sample(steps)
        else:
            raise ValueError("mcmc must be one of ['metropolis', 'slice', 'nuts']")

    # Plot results
    # Raw data
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14,4))
    
    for name, grp in grpd:
        sn.distplot(grp[val_col].values, ax=axes[name], kde=False)
        axes[name].set_title('Regulated = %s' % name)        

    # Traces
    if plot_trace:
        pm.traceplot(trace)
    
    # Posteriors for variables
    if plot_vars:
        pm.plot_posterior(trace[1000:],
                          varnames=['regulated_mean', 'unregulated_mean', 
                                    'regulated_std', 'unregulated_std'],
                          alpha=0.3)

    # Posteriors for differences
    if plot_diffs:
        pm.plot_posterior(trace[1000:],
                          varnames=['difference_of_means', 'difference_of_stds'],
                          ref_val=0,
                          alpha=0.3)
        
    # Posterior predictive check
    if plot_ppc:
        ppc = pm.sample_ppc(trace, samples=500, model=model, size=100)

        fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14,4))

        sn.distplot([n.mean() for n in ppc['unregulated']], ax=axes[0])
        axes[0].axvline(ureg_data.mean(), c='k')
        axes[0].set(title='Posterior predictive of the mean (unregulated)', 
                    xlabel='Mean', 
                    ylabel='Frequency')

        sn.distplot([n.mean() for n in ppc['regulated']], ax=axes[1])
        axes[1].axvline(reg_data.mean(), c='k')
        axes[1].set(title='Posterior predictive of the mean (regulated)', 
                    xlabel='Mean', 
                    ylabel='Frequency')
    
    return trace
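A hypothetical invocation of bayesian_t() matching the docstring above (the dataframe and the column name 'conc' are assumptions):

trace = bayesian_t(df, val_col='conc', grp_col='regulated',
                   steps=2000, mcmc='metropolis')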
# Plot the trajectory of the last 500 sampled values.
plt.plot(theta1_sample[-500:], theta2_sample[-500:], marker='o', color='skyblue')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel(r'$\theta1$')
plt.ylabel(r'$\theta2$')

# Display means in plot.
plt.plot(0, label='M = %.3f, %.3f' % (np.mean(theta1_sample), np.mean(theta2_sample)), alpha=0.0)

plt.legend(loc='upper left')
plt.savefig('Figure_8.6.png')

# Plot a histogram of the posterior differences of theta values.
theta_diff = theta1_sample - theta2_sample
pm.plot_posterior(theta_diff, ref_val=0.0, bins=30, color='skyblue')
plt.xlabel(r'$\theta_1 - \theta_2$')
plt.savefig('Figure_8.8.png')

# For Exercise 8.5:
# Posterior prediction. For each step in the chain, use the posterior thetas 
# to flip the coins.
chain_len = len(theta1_sample)
# Create matrix to hold results of simulated flips:
y_pred = np.zeros((2, chain_len))
for step_idx in range(chain_len):  # step through the chain
    # flip the first coin:
    p_head1 = theta1_sample[step_idx]
    y_pred[0, step_idx] = np.random.choice([0,1], p=[1-p_head1, p_head1])
    # flip the second coin:
    p_head2 = theta2_sample[step_idx]
plt.subplot(1, 2, 1)
thin_idx = 50
plt.plot(z1[::thin_idx], z0[::thin_idx], 'b.', alpha=0.7)
plt.ylabel('Standardized Intercept')
plt.xlabel('Standardized Slope')
plt.subplot(1, 2, 2)
plt.plot(b1[::thin_idx], b0[::thin_idx], 'b.', alpha=0.7)
plt.ylabel('Intercept (ht when wt=0)')
plt.xlabel('Slope (pounds per inch)')
plt.tight_layout()
plt.savefig('Figure_16.4.png')

# Display the posterior of the b1:
plt.figure(figsize=(8, 5))
ax = plt.subplot(1, 2, 1)
pm.plot_posterior(z1, ref_val=0.0, bins=30, ax=ax)
ax.set_xlabel('Standardized slope')
ax = plt.subplot(1, 2, 2)
pm.plot_posterior(b1, ref_val=0.0, bins=30, ax=ax)
ax.set_xlabel('Slope (pounds per inch)')
plt.tight_layout()
plt.savefig('Figure_16.5.png')

# Display data with believable regression lines and posterior predictions.
plt.figure()
# Plot data values:
x_rang = np.max(x) - np.min(x)
y_rang = np.max(y) - np.min(y)
lim_mult = 0.25
x_lim = [np.min(x)-lim_mult*x_rang, np.max(x)+lim_mult*x_rang]
y_lim = [np.min(y)-lim_mult*y_rang, np.max(y)+lim_mult*y_rang]
                            scale = np.repeat([sigma[chain_idx]], [len(x_post_pred)]))

for x_idx in range(len(x_post_pred)):
    y_HDI_lim[x_idx] = hpd(y_post_pred[x_idx])


# Display believable beta0 and b1 values
plt.figure()
thin_idx = 5
plt.plot(b1[::thin_idx], b0[::thin_idx], '.')
plt.ylabel("Intercept")
plt.xlabel("Slope")
plt.savefig('Figure_16.x0.png')

# Display the posterior of the b1:
ax = pm.plot_posterior(b1, ref_val=0.0, bins=30)
ax.set_xlabel(r'Slope ($\Delta$ tar  / $\Delta$ weight)')
plt.title('Mean tdf = %.2f' % tdf_m)
plt.savefig('Figure_16.8b.png')

# Display data with believable regression lines and posterior predictions.
plt.figure()
plt.plot(x, y, 'k.')
plt.title('Data with credible regression lines')
plt.xlabel('weight')
plt.ylabel('tar')
plt.xlim(x_lim)
plt.ylim(y_lim)
# Superimpose a smattering of believable regression lines:
for i in range(0, len(b0), 5):
    plt.plot(x, b0[i] + b1[i]*x  , c='k', alpha=0.05 )
#pm.summary(trace)

## Check for mixing and autocorrelation
#pm.autocorrplot(trace, vars =[mu, tau])

## Plot KDE and sampled values for each parameter.
#pm.traceplot(trace)

mu_sample = trace['mu']
sigma_sample = trace['sd']



plt.figure(figsize=(10, 6))
ax = plt.subplot(1, 2, 1)
pm.plot_posterior(mu_sample, bins=30, ax=ax)
ax.set_xlabel('mu')
ax.set_title('Posterior')
ax.set_xlim(98, 102)

plt.subplot(1, 2, 2)

mu_mean = np.mean(mu_sample)
sigma_mean = np.mean(sigma_sample)

plt.scatter(mu_sample, sigma_sample , c='gray')
plt.plot(mu_mean, sigma_mean, 'C1*',
        label=r'$\mu$ = %.1f, $\sigma$ = %.1f' % (mu_mean, sigma_mean))
plt.xlabel('mu')
plt.ylabel('sigma')
plt.title('Posterior')
            n_accepted += 1
    else:
        # reject the proposed jump, stay at current position
        trajectory[t+1] = current_position
        # increment the rejected counter, just to monitor performance
        if t > burn_in:
            n_rejected += 1


# Extract the post-burn_in portion of the trajectory.
accepted_traj = trajectory[burn_in:]
# End of Metropolis algorithm.

# Display the posterior.
ROPE = np.array([0.76, 0.8])
pm.plot_posterior(accepted_traj,  ref_val=0.9, rope=ROPE)
plt.xlabel('theta')


# Display rejected/accepted ratio in the plot.
mean_traj = np.mean(accepted_traj)
std_traj = np.std(accepted_traj)
plt.plot(0, label=r'$N_{pro}=%s$ $\frac{N_{acc}}{N_{pro}} = %.3f$' % (len(accepted_traj), (n_accepted/len(accepted_traj))), alpha=0)

# Evidence for model, p(D).

# Compute a,b parameters for beta distribution that has the same mean
# and stdev as the sample from the posterior. This is a useful choice
# when the likelihood function is Bernoulli.
a =   mean_traj   * ((mean_traj*(1 - mean_traj)/std_traj**2) - 1)
b = (1 - mean_traj) * ((mean_traj*(1 - mean_traj)/std_traj**2) - 1)
if n_predictors >= 6:  # cap the number of predictors displayed
    n_predictors = 6

columns = ['Sigma y', 'Intercept']
[columns.append('Slope_%s' % i) for i in predictor_names[:n_predictors]]
traces = np.array([sigma_samp, b0_samp, b_samp[:,0], b_samp[:,1]]).T
df = pd.DataFrame(traces, columns=columns)
g = sns.PairGrid(df)
g.map(plt.scatter)
plt.savefig('Figure_17.Xa.png')

## Display the posterior:

plt.figure(figsize=(16,4))
ax = plt.subplot(1, n_predictors+2, 1)
pm.plot_posterior(sigma_samp, ax=ax)
ax.set_xlabel(r'$\sigma y$')
ax = plt.subplot(1, n_predictors+2, 2)
pm.plot_posterior(b0_samp, ax=ax)
ax.set_xlabel('Intercept')

for i in range(0, n_predictors):
    ax = plt.subplot(1, n_predictors+2, 3+i)
    pm.plot_posterior(b_samp[:,i], ref_val=0, ax=ax)
    ax.set_xlabel('Slope_%s' % predictor_names[i])
plt.tight_layout()
plt.savefig('Figure_17.Xb.png')

# Posterior prediction:
# Define matrix for recording posterior predicted y values for each xPostPred.
# One row per xPostPred value, with each row holding random predicted y values.
## Plot KDE and sampled values for each parameter.
#pm.traceplot(trace)
pm.traceplot(trace)


# Create arrays with the posterior sample
mu1_sample = trace['mu'][:,0]
mu2_sample = trace['mu'][:,1]
mu3_sample = trace['mu'][:,2]
mu4_sample = trace['mu'][:,3]


# Plot differences among filtrations experiments
fig, ax = plt.subplots(1, 3, figsize=(15, 6))
pm.plot_posterior(mu1_sample-mu2_sample, ax=ax[0], color='skyblue')
ax[0].set_xlabel(r'$\mu1-\mu2$')

# Plot differences among condensation experiments
pm.plot_posterior(mu3_sample-mu4_sample, ax=ax[1], color='skyblue')
ax[1].set_xlabel(r'$\mu3-\mu4$')

# Plot differences between filtration and condensation experiments
a = (mu1_sample+mu2_sample)/2 - (mu3_sample+mu4_sample)/2
pm.plot_posterior(a, ax=ax[2], color='skyblue')
ax[2].set_xlabel(r'$(\mu1+\mu2)/2 - (\mu3+\mu4)/2$')

plt.tight_layout()
plt.savefig('Figure_9.16.png')
plt.show()
Example no. 39
 def posterior_plot(self, **kwargs):
     return pm.plot_posterior(self.posterior_, **kwargs)
Example no. 40
    # define priors
    prior_v1 = pm.Beta('prior_v1', alpha=2, beta=2)
    prior_v2 = pm.Beta('prior_v2', alpha=2, beta=2)

    # define likelihood
    like_v1 = pm.Binomial('like_v1', n=n, p=prior_v1, observed=obs_v1)
    like_v2 = pm.Binomial('like_v2', n=n, p=prior_v2, observed=obs_v2)
    
    # define metrics
    pm.Deterministic('difference', prior_v2 - prior_v1)
    pm.Deterministic('relation', (prior_v2/prior_v1) - 1)

    # inference
    trace = pm.sample(draws=50000, step=pm.Metropolis(), start=pm.find_MAP(), progressbar=True)

_ = pm.plot_posterior(trace[1000:], varnames=['difference', 'relation'], 
                      ref_val=0, color='#87ceeb')


# NOTE 
#         n        - Number of training examples.
#         i        - ith training example in a data set.
#         y(i)     - Ground truth label for ith training example.
#         y_hat(i) - Prediction for ith training example.
## Compute MSE
    import numpy as np
    y_hat = np.array([0.000, 0.166, 0.333])
    y_true = np.array([0.000, 0.254, 0.998])
    def rmse(predictions, targets):
        differences = predictions - targets
        differences_squared = differences ** 2
        mean_of_differences_squared = differences_squared.mean()
        return np.sqrt(mean_of_differences_squared)  # root of the mean squared error
    y = pm.Bernoulli('y', p=theta, observed=y)

    # Generate a MCMC chain
    trace = pm.sample(1000)


# create an array with the posterior sample
theta_sample = trace['theta']

fig, ax = plt.subplots(1, 2)
ax[0].plot(theta_sample[:500], np.arange(500), marker='o', color='skyblue')
ax[0].set_xlim(0, 1)
ax[0].set_xlabel(r'$\theta$')
ax[0].set_ylabel('Position in Chain')

pm.plot_posterior(theta_sample, ax=ax[1], color='skyblue');
ax[1].set_xlabel(r'$\theta$');

# Posterior prediction:
# For each step in the chain, use posterior theta to flip a coin:
y_pred = np.zeros(len(theta_sample))
for i, p_head in enumerate(theta_sample):
    y_pred[i] = np.random.choice([0, 1], p=[1 - p_head, p_head])

# Jitter the 0,1 y values for plotting purposes:
y_pred_jittered = y_pred + np.random.uniform(-.05, .05, size=len(theta_sample))

# Now plot the jittered values:
plt.figure()
plt.plot(theta_sample[:500], y_pred_jittered[:500], 'C1o')
plt.xlim(-.1, 1.1)
		# likelihood
		y = pm.Normal('y', mu=means[idx], sd=sds[idx], observed=y)

		trace = pm.sample(5000, njobs=1)
		chain = trace[100::]
		fig = plt.figure()
		pm.traceplot(chain)
		pdf_all.savefig()
		
		# mean, standard deviation, and the HPD intervals
		print(pm.summary(trace))

		# 
		dist = stats.norm()
		fig, ax = plt.subplots(3, 2, figsize=(16,12))

		comparisons = [(i,j) for i in range(len(set(idx))) for j in range(i+1, len(set(idx)))]
		pos = [(k,l) for k in range(3) for l in (0,1)]

		for (i,j), (k,l) in zip(comparisons, pos):
			means_diff = chain['means'][:,i] - chain['means'][:,j]
			d_cohen = (means_diff / np.sqrt((chain['sds'][:,i]**2 + chain['sds'][:,j]**2)/2) ).mean()

			ps = dist.cdf(d_cohen/(2**0.5))

			pm.plot_posterior(means_diff, ref_val=0, ax=ax[k,l], color='skyblue')
			ax[k,l].plot(0, label="Cohen's d={:.2f}\nProb sup={:.2f}".format(d_cohen, ps), alpha=0)
			ax[k,l].set_xlabel(r'$\mu_{}-\mu_{}$'.format(i, j), fontsize=15)
			ax[k,l].legend(loc=0, fontsize=14)
		pdf_all.savefig(fig)
# Extract values of 'a'
a0_sample = trace['a0']
b1_sample = trace['b1']
b2_sample = trace['b2']
b1b2_sample = trace['b1b2']

b0_sample = a0_sample * np.std(y) + np.mean(y)
b1_sample = b1_sample * np.std(y)
b2_sample = b2_sample * np.std(y)
b1b2_sample = b1b2_sample * np.std(y)


plt.figure(figsize=(25,20))
ax = plt.subplot(451)
pm.plot_posterior(b0_sample,  bins=50, ax=ax)
ax.set_xlabel(r'$\beta0$')
ax.set_title('Baseline')
plt.xlim(b0_sample.min(), b0_sample.max());

count = 2
for i in range(len(b1_sample[0])):
    ax = plt.subplot(4, 5, count)
    pm.plot_posterior(b1_sample[:,i], ax=ax)
    ax.set_xlabel(r'$\beta1_{}$'.format(i))
    ax.set_title('x1: {}'.format(x1names[i]))
    count += 1

for i in range(len(b2_sample[0])):
    ax = plt.subplot(4, 5, count)
    pm.plot_posterior(b2_sample[:,i], bins=50, ax=ax)
#pm.autocorrplot(trace, varnames=['mu', 'kappa'])

## Plot KDE and sampled values for each parameter.
#pm.traceplot(trace[burnin:])
pm.traceplot(trace)

# Create arrays with the posterior sample
mu1_sample = trace['mu'][:,0][burnin:]
mu2_sample = trace['mu'][:,1][burnin:]
mu3_sample = trace['mu'][:,2][burnin:]
mu4_sample = trace['mu'][:,3][burnin:]


# Plot differences among filtrations experiments
fig, ax = plt.subplots(1, 3, figsize=(15, 6))
pm.plot_posterior((mu1_sample-mu2_sample), ax=ax[0], ref_val=0, color='skyblue')
ax[0].set_xlabel(r'$\mu1-\mu2$')

# Plot differences among condensation experiments
pm.plot_posterior((mu3_sample-mu4_sample), ax=ax[1], ref_val=0, color='skyblue')
ax[1].set_xlabel(r'$\mu3-\mu4$')

# Plot differences between filtration and condensation experiments
a = (mu1_sample+mu2_sample)/2 - (mu3_sample+mu4_sample)/2
pm.plot_posterior(a, ax=ax[2], ref_val=0, color='skyblue')
ax[2].set_xlabel(r'$(\mu1+\mu2)/2 - (\mu3+\mu4)/2$')

plt.tight_layout()
plt.savefig('Figure_9.18_upper.png')
plt.show()
#pm.autocorrplot(trace, vars =[nu, eta])

## Plot KDE and sampled values for each parameter.
#pm.traceplot(trace)


model_idx_sample = trace['model_index']
pM1 = sum(model_idx_sample == 0) / len(model_idx_sample)
pM2 = 1 - pM1


nu_sample_M1 = trace['nu'][model_idx_sample == 0]
eta_sample_M2 = trace['eta'][model_idx_sample == 1]

plt.figure()
plt.subplot(2, 1, 1)
pm.plot_posterior(nu_sample_M1)
plt.xlabel(r'$\nu$')
plt.ylabel('frequency')
plt.title(r'p($\nu$|D,M1), with p(M1|D)={:.3f}'.format(pM1), fontsize=14)
plt.xlim(-8, 8)

plt.subplot(2, 1, 2)
pm.plot_posterior(eta_sample_M2)
plt.xlabel(r'$\eta$')
plt.ylabel('frequency')
plt.title(r'p($\eta$|D,M2), with p(M2|D)={:.3f}'.format(pM2), fontsize=14)
plt.xlim(0, 8)
plt.savefig('figure_ex_10.2_a.png')
plt.show()
Example no. 46
def robust_lin_reg(df, var_map, 
                   steps=2000, mcmc='metropolis',
                   plot_trace=True, plot_vars=True):
    """ Robust Bayesian linear regression.
    
    Args:
        df         Dataframe containing the explanatory and response
                   variables
        var_map    Dict specifying x and y vars: {'x':'expl_var',
                                                  'y':'resp_var'}
        steps      Number of steps to take in MCMC chains
        mcmc       Sampler to use: ['metropolis', 'slice', 'nuts']
        plot_trace Whether to plot the MCMC traces
        plot_vars  Whether to plot posteriors for variables
    
    Returns:
        Creates plots of the posteriors of the regression parameters,
        plus optional diagnostics. Returns the MCMC trace
    """
    import pymc3 as pm
    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy as np
    import theano 
    
    # Get cols
    df = df[var_map.values()]
    
    # Swap keys and values
    var_map_rev = dict((v,k) for k,v in var_map.iteritems())
    
    # Convert df columns to x and y
    df.columns = [var_map_rev[i] for i in df.columns]

    with pm.Model() as model:
        # Priors
        nu = pm.Exponential('v_minus_one', 1./29.) + 1
        
        # The patsy string below automatically assumes mu=0 and estimates
        # lam = (1/s.d.**2), so don't need to add these. Do need to add 
        # prior for nu though.
        family = pm.glm.families.StudentT(nu=nu)
        
        # Define model
        pm.glm.glm('y ~ x', df, family=family)
        
        # Find MAP as starting point
        start = pm.find_MAP()

        # Run sampler to approximate posterior
        if mcmc == 'metropolis':
            step = pm.Metropolis()
            trace = pm.sample(steps, step, start=start)
        elif mcmc == 'slice':
            step = pm.Slice()
            trace = pm.sample(steps, step, start=start)
        elif mcmc == 'nuts':
            step = pm.NUTS(scaling=start)
            trace = pm.sample(steps, step)
        else:
            raise ValueError("mcmc must be one of ['metropolis', 'slice', 'nuts']")

    # Traces
    if plot_trace:
        pm.traceplot(trace)
    
    # Posteriors for variables
    if plot_vars:
        pm.plot_posterior(trace[-1000:],
                          varnames=['v_minus_one', 'lam'],
                          alpha=0.3)

        pm.plot_posterior(trace[1000:],
                          varnames=['x', 'Intercept'],
                          ref_val=0,
                          alpha=0.3)
        
    # PPC
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, xlabel=var_map['x'], ylabel=var_map['y'])
    ax.scatter(df.x, df.y, marker='o', label='Data')
    pm.glm.plot_posterior_predictive(trace, samples=50, eval=df.x,
                                     label='PPC', alpha=0.3)
    
    return trace
pm.autocorrplot(trace[burnin:], varnames=['mu', 'kappa'])
#pm.autocorrplot(trace, vars =[mu, kappa])

## Plot KDE and sampled values for each parameter.
pm.traceplot(trace[burnin:])
#pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:,0][burnin:]
theta28_sample = trace['theta'][:,27][burnin:]
mu_sample = trace['mu'][burnin:]
kappa_sample = trace['kappa'][burnin:]

# Plot mu histogram
fig, ax = plt.subplots(2, 2, figsize=(12,12))
pm.plot_posterior(mu_sample, ax=ax[0, 0], color='skyblue')
ax[0, 0].set_xlabel(r'$\mu$')

# Plot kappa histogram
pm.plot_posterior(kappa_sample, ax=ax[0, 1], color='skyblue')
ax[0, 1].set_xlabel(r'$\kappa$')

# Plot theta 1
pm.plot_posterior(theta1_sample, ax=ax[1, 0], color='skyblue')
ax[1, 0].set_xlabel(r'$\theta1$')

# Plot theta 28
pm.plot_posterior(theta28_sample, ax=ax[1, 1], color='skyblue')
ax[1, 1].set_xlabel(r'$\theta28$')

Example no. 48
theta2_sample = trace['theta'][:, 1]
theta3_sample = trace['theta'][:, 2]
mu_sample = trace['mu']
kappa_sample = trace['kappa']

# Scatter plot hyper-parameters
fig, ax = plt.subplots(4, 3, figsize=(12, 12))
ax[0, 0].scatter(mu_sample, kappa_sample, marker='o', color='skyblue')
ax[0, 0].set_xlim(0, 1)
ax[0, 0].set_xlabel(r'$\mu$')
ax[0, 0].set_ylabel(r'$\kappa$')

# Plot mu histogram
#plot_post(mu_sample, xlab=r'$\mu$', show_mode=False, labelsize=9, framealpha=0.5)

pm.plot_posterior(mu_sample, ax=ax[0, 1], color='skyblue')
ax[0, 1].set_xlabel(r'$\mu$')
ax[0, 1].set_xlim(0, 1)

# Plot kappa histogram
#plot_post(kappa_sample, xlab=r'$\kappa$', show_mode=False, labelsize=9, framealpha=0.5)
pm.plot_posterior(kappa_sample, ax=ax[0, 2], color='skyblue')
ax[0, 2].set_xlabel(r'$\kappa$')

# Plot theta 1

#plot_post(theta1_sample, xlab=r'$\theta1$', show_mode=False, labelsize=9, framealpha=0.5)
pm.plot_posterior(theta1_sample, ax=ax[1, 0], color='skyblue')
ax[1, 0].set_xlabel(r'$\theta1$')
ax[1, 0].set_xlim(0, 1)
#pm.autocorrplot(trace, vars=model.unobserved_RVs[:-1])

## Plot KDE and sampled values for each parameter.
pm.traceplot(trace)


a0_sample = trace['a0']
b_sample = trace['b']
b0_sample = a0_sample * np.std(y) + np.mean(y)
b_sample = b_sample * np.std(y)


plt.figure(figsize=(20, 4))
for i in range(5):
    ax = plt.subplot(1, 5, i+1)
    pm.plot_posterior(b_sample[:,i], bins=50, ax=ax)
    ax.set_xlabel(r'$\beta1_{}$'.format(i))
    ax.set_title('x:{}'.format(i))
plt.tight_layout()
plt.savefig('Figure_18.xa.png')


nContrasts = len(contrast_dict)
if nContrasts > 0:
    plt.figure(figsize=(20, 8))
    count = 1
    for key, value in contrast_dict.items():
        contrast = np.dot(b_sample, value)
        ax = plt.subplot(2, 4, count)
        pm.plot_posterior(contrast,  ref_val=0.0, bins=50, ax=ax)
        ax.set_title('Contrast {}'.format(key))
Example no. 50
                                     group1_mean - group2_mean)
    diff_of_stds = pm.Deterministic('difference of stds',
                                    group1_std - group2_std)
    effect_size = pm.Deterministic(
        'effect size', diff_of_means / np.sqrt(
            (group1_std**2 + group2_std**2) / 2))

    # RUN
    #trace = pm.sample(2000, cores=2)  #  Nota Bene: https://github.com/pymc-devs/pymc3/issues/3388
    trace = pm.sample(1000, tune=1000, cores=1)

pm.kdeplot(np.random.exponential(30, size=10000), shade=0.5)

pm.plot_posterior(trace,
                  varnames=[
                      'group1_mean', 'group2_mean', 'group1_std', 'group2_std',
                      'ν_minus_one'
                  ],
                  color='#87ceeb')

pm.plot_posterior(
    trace,
    varnames=['difference of means', 'difference of stds', 'effect size'],
    ref_val=0,
    color='#87ceeb')

pm.forestplot(trace, varnames=['group1_mean', 'group2_mean'])

pm.forestplot(trace, varnames=['group1_std', 'group2_std', 'ν_minus_one'])

pm.summary(
    trace,