Example #1
import arviz as az
import numpy as np


def compute_hdp(samples, lims):
    """Summarise samples as [outer low, inner low, centre, inner high, outer high].

    ``lims`` holds three credible masses: the midpoint of the first interval
    is used as the centre estimate; the second and third give the inner and
    outer HPD bounds.
    """
    ndim = np.ndim(samples)
    if ndim == 1:
        result = np.zeros(5)
    else:
        size = samples.shape
        result = np.zeros((5, size[1]))

    if ndim == 1:
        for i in range(len(lims)):
            kk = az.hpd(samples, credible_interval=lims[i])
            if i == 0:
                result[2] = np.mean(kk)
            elif i == 1:
                result[1] = kk[0]
                result[3] = kk[1]
            elif i == 2:
                result[0] = kk[0]
                result[4] = kk[1]
    else:
        for j in range(size[1]):
            for i in range(len(lims)):
                kk = az.hpd(samples[:, j], credible_interval=lims[i])
                if i == 0:
                    result[2, j] = np.mean(kk)
                elif i == 1:
                    result[1, j] = kk[0]
                    result[3, j] = kk[1]
                elif i == 2:
                    result[0, j] = kk[0]
                    result[4, j] = kk[1]

    return result
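
A minimal usage sketch (the demo data and the choice of lims are assumptions; az.hpd with credible_interval requires a pre-0.11 ArviZ):

rng = np.random.default_rng(0)
draws = rng.normal(loc=[0.0, 2.0], scale=1.0, size=(4000, 2))  # hypothetical posterior draws
# centre from the 50% interval midpoint; inner/outer bounds from 68% and 95%
summary = compute_hdp(draws, lims=[0.5, 0.68, 0.95])
print(summary.shape)  # (5, 2): one five-number summary per column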
Example #2
    def hdi(self, var_name: str, credible_mass: float = 0.95):
        """Calculate the highest posterior density interval (HDI)

        This function calculates a *credible interval* which contains the
        ``credible_mass`` most likely values of the parameter, given the data.
        Also known as an HPD interval.

        Parameters
        ----------
        var_name : str
            Name of variable.
        credible_mass : float
            The HDI will cover credible_mass * 100% of the probability mass.
            Default: 0.95, i.e. a 95% HDI.

        Returns
        -------
        (float, float)
            The endpoints of the HPD
        """
        check_credible_mass(credible_mass)

        az_major, az_minor, *_ = arviz.__version__.split('.')
        if (int(az_major), int(az_minor)) >= (0, 8):
            return tuple(arviz.hdi(self.trace[var_name], hdi_prob=credible_mass))
        else:
            return tuple(arviz.hpd(self.trace[var_name], credible_interval=credible_mass))
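
The version dispatch can be exercised on its own; a minimal sketch with synthetic draws (the 0.8 cut-off mirrors the method above, where ArviZ renamed hpd/credible_interval to hdi/hdi_prob):

import numpy as np
import arviz

samples = np.random.default_rng(1).normal(size=2000)
az_major, az_minor, *_ = arviz.__version__.split('.')
if (int(az_major), int(az_minor)) >= (0, 8):
    lower, upper = arviz.hdi(samples, hdi_prob=0.95)
else:
    lower, upper = arviz.hpd(samples, credible_interval=0.95)
print(lower, upper)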
Example #3
    def plot_samples(self,
                     samples,
                     titles,
                     num_samples,
                     color='grey',
                     scatters=None,
                     scatter_args=None,
                     legend=True,
                     margined=False,
                     sample_gap=2):
        # avoid sharing a mutable default dict across calls
        scatter_args = scatter_args or {}
        num_components = samples[0].shape[0]
        subplot_shape = ((num_components + 2) // 3, 3)
        if self.opt.for_report:
            subplot_shape = ((num_components + 1) // 2, 2)
        if num_components <= 1:
            subplot_shape = (1, 1)
        for j in range(num_components):
            ax = plt.subplot(subplot_shape[0], subplot_shape[1], 1 + j)
            plt.title(titles[j])
            if scatters is not None:
                plt.scatter(self.τ[self.common_ind],
                            scatters[j],
                            marker='x',
                            label='Observed',
                            **scatter_args)
            # plt.errorbar([n*10+n for n in range(7)], Y[j], 2*np.sqrt(Y_var[j]), fmt='none', capsize=5)

            for s in range(1, sample_gap * num_samples, sample_gap):
                kwargs = {}
                if s == 1:
                    kwargs = {'label': 'Samples'}

                plt.plot(self.τ,
                         samples[-s, j, :],
                         color=color,
                         alpha=0.5,
                         **kwargs)
            if j % subplot_shape[1] == 0:
                plt.ylabel(self.opt.ylabel)
            # HPD:
            bounds = arviz.hpd(samples[-self.opt.num_hpd:, j, :],
                               credible_interval=0.95)
            plt.fill_between(self.τ,
                             bounds[:, 0],
                             bounds[:, 1],
                             color='grey',
                             alpha=0.3,
                             label='95% credibility interval')

            plt.xticks(self.t)
            ax.set_xticklabels(self.t)
            if margined:
                plt.ylim(min(samples[-1, j]) - 2, max(samples[-1, j]) + 2)
            # if self.opt.for_report:
            # plt.ylim(-0.2, max(samples[-1, j]) + 0.2)
            plt.xlabel('Time (h)')
            if legend:
                plt.legend()
        plt.tight_layout()
Example #4
import arviz as az
import matplotlib.pyplot as plt
import numpy as np


def plot_posterior_mean(trace_mu, x_val, y_val, credible_interval=0.97):
    """Plot the data, the posterior mean curve, and its HPD band."""
    idx = np.argsort(x_val)
    mu_hpd = az.hpd(trace_mu, credible_interval=credible_interval)

    plt.plot(x_val, y_val, marker='o', linestyle='')
    plt.plot(x_val[idx], trace_mu.mean(axis=0)[idx], linestyle='-')
    plt.fill_between(x_val[idx], mu_hpd[idx, 0], mu_hpd[idx, 1],
                     color='grey', alpha=.3)
    plt.xlabel('rugged')
    plt.ylabel('log gdp')
    return plt
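
A sketch with hypothetical draws (shapes and data are assumptions):

x = np.linspace(0, 1, 50)
true_mu = 1.0 + 2.0 * x
trace_mu = true_mu + np.random.default_rng(2).normal(scale=0.1, size=(500, 50))
y_obs = true_mu + np.random.default_rng(3).normal(scale=0.3, size=50)
plot_posterior_mean(trace_mu, x, y_obs).show()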
Example #5
import arviz as az


def compute_hdi(arr, credible_interval=0.64):
    """Compute Highest Density Intervals for an array of posterior samples.

    The sample dimension must come first.

    :param arr: array of shape (n_samples, n_genes)
    :param credible_interval: credible mass covered by each interval
    :return: array of shape (n_genes, 2) with (lower, upper) bounds
    """
    return az.hpd(arr, credible_interval=credible_interval)
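
A quick sketch (simulated draws; the shapes are assumptions):

import numpy as np

arr = np.random.default_rng(4).normal(size=(1000, 3))  # 1000 samples, 3 genes
bounds = compute_hdi(arr)                              # default 64% interval
print(bounds.shape)                                    # (3, 2): (lower, upper) per gene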
Example #6
    def plot_bar_hpd(self,
                     var_samples,
                     var,
                     labels,
                     true_var=None,
                     width=0.1,
                     titles=None,
                     rotation=0,
                     true_hpds=None):
        hpds = list()
        num = var.shape[0]
        plotnum = var.shape[1] * 100 + 21
        for i in range(num):
            hpds.append(
                arviz.hpd(var_samples[-self.opt.num_hpd:, i, :],
                          credible_interval=0.95))
        hpds = np.array(hpds)
        hpds = abs(hpds - np.expand_dims(var, 2))
        for k in range(var_samples.shape[2]):
            plt.subplot(plotnum)
            plotnum += 1
            plt.bar(np.arange(num) - width,
                    var[:, k],
                    width=2 * width,
                    tick_label=labels,
                    color='chocolate',
                    label=self.opt.model_label)
            plt.errorbar(np.arange(num) - width,
                         var[:, k],
                         hpds[:, k].swapaxes(0, 1),
                         fmt='none',
                         capsize=5,
                         color='black')
            plt.xlim(-1, num)
            plt.xticks(rotation=rotation)
            if titles is not None:
                plt.title(titles[k])
            if true_var is not None:
                plt.bar(np.arange(num) + width,
                        true_var[:, k],
                        width=2 * width,
                        color='slategrey',
                        align='center',
                        label=self.opt.true_label)
                if true_hpds is not None:
                    plt.errorbar(np.arange(num) + width,
                                 true_var[:, k],
                                 true_hpds[:, k].swapaxes(0, 1),
                                 fmt='none',
                                 capsize=5,
                                 color='black')
                plt.legend()
        plt.tight_layout()
        return hpds
Example #7
import arviz as az
import numpy as np


def hpd(y, credible_interval=0.94):
    """Return (lower, upper) HPD bound arrays for ``y``."""
    y = np.asarray(y)
    y_shape = y.shape
    if 0 in y_shape:
        return (np.array([]), np.array([]))
    hpd_ = az.hpd(y,
                  credible_interval=credible_interval,
                  circular=False,
                  multimodal=False)
    if hpd_.ndim == 1:
        hpd_ = np.expand_dims(hpd_, axis=0)
    return (hpd_[:, 0], hpd_[:, 1])
def make_plot(trace):
    plot_training_data()
    # plot logistic curve
    theta = trace['θ'].mean(axis=0)
    idx = np.argsort(x_c)
    plt.plot(x_c[idx], theta[idx], color='C2', lw=3)
    az.plot_hpd(x_c, trace['θ'], color='C2')

    # plot decision boundary
    plt.vlines(trace['bd'].mean(), 0, 1, color='k')
    bd_hpd = az.hpd(trace['bd'])
    plt.fill_betweenx([0, 1], bd_hpd[0], bd_hpd[1], color='k', alpha=0.5)
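
A sketch exercising the hpd wrapper above (synthetic draws):

draws = np.random.default_rng(5).normal(size=(2000, 3))
low, high = hpd(draws, credible_interval=0.9)
print(low.shape, high.shape)  # (3,) (3,): one bound pair per column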
Example #9
import arviz as az
import numpy as np
from scipy.interpolate import griddata
from scipy.signal import savgol_filter


def hpd_area(x, y, credible_interval=0.94, smooth=True):
    """Return the x-, y-coordinates of the highest posterior density area
    of the predicted values.

    Parameters
    ----------
    x : array-like
        The x-coordinates of the data.
    y : array-like
        The predicted y-values of the data.
    credible_interval : float
        The credible interval for the HPD.

    Returns
    -------
    tuple
        A tuple (x, y) with the coordinates of the HPD area polygon.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    x_shape = x.shape
    y_shape = y.shape

    if 0 in x_shape or 0 in y_shape:
        return (np.array([]), np.array([]))

    if y_shape[-len(x_shape):] != x_shape:
        msg = "Dimension mismatch for x: {} and y: {}."
        msg += " y-dimensions should be (chain, draw, *x.shape) or"
        msg += " (draw, *x.shape)"
        raise TypeError(msg.format(x_shape, y_shape))

    if len(y_shape[:-len(x_shape)]) > 1:
        new_shape = tuple([-1] + list(x_shape))
        y = y.reshape(new_shape)

    hpd_ = az.hpd(y,
                  credible_interval=credible_interval,
                  circular=False,
                  multimodal=False)

    if smooth:
        x_data = np.linspace(x.min(), x.max(), 200)
        x_data[0] = (x_data[0] + x_data[1]) / 2
        hpd_interp = griddata(x, hpd_, x_data)
        y_data = savgol_filter(hpd_interp,
                               axis=0,
                               window_length=55,
                               polyorder=2)
        return (np.concatenate((x_data, x_data[::-1])),
                np.concatenate((y_data[:, 0], y_data[:, 1][::-1])))
    else:
        return (np.concatenate(
            (x, x[::-1])), np.concatenate((hpd_[:, 0], hpd_[:, 1][::-1])))
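
A sketch that shades the returned polygon (synthetic predictions; smooth=False skips the interpolation step):

import matplotlib.pyplot as plt

x = np.linspace(0, 10, 100)
y_draws = np.sin(x) + np.random.default_rng(6).normal(scale=0.2, size=(800, 100))
px, py = hpd_area(x, y_draws, smooth=False)
plt.fill(px, py, alpha=0.3)
plt.show()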
def hpd_shade(ax, interval, samples):
    """Calculate highest posterior density intervals using arviz and fill in
    between the interval bounds.

    ACCEPTS
    ax [matplotlib axes]
    interval [float] credible mass of the HPD
    samples [2d array] posterior samples; columns are time points, rows are samples

    Note: uses the module-level ``datadict['t_sim']`` for the x-axis.
    """
    my_hpd = az.hpd(samples, credible_interval=interval)
    ax.fill_between(datadict['t_sim'],
                    my_hpd[:, 0],
                    my_hpd[:, 1],
                    alpha=0.2,
                    color='C1')
Example #11
import arviz as az
import numpy as np


def hdi(x: np.ndarray, credible_interval: float = 0.94) -> np.ndarray:
    """Calculate highest density interval (HDI).

    This function is an alias for the ``arviz.hpd`` function.

    Parameters
    ----------
    x
        Array containing MCMC samples.
    credible_interval
        Credible interval to compute. Defaults to 0.94.

    Returns
    -------
    np.ndarray
        Array containing the lower and upper value of the computed interval.
    """
    return az.hpd(x, credible_interval=credible_interval)
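
A one-line sketch (synthetic samples):

samples = np.random.default_rng(7).normal(size=5000)
print(hdi(samples))  # array([lower, upper]) covering 94% of the mass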
Example #12
plt.show()

#%% [markdown]
# ### Code 4.19-.20
#%%
sns.kdeplot(sample_mu)
plt.xlabel('sample_mu')
plt.ylabel('density')
plt.show()
sns.kdeplot(sample_sig)
plt.xlabel('sample_sigma')
plt.ylabel('density')
plt.show()

#%%
print(az.hpd(sample_mu, credible_interval=0.5))
az.hpd(sample_sig, credible_interval=0.5)

#%% [markdown]
# ### Code 4.21
#%%
d3 = np.random.choice(d2.height, size=20, replace=False)

#%% [markdown]
# ### Code 4.22-.23
# We do this to show that the posterior is not always Gaussian in shape.
# That is driven less by the mean than by the variance, which tends to have a long right tail.
#%%
mu_list = np.linspace(150, 170, num=200)
sigma_list = np.linspace(4, 20, num=200)
post = np.array(np.meshgrid(mu_list, sigma_list)).reshape(2, -1).T
Example #13
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--sys-dir",
        help="specifies the parent folder for all config sys",
        type=str,
        required=False,
        default="/application/Distance-Based_Data/SupplementaryWebsite/",
    )
    parser.add_argument(
        "--results-dir",
        help="specifies the parent folder for all run results",
        type=str,
        required=False,
        default="/results/",
    )
    parser.add_argument(
        "--conf-steps",
        help="how many steps to check for confidence calibration",
        type=int,
        default=20,
    )
    parser.add_argument(
        "--predictive-samples",
        help="how many samples to draw from the predictive distribution",
        type=int,
        default=4000,
    )
    args = parser.parse_args()

    sys_dir = args.sys_dir
    results_dir = args.results_dir
    conf_steps = args.conf_steps
    n_post_samples = args.predictive_samples
    inference_data = os.path.join(results_dir, "last-inference")
    output = os.path.join(results_dir, "last-evaluation")
    contents = get_result_files(inference_data)

    repos = {}
    idx_keys = None
    obs = []
    for args, hist, tracer in contents:
        t = int(args["t"])
        sys_name = args["sys_name"]
        run_id = get_run_id_from_path(args["folder"])
        attribute = args["attribute"]
        if sys_name not in repos:
            repo = DistBasedRepo(sys_dir, sys_name, attribute)
            repos[sys_name] = repo
        repo = repos[sys_name]
        x_eval = list(repo.all_configs.keys())
        y_eval = list(repo.all_configs.values())
        err_dict = {}
        for it in hist:
            idx = tuple(args.values())
            model = hist[it]["prob-model"]
            trace = hist[it]["trace"]
            fitting_time = tracer.fitting_times[it]
            weighted_errs_per_sample = list(tracer.weighted_errs_per_sample)
            weighted_rel_errs_per_sample = list(
                tracer.weighted_rel_errs_per_sample)
            err_dict[it] = {
                "weighted_errs_per_sample": weighted_errs_per_sample,
                "weighted_rel_errs_per_sample": weighted_rel_errs_per_sample
            }
            ft_selection_seconds = tracer.prior_spectrum_cost

            t_start = time.time()

            alt_preds = tracer.predict_raw(x_eval)

            with model:
                ppc = pm.sample_posterior_predictive(
                    trace,
                    samples=n_post_samples,
                )
            t_end = time.time()
            pred_time = t_end - t_start

            y_samples = ppc["y_observed"]
            # y_pred_median = np.median(y_samples, axis=0)
            # midpoint of a very narrow (1%) HPD interval, used as a mode-like point estimate
            y_pred = np.mean(az.hpd(y_samples, credible_interval=0.01), axis=1)
            correct_in_dict = {}
            conf_mape_dict = {}
            conf_mape_without_zeros = {}
            intervals = np.linspace(0, 1, conf_steps + 1)[1:-1]
            print("computing confidence accuracies")
            for perc in intervals:
                correct_in_dict[perc] = float(
                    Tracer.calc_confidence_err(perc, y_eval, y_samples))
                conf_mape_dict[perc], conf_mape_without_zeros[
                    perc] = Tracer.calc_confidence_closest_mape(
                        perc, y_eval, y_samples)
                print("Finished", perc)

            # correct_in_095 = float(Tracer.calc_confidence_err(0.95, y_eval, y_samples))
            assert len(y_eval) == len(y_pred)
            eval_mape = score_mape(None, None, y_eval, y_pred)

            results = {
                "args": args,
                "it": it,
                "eval_mape": eval_mape,
                "fitting_time": fitting_time,
                "ft_selection_seconds": ft_selection_seconds,
                "n_post_samples": n_post_samples,
                "pred_time": pred_time,
                "corect_in_conf": correct_in_dict,
                "mape_to_nearest_conf_bound": conf_mape_dict
            }

            obs.append(results)
        print("storing pickle")
        if not os.path.exists(output):
            os.mkdir(output)
        out_path = os.path.join(output, run_id + ".p")
        with open(out_path, 'wb') as f:
            pickle.dump(obs, f)
        obs_str = pprint.PrettyPrinter().pformat(obs)
        str_out_path = os.path.join(output, run_id + ".txt")
        with open(str_out_path, 'w') as f:
            f.write(obs_str)
        out_path_err_dist = os.path.join(output, "errs-" + run_id + ".p")
        with open(out_path_err_dist, 'wb') as f:
            pickle.dump(err_dict, f)

    print("Index:", idx_keys)
    print("DONE")
Example #14
                             observed=data["Marriage_std"].values)
    prior_samples = pm.sample_prior_predictive()
    m_5_4_trace = pm.sample()

# %%
mu_m_5_4_mean = m_5_4_trace["mu"].mean(axis=0)
residuals = data["Marriage_std"] - mu_m_5_4_mean

# %%
with m_5_4:
    m_5_4_ppc = pm.sample_posterior_predictive(m_5_4_trace,
                                               var_names=["mu", "divorce_std"],
                                               samples=1000)

mu_mean = m_5_4_ppc["mu"].mean(axis=0)
mu_hpd = az.hpd(m_5_4_ppc["mu"], credible_interval=0.89)

D_sim = m_5_4_ppc["divorce_std"].mean(axis=0)
D_PI = az.hpd(m_5_4_ppc["divorce_std"], credible_interval=0.89)

# %%
fig, ax = plt.subplots(figsize=(6, 6))
plt.errorbar(
    data["Divorce_std"].values,
    m_5_4_ppc["divorce_std"].mean(0),
    yerr=np.abs(m_5_4_ppc["divorce_std"].mean(0) - mu_hpd.T),
    fmt="C0o",
)
ax.scatter(data["Divorce_std"].values, D_sim)

min_x, max_x = data["Divorce_std"].min(), data["Divorce_std"].max()
Example #15
#     ax=ax,
#     fill_kwargs={"alpha": 0.8, "color": "#a1dab4", "label": "Outcome 94% HPD"},
# )
#
# ax.set_xlabel("Predictor (stdz)")
# ax.set_ylabel("Outcome (stdz)")
# ax.set_title("Posterior predictive checks")
# ax.legend(ncol=2, fontsize=10);
# ax.set_xscale('log')
# ax.set_yscale('log')
# plt.show()
#
# az.plot_posterior(ppc_ms['dem'])

# Find equations for limits of HPD
hpd = pandas.DataFrame(az.hpd(ppc_ms['dem']), columns=['lower', 'upper'])
hpd['lower'] = 10**hpd['lower']
hpd['upper'] = 10**hpd['upper']
ppc_coefs = {}
for name, col in hpd.items():
    ppc_coefs[name] = LinearRegression(fit_intercept=False).fit(
        ms_df['bar_width'].values.reshape(-1, 1), hpd[name].values).coef_[0]

# Group data by river
ms_group = ms_df.groupby(['river', 'bar'])
group_river = ms_df.groupby('river')
group_bar = bar_df.groupby('river')

# Initialize the visualizer class
vh = Visualizer.Visualizer()
Example #16
with model_rlg:
    log.info("The summary on the trace is as follows: %s",
             az.summary(trace, var_names=["alpha", "beta", "db", "pie"]))
    az.plot_trace(trace, var_names=["alpha", "beta", "db", "pie"])

# ------------------- plots ------------------------------------------- #

# get the mean of theta
theta_mean = trace["theta"].mean(0)
# get idx
idx = np.argsort(x_c)
# plot the predicted p of the data
plt.figure()
plt.plot(x_c[idx], theta_mean[idx], color="C2", lw=3)
# set a vertical line at the mean of the decision boundary
plt.vlines(trace["db"].mean(), 0, 1, color="k")
# get the hpd of db
db_hpd = az.hpd(trace["db"])
plt.fill_betweenx([0, 1], db_hpd[0], db_hpd[1], color="k", alpha=0.5)
plt.scatter(x_c,
            np.random.normal(y_0, 0.02),
            marker='.',
            color=[f'C{x}' for x in y_0])
az.plot_hpd(x_c, trace["theta"], color="C2")
plt.xlabel("petal length")
plt.ylabel("theta", rotation=0)
locs, _ = plt.xticks()
plt.xticks(locs, np.round(locs + x_0.mean(), 1))
plt.show()
Example #17
    yl = pm.Bernoulli('yl', p=theta, observed=y_0)

    trace_0 = pm.sample(1000)

# In[9]:

varnames = ['alpha', 'beta', 'bd']
az.summary(trace_0, varnames)

# In[10]:

theta = trace_0['theta'].mean(axis=0)
idx = np.argsort(x_c)
plt.plot(x_c[idx], theta[idx], color='C2', lw=3)
plt.vlines(trace_0['bd'].mean(), 0, 1, color='k')
bd_hpd = az.hpd(trace_0['bd'])
plt.fill_betweenx([0, 1], bd_hpd[0], bd_hpd[1], color='k', alpha=0.5)

plt.scatter(x_c,
            np.random.normal(y_0, 0.02),
            marker='.',
            color=[f'C{x}' for x in y_0])
az.plot_hpd(x_c, trace_0['theta'], color='C2')

plt.xlabel(x_n)
plt.ylabel('theta', rotation=0)
# use original scale for xticks
locs, _ = plt.xticks()
plt.xticks(locs, np.round(locs + x_0.mean(), 1))
plt.savefig('B11197_04_04.png', dpi=300)
Example #18
# %%
np.percentile(samples, [10, 90])

# %%
p_grid, posterior = posterior_grid_approx(success=3, tosses=3)
plt.plot(p_grid, posterior)
plt.xlabel("proportion water (p)")
plt.ylabel("Density")

# %%
samples = np.random.choice(p_grid, p=posterior, size=int(1e4), replace=True)
np.percentile(samples, [25, 75])

# %%
az.hpd(samples, credible_interval=0.5)

# %%
p_grid[posterior == max(posterior)]

# %%
stats.mode(samples)[0]

# %%
np.mean(samples), np.median(samples)

# %%
sum(posterior * abs(0.5 - p_grid))

# %%
loss = [sum(posterior * abs(p - p_grid)) for p in p_grid]
Example #19
varnames = ['alpha', 'beta', 'sigma']
pm.summary(trace51, varnames=varnames)

#%% [markdown]
# ## Code 5.2
#%%
new_x_values = np.linspace(-3, 3.5, num=30)
shared_x.set_value(new_x_values)
shared_y.set_value(np.repeat(0, repeats=len(new_x_values)))
with m51:
    post_pred = pm.sample_posterior_predictive(trace51,
                                               samples=1000,
                                               model=m51)

#%%
mu_hpd = az.hpd(trace51['mu'], credible_interval=.89)
post_pred_hpd = az.hpd(post_pred['divorce'], credible_interval=.89)

#%%
idx = sort_vals(d.medianagemarriage_s)
sorted_x_vals = d.medianagemarriage_s[idx]

plt.figure(figsize=(10, 8))
plt.plot(d.medianagemarriage_s.values,
         d.divorce.values,
         color='blue',
         marker='.',
         linestyle='')
plt.plot(sorted_x_vals,
         trace51['alpha'].mean() + np.mean(trace51['beta']) * sorted_x_vals,
         color='black',
Example #20
        trace["a"]
        + trace["a_actor"][:, actor]
        + (trace["bp"] + trace["bpC"] * condition) * prosoc_left
    )
    return logistic(logodds)


# %%
prosoc_left = [0, 1, 0, 1]
condition = [0, 0, 1, 1]

pred_raw = np.asarray(
    [p_link(p_l, c_d, 2 - 1, trace_12_4) for p_l, c_d in zip(prosoc_left, condition)]
).T
pred_p = pred_raw.mean(axis=0)
pred_p_PI = az.hpd(pred_raw, credible_interval=0.89)

# %%
d_pred = pd.DataFrame(
    dict(prosoc_left=[0, 1, 0, 1], condition=[0, 0, 1, 1], actor=np.repeat(2 - 1, 4))
)

# %%
a_actor_zeros = np.zeros((1000, 7))

# %%


def p_link(prosoc_left, condition, actor_sim, trace):
    Nsim = actor_sim.shape[0] // trace.nchains
    trace = trace[:Nsim]
    # Plot results
    _, ax = plt.subplots(figsize=(10, 6))
    
    fp = logistic(pred_samples['f_pred'])
    fp_mean = np.mean(fp, 0)
    
    ax.plot(X_new[:, 0], fp_mean)
    # plot the data (with some jitter) and the true latent function
    ax.scatter(x_1, np.random.normal(y, 0.02),
               marker='.', color=[f'C{x}' for x in y])
    
    az.plot_hpd(X_new[:, 0], fp, color='C2')
    
    db = np.array([find_midpoint(f, X_new[:, 0], 0.5) for f in fp])
    db_mean = db.mean()
    db_hpd = az.hpd(db)
    ax.vlines(db_mean, 0, 1, color='k')
    ax.fill_betweenx([0, 1], db_hpd[0], db_hpd[1], color='k', alpha=0.5)
    ax.set_xlabel('sepal_length')
    ax.set_ylabel('θ', rotation=0)
    plt.savefig('../figures/gp_classify_iris1.pdf', dpi=300)

# Change kernel to be sum of SE and linear, to improve tail behavior

with pm.Model() as model_iris2:
    #ℓ = pm.HalfCauchy("ℓ", 1)
    ℓ = pm.Gamma('ℓ', 2, 0.5)
    c = pm.Normal('c', x_1.min())
    τ = pm.HalfNormal('τ', 5)
    cov = (pm.gp.cov.ExpQuad(1, ℓ) +
           τ * pm.gp.cov.Linear(1, c) +
x_c = x_0 - x_0.mean()

with pm.Model() as model_simple:
    α = pm.Normal('α', mu=0, sd=10)
    β = pm.Normal('β', mu=0, sd=10)
    μ = α + pm.math.dot(x_c, β)    
    θ = pm.Deterministic('θ', pm.math.sigmoid(μ))
    bd = pm.Deterministic('bd', -α/β)
    y_1 = pm.Bernoulli('y_1', p=θ, observed=y_simple)
    trace_simple = pm.sample(1000, tune=1000)
#%% this is slow convergence
theta = trace_simple['θ'].mean(axis=0)
idx = np.argsort(x_c)
plt.plot(x_c[idx], theta[idx], color='C2', lw=3)
plt.vlines(trace_simple['bd'].mean(), 0, 1, color='k')
bd_hpd = az.hpd(trace_simple['bd'])
plt.fill_betweenx([0, 1], bd_hpd[0], bd_hpd[1], color='k', alpha=0.5)
plt.scatter(x_c, np.random.normal(y_simple, 0.02),
            marker='.', color=[f'C{x}' for x in y_simple])
az.plot_hpd(x_c, trace_simple['θ'], color='C2')
plt.xlabel(x_n)
plt.ylabel('θ', rotation=0)
locs, _ = plt.xticks()
plt.xticks(locs, np.round(locs + x_0.mean(), 1));
#%% 
data['age'] = data['age'] / 10
data['age2'] = np.square(data['age'])
with pm.Model() as logistic_model:
    pm.glm.GLM.from_formula('outcome ~ age + age2 + job + marital + education + default + housing + loan + contact + month + day_of_week + duration + campaign + pdays + previous + euribor3m', data, family = pm.glm.families.Binomial())
    trace = pm.sample(500, tune = 500, init = 'adapt_diag')
az.plot_trace(trace);
Example #23
# 7.5
m7_3.name = 'm73'
m7_4.name = 'm74'
pm.compare({m7_3: tracem73, m7_4: tracem74})

# 7.6
rugged_seq = np.arange(-1, 8, .25)
mu_Af = np.zeros((len(rugged_seq), tracem74['mu'].shape[0]))
mu_noAf = np.zeros((len(rugged_seq), tracem74['mu'].shape[0]))

for row, seq in enumerate(rugged_seq):
    mu_Af[row, :] = tracem74['alpha'] + tracem74['beta'] * rugged_seq[row] + tracem74['beta2'] * 1
    mu_noAf[row, :] = tracem74['alpha'] + tracem74['beta'] * rugged_seq[row] + tracem74['beta2'] * 0

hpd_af = az.hpd(mu_Af.T, credible_interval=.97)
hpd_noaf = az.hpd(mu_noAf.T, credible_interval=.97)


plt.plot(da1.rugged, da1.log_gdp, marker='o', linestyle='', color='blue')
plt.plot(rugged_seq, mu_Af.mean(1), color='blue')
plt.fill_between(rugged_seq, hpd_af[:, 0], hpd_af[:, 1], alpha=.4)
plt.plot(da0.rugged, da0.log_gdp, marker='o', linestyle='', color='black')
plt.plot(rugged_seq, mu_noAf.mean(1), color='black')
plt.fill_between(rugged_seq, hpd_noaf[:, 0], hpd_noaf[:, 1], alpha=.2, color='black')
plt.xlabel('Terrain Ruggedness Index')
plt.ylabel('log GDP')

# 7.7
with pm.Model() as m7_5:
    alpha = pm.Normal('alpha', mu = 8, sigma = 100)
Example #24
import arviz as az
import numpy as np


def get_hpd(x, ci):
    """NaN-safe HPD: drops NaNs first and returns [nan, nan] for empty input."""
    if len(x) == 0:
        return np.array([np.nan, np.nan])
    return az.hpd(x[~np.isnan(x)], credible_interval=ci)
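
A sketch of the NaN handling (synthetic samples):

x = np.concatenate([np.random.default_rng(8).normal(size=1000), [np.nan, np.nan]])
print(get_hpd(x, 0.9))               # finite (lower, upper) despite the NaNs
print(get_hpd(np.array([]), 0.9))    # [nan nan] for empty input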
    trace_0 = pm.sample(1000)

varnames = ['α', 'β', 'bd']
az.summary(trace_0, varnames)

theta = trace_0['θ'].mean(axis=0)
idx = np.argsort(x_c)

plt.figure()
# plot logistic curve
plt.plot(x_c[idx], theta[idx], color='C2', lw=3)
az.plot_hpd(x_c, trace_0['θ'], color='C2')

# plot decision boundary
plt.vlines(trace_0['bd'].mean(), 0, 1, color='k')
bd_hpd = az.hpd(trace_0['bd'])
plt.fill_betweenx([0, 1], bd_hpd[0], bd_hpd[1], color='k', alpha=0.5)

# plot jittered data
plt.scatter(x_c,
            np.random.normal(y_0, 0.02),
            marker='.',
            color=[f'C{x}' for x in y_0])

plt.xlabel(x_n)
plt.ylabel('p(y=1)', rotation=0)
# use original scale for xticks
locs, _ = plt.xticks()
plt.xticks(locs, np.round(locs + xmean, 1))
#plt.xticks(x_c[idx], np.round(x_0[idx], 1))
#plt.savefig('../figures/logreg_bayes_1d_sat.pdf', dpi=300)