def out_diagnostic_plots(self):
    """Save three diagnostic figures built from posterior medians of ``self.trace``.

    The figures are written to ``self.dir + 'amplitudefit.png'``,
    ``self.dir + 'frequencyfit.png'`` and ``self.dir + 'widthfit.png'``
    (plain string concatenation, so ``self.dir`` must already end with a
    path separator if it names a directory).

    Returns
    -------
    None
    """
    modes = ('0', '1', '2')
    markers = ('^', '*', 'o')

    def scalar_medians(labels):
        # Median over every sample of each named trace variable.
        return [np.median(self.trace[label]) for label in labels]

    with plt.style.context(lk.MPLSTYLE):
        # Per-mode median frequencies; shared by all three figures, so
        # compute them once instead of once per figure.
        freqs = [np.median(self.trace[label], axis=0)
                 for label in ('f0', 'f1', 'f2')]

        # --- Figure 1: amplitude against frequency ------------------------
        fig, ax = plt.subplots()
        res = scalar_medians(['numax', 'w', 'A', 'V1', 'V2'])
        amps = [np.median(self.trace[label], axis=0)
                for label in ('a0', 'a1', 'a2')]

        # NOTE(review): f0_, f1_ and f2_ are not defined in this function;
        # they are presumably module-level frequency arrays -- confirm.
        amp_trends = ((self.mod.A0, f0_), (self.mod.A1, f1_), (self.mod.A2, f2_))
        for mode, freq, (amp_fn, grid) in zip(modes, freqs, amp_trends):
            ax.plot(freq, amp_fn(grid, res, theano=False),
                    label=mode + ' Trend', lw=2, zorder=1)
        for mode, marker, freq, amp in zip(modes, markers, freqs, amps):
            ax.scatter(freq, amp, marker=marker,
                       label=mode + ' mod', s=10, zorder=3)

        ax.legend(loc='upper center', ncol=4, bbox_to_anchor=(0.5, 1.3))
        ax.set_xlabel('Frequency')
        ax.set_ylabel('Amplitude')

        fig.savefig(self.dir + 'amplitudefit.png')
        plt.close(fig)

        # --- Figure 2: overtone order against frequency mod deltanu -------
        fig, ax = plt.subplots()
        res = scalar_medians(['numax', 'alpha', 'epsilon', 'd01', 'd02'])
        dnu = self.mod.deltanu
        orders = (self.mod.n0, self.mod.n1, self.mod.n2)
        freq_models = (self.mod.f0, self.mod.f1, self.mod.f2)

        for mode, freq_fn, order in zip(modes, freq_models, orders):
            ax.plot(freq_fn(res) % dnu, order,
                    label=mode + ' Trend', lw=2, zorder=1)
        for mode, marker, freq, order in zip(modes, markers, freqs, orders):
            ax.scatter(freq % dnu, order, marker=marker,
                       label=mode + ' mod', s=10, zorder=3)

        ax.set_xlabel(r'Frequency mod $\Delta\nu$')
        ax.set_ylabel('Overtone order n')
        ax.legend(loc='upper center', ncol=4, bbox_to_anchor=(0.5, 1.3))

        fig.savefig(self.dir + 'frequencyfit.png')
        plt.close(fig)

        # --- Figure 3: 'g' trace variables against frequency --------------
        fig, ax = plt.subplots()
        widths = [self.trace[label] for label in ('g0', 'g1', 'g2')]
        # Each median is used by both the scatter and the error bars.
        width_medians = [np.median(samples, axis=0) for samples in widths]
        width_spreads = [np.std(samples, axis=0) for samples in widths]

        # Only the first mode's samples are drawn as a shaded distribution.
        plot_gp_dist(ax, widths[0], freqs[0], palette='viridis', fill_alpha=.05)

        for label, marker, freq, centre in zip(('mod', 'mod 1', 'mod 2'),
                                               markers, freqs, width_medians):
            ax.scatter(freq, centre, marker=marker, label=label, s=10, zorder=5)

        for freq, centre, spread in zip(freqs, width_medians, width_spreads):
            ax.errorbar(freq, centre, yerr=spread,
                        fmt='|', c='k', lw=3, alpha=.5)

        ax.legend(loc='upper center', ncol=2, bbox_to_anchor=(0.5, 1.3))

        fig.savefig(self.dir + 'widthfit.png')
        plt.close(fig)
Exemple #2
0
def plot_posterior_predictive(pred_samples, X_new, X, y):
    """Draw the ``"f_pred"`` samples over ``X_new`` with the observations on top.

    The figure is saved as ``pred_samples.png`` inside ``OUTPUT_DIR_PATH``.
    """
    figure = plt.figure(figsize=(24, 10))
    axes = figure.gca()

    # Shaded distribution of the predictive samples
    plot_gp_dist(axes, pred_samples["f_pred"], X_new)

    # Observations on the same axes
    axes.plot(X, y, 'ob', markersize=9, alpha=0.5, label="Observed data")
    axes.set_xlabel("x")
    axes.set_ylabel("y")
    axes.legend(loc="best")

    target = os.path.join(OUTPUT_DIR_PATH, "pred_samples.png")
    figure.savefig(target)
Exemple #3
0
def plotPrediction(ax,X,y,N,pred,mindate,lag=None,prev_mult=1,plot_gp=False):
    """Plot a predictive time series with tail-probability-coloured observations.

    By default the prediction is summarised by its [0.01, 0.05, 0.5, 0.95, 0.99]
    quantiles; each observation is coloured by its tail probability under
    the posterior draws.

    Parameters
    ==========
    ax -- a set of axes on which to plot
    X  -- 1D array-like of times of length n
    y  -- 1D array-like of observed number of cases at each time of length n
    N  -- 1D array-like of total number at each time of length n
    pred -- 2D m x n array with numerical draws from posterior
    mindate -- a pandas.Timestamp representing the time origin wrt X
    lag     -- how many days prior to max(X) to plot
    prev_mult -- prevalence multiplier (to get in, eg. prev per 1000 population)
    plot_gp -- plots a GP smudge-o-gram rather than 95% and 99% quantiles.

    Returns
    =======
    Nothing.   Just modifies ax
    """

    from pymc3.gp.util import plot_gp_dist

    # Window of time points to draw: all of them, or only the last `lag`
    n_times = X.shape[0]
    first = 0 if lag is None else n_times - lag
    window = slice(first, n_times)

    # Observed dates and prevalence inside the window
    dates = np.array([mindate + pd.Timedelta(offset,unit='D') for offset in X[window]])
    observed = np.array(y/N)[window] * prev_mult

    # Posterior draws on the probability scale, and their quantiles
    draws = pm.invlogit(pred[:,window]).eval() * prev_mult
    q01, q05, q50, q95, q99 = np.percentile(draws, [1,5,50,95,99], axis=0)

    # Fraction of draws below each observation, folded onto [0, 0.5]
    tail = np.sum(observed > draws, axis=0)/draws.shape[0]
    above_half = tail > .5
    tail[above_half] = 1. - tail[above_half]

    # (mask, colour, legend label) per risk band, in drawing order
    risk_bands = (
        (0.05 < tail, 'green', '0.05<p'),
        ((0.01 < tail) & (tail <= 0.05), 'orange', '0.01<p<=0.05'),
        (tail <= 0.01, 'red', 'p<=0.01'),
    )

    # Uncertainty: GP-style shading, or two nested credible bands
    if plot_gp is True:
        plot_gp_dist(ax,draws,dates,plot_samples=False, palette="Blues")
    else:
        ax.fill_between(dates, q99, q01, color='lightgrey',alpha=.5,label="99% credible interval")
        ax.fill_between(dates, q95, q05, color='lightgrey',alpha=1,label='95% credible interval')

    ax.plot(dates, q50, c='grey', ls='-', label="Predicted prevalence")
    for mask, colour, band_label in risk_bands:
        ax.scatter(dates[mask],observed[mask],c=colour,s=8,alpha=0.5,label=band_label)
Exemple #4
0
def plot_posterior(trc, xs, ys):
    """Draw the ``"f"`` samples of ``trc`` over ``xs`` with the observations on top.

    The figure is saved as ``posterior.png`` inside ``OUTPUT_DIR_PATH``.
    """
    figure = plt.figure(figsize=(12, 5))
    axes = figure.gca()

    # Draw the posterior of f (samples plus shading)
    plot_gp_dist(axes, trc["f"], xs)

    # Overlay the observed data points
    axes.plot(xs, ys, 'ok', ms=3, alpha=0.5, label="Observed data")

    # Axis labels and legend
    axes.set_xlabel("x")
    axes.set_ylabel("y")
    axes.legend(loc="best")

    target = os.path.join(OUTPUT_DIR_PATH, "posterior.png")
    figure.savefig(target)
Exemple #5
0
    def plot_linewidth(self, thin=10):
        """
        Plot the estimated ln line width against the scaled order ``self.n``.

        Two panels are drawn: the left one uses the ``*0`` width samples, the
        right one the ``*2`` width samples. When ``self.gp0`` is not an empty
        list, GP predictions on a 100-point grid over [-0.2, 1.2] are drawn
        first and stored in ``self.pred_samples``.

        thin -- step between the sample indices that are scattered.

        Returns the matplotlib figure.
        """
        fig, ax = plt.subplots(1, 2, figsize=[16, 9])

        if self.gp0 != []:
            from pymc3.gp.util import plot_gp_dist

            grid = np.linspace(-0.2, 1.2, 100)[:, None]
            with self.pm_model:
                conditionals = [
                    self.gp0.conditional("f_pred0", grid),
                    self.gp2.conditional("f_pred2", grid),
                ]
                self.pred_samples = pm.sample_posterior_predictive(
                    self.samples, vars=conditionals, samples=1000)
            for panel, name in zip(ax, ("f_pred0", "f_pred2")):
                plot_gp_dist(panel, self.pred_samples[name], grid)

            # These samples are already stored as ln(width)
            sample_keys = ('ln_width0', 'ln_width2')

            def to_log(values):
                return values
        else:
            # Raw widths: take the log before plotting
            sample_keys = ('width0', 'width2')
            to_log = np.log

        # One scatter per retained sample index and panel, left panel first
        for idx in range(0, len(self.samples), thin):
            for panel, key in zip(ax, sample_keys):
                panel.scatter(self.n,
                              to_log(self.samples[key][idx, :]),
                              c='k',
                              alpha=0.3)

        for panel, heading in zip(ax, ('Radial modes', 'Quadrupole modes')):
            panel.set_xlabel('normalised order')
            panel.set_ylabel('ln line width')
            panel.set_title(heading)
        return fig
# Linear mean trend evaluated on the `nflin` grid, using the posterior
# medians of the trace variables 'm' (slope) and 'c' (intercept).
mulin = nflin * np.median(trace['m']) + np.median(trace['c'])

# Add the GP conditional on the `nflin` grid (as a column vector) to the
# model, expose its exponential as a named deterministic, and draw 1000
# posterior-predictive samples of it.
with pm_model:
    f_pred = gp.conditional("f_pred", nflin[:, None])
    expf_pred = pm.Deterministic('expf_pred', tt.exp(f_pred))
    pred_samples = pm.sample_posterior_predictive(trace,
                                                  vars=[expf_pred],
                                                  samples=1000)

# In[27]:

# NOTE(review): the predictions above were made on `nflin`, but everything
# below is plotted against `fslin`; presumably both grids have the same
# length and correspond point-for-point -- confirm.
with plt.style.context(lk.MPLSTYLE):
    fig, ax = plt.subplots()
    # Shaded distribution of the exponentiated GP prediction
    plot_gp_dist(ax,
                 pred_samples['expf_pred'],
                 fslin,
                 palette='viridis',
                 fill_alpha=.05)

    # Exponential of the linear trend, drawn underneath everything else
    ax.plot(fslin,
            np.exp(mulin),
            label='Mean Trend',
            lw=2,
            ls='-.',
            alpha=.5,
            zorder=0)

    # Entries of `widths` plotted against f0_, f1_, f2_, labelled as truth
    ax.scatter(f0_, widths[0], label='truth', ec='k', s=50, zorder=5)
    ax.scatter(f1_, widths[1], label='truth 1', ec='k', s=50, zorder=5)
    ax.scatter(f2_, widths[2], label='truth 2', ec='k', s=50, zorder=5)
Exemple #7
0
    Xu = pm.gp.util.kmeans_inducing_points(15, X)
    
    σ = pm.HalfCauchy("σ", beta=5)
    obs = gp.marginal_likelihood("obs", X=X, Xu=Xu, y=y, noise=σ)
    
    trace = pm.sample(1000, chains=1)
    
# Add the GP conditional to the model, evaluated at X with pred_noise=True.
with temp_model:
    f_pred = gp.conditional("f_pred", X, pred_noise=True)

# Draw 10 samples of f_pred using the trace.
# To use the MAP values, you can just replace the trace with a length-1 list with `mp`
# NOTE(review): `pm.sample_ppc` is the older PyMC3 name for
# `pm.sample_posterior_predictive` -- confirm it exists in the pinned version.
with temp_model:
    pred_samples = pm.sample_ppc(trace, vars=[f_pred], samples=10)


# 1-D copy of the inputs for plotting
x = X.flatten()
# plot the results
fig = plt.figure(figsize=(12,5)); ax = fig.gca()

# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
plot_gp_dist(ax, pred_samples["f_pred"], x);

# plot the observed data, and mark each row of Xu at a constant height of 10
plt.plot(x, y, 'ok', ms=3, alpha=0.5, label="Observed data");
plt.plot(Xu, 10*np.ones(Xu.shape[0]), "co", ms=10, label="Inducing point locations")

# axis label, title and legend
plt.xlabel("X");
plt.title("Posterior distribution over $f(x)$ at the observed values"); plt.legend();
def main():
    """Train a variational GP classifier on toy Bernoulli data and plot its latent posterior.

    Data are 500 sorted uniform inputs on [0, 1] whose labels are Bernoulli
    draws with logits ``true_f(x)``. After 1000 Adam steps on the negated
    variational ELBO, 1000 samples of the model output on a 101-point grid
    are drawn and shown together with ``true_f``.
    """
    # Fix both random number generators
    np.random.seed(0)
    torch.manual_seed(0)

    # Synthetic inputs, logits and binary labels
    num_points = 500
    inputs = np.sort(np.random.uniform(0, 1, num_points))
    logits = true_f(inputs)
    labels = scipy.stats.bernoulli.rvs(scipy.special.expit(logits))

    ## Uncomment to show raw data
    # plt.scatter(inputs, labels, alpha=0.5)
    # plt.xlabel('$x$')
    # plt.ylabel('$y$')
    # plt.yticks([0, 1])
    # plt.show()

    ## Uncomment to show logits ("f")
    # fig, ax = plt.subplots()
    # x_plot = np.linspace(0, 1, 100)
    # ax.plot(x_plot, true_f(x_plot), alpha=0.5)
    # ax.scatter(inputs, logits, alpha=0.5)
    # plt.show()

    # float32 tensors for training
    train_x, train_y = (
        torch.from_numpy(array.astype(np.float32)) for array in (inputs, labels)
    )

    # Model with 50 random initial inducing points, and its likelihood
    model = GPClassificationModel(inducing_points=torch.rand(50))
    likelihood = BernoulliLikelihood()

    training_iter = 1000
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    # The third argument is the number of training datapoints
    mll = VariationalELBO(likelihood, model, train_y.numel())

    progress = tqdm(range(training_iter))
    for _ in progress:
        # Clear gradients left over from the previous step
        optimizer.zero_grad()
        # Negated ELBO of the model output on the training inputs
        loss = -mll(model(train_x), train_y)
        loss.backward()
        optimizer.step()

        progress.set_postfix(loss=loss.item())

    # Sample the model output on an even grid and plot it against true_f
    test_x = torch.linspace(0, 1, 101)
    pred = model(test_x).sample(torch.Size((1000,))).numpy()

    fig, ax = plt.subplots()
    plot_gp_dist(ax, pred, test_x)
    ax.plot(test_x, true_f(test_x), alpha=0.5)
    plt.show()
def plotPrediction(ax, data, prev_mult=1, plot_gp=False):
    """Predictive time series plot with (by default) the prediction
    summarised as [0.01,0.05,0.5,0.95,0.99] quantiles, and observations colour-coded
    by tail-probability.

    Parameters
    ==========
    ax -- a set of axes on which to plot
    data -- mapping with two entries read by this function:
            data['pred'] -- table of posterior draws; after transposing, rows
                            are the T times and columns the M MCMC iterations
            data['data'] -- table with 'cases' and 'N' entries, whose ratio is
                            the observed prevalence
    prev_mult -- prevalence multiplier (to get in, eg. prev per 1000 population)
    plot_gp -- plots a GP smudge-o-gram rather than 95% and 99% quantiles.

    Returns
    =======
    Nothing.   Just modifies ax
    """

    # Prediction quantiles
    phat = data['pred'].transpose(
    ) * prev_mult  # TxM for T times and M mcmc iterations
    pctiles = np.percentile(phat, [1, 5, 50, 95, 99], axis=1)

    # Data
    pbar = data['data']['cases'] / data['data']['N'] * prev_mult
    pbar = clip_to_range(pbar, phat.index)

    # Tail probabilities for observed p: interpolate every MCMC iteration onto
    # the observation times, then count the draws lying below each observation.
    phat_interp = phat.apply(lambda x: np.interp(pbar.index, phat.index, x))
    # Go through a plain array for the column-vector view; recent pandas
    # rejects multi-dimensional indexing on a Series (`pbar[:, None]`).
    pbar_column = np.asarray(pbar)[:, None]
    prp = np.sum(pbar_column > phat_interp, axis=1) / phat_interp.shape[1]
    prp[prp > .5] = 1. - prp[prp > .5]

    # Risk masks
    red = prp <= 0.01
    orange = (0.01 < prp) & (prp <= 0.05)
    green = 0.05 < prp

    # Construct plot
    if plot_gp is True:
        from pymc3.gp.util import plot_gp_dist
        # plot_gp_dist wants samples shaped (draws, points) and an x grid with
        # one entry per point, so pass the M x T array against the time index
        # (the same axis every other artist below is drawn on).
        plot_gp_dist(ax,
                     phat.values.T,
                     np.asarray(phat.index),
                     plot_samples=False,
                     palette="Blues")
    else:
        ax.fill_between(phat.index,
                        pctiles[4, :],
                        pctiles[0, :],
                        color='lightgrey',
                        alpha=.5,
                        label="99% credible interval")
        ax.fill_between(phat.index,
                        pctiles[3, :],
                        pctiles[1, :],
                        color='lightgrey',
                        alpha=1,
                        label='95% credible interval')
    ax.plot(phat.index,
            pctiles[2, :],
            c='grey',
            ls='-',
            label="Predicted prevalence")
    # NOTE(review): every observation is drawn once, unlabelled, in the default
    # colour before the colour-coded overlays; looks redundant -- confirm.
    ax.scatter(pbar.index, pbar, s=8, alpha=0.5)
    pbar_values = np.asarray(pbar)
    for mask, colour, band_label in ((green, 'green', '0.05<p'),
                                     (orange, 'orange', '0.01<p<=0.05'),
                                     (red, 'red', 'p<=0.01')):
        ax.scatter(pbar.index[mask],
                   pbar_values[mask],
                   c=colour,
                   s=8,
                   alpha=0.5,
                   label=band_label)
    # The Gaussian process is a sum of these three components
    σ = pm.HalfNormal("σ", sd=2.0)

    y_ = pm.StudentT('y', mu=f, sd=σ, nu=1, observed=y)

    # this line calls an optimizer to find the MAP
    #mp = pm.find_MAP(include_transformed=True)

    trace = pm.sample(1000, chains=1)

# New 12x5 figure; `ax` is its current axes.
fig = plt.figure(figsize=(12, 5))
ax = fig.gca()
# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist

# Samples of the trace variable "f", drawn against X_obs
plot_gp_dist(ax, trace["f"], X_obs)

# plot the data and the true latent function
ax.plot(X, mean_func_true, "dodgerblue", lw=3, label="True f")
ax.plot(X_obs, y, 'ok', ms=3, label="Data")
ax.set_xlabel("X")
ax.set_ylabel("y")
plt.legend()
# axis labels and title
# NOTE(review): these pyplot calls presumably act on the same axes, so the
# y label set above is replaced by "True f(x)" and the legend is built a
# second time -- confirm which label is intended.
plt.xlabel("X")
plt.ylabel("True f(x)")
plt.title("Posterior distribution over $f(x)$ at the observed values")
plt.legend()

#create the posterior/trace plots of the variables.
lines = [
    σ = pm.HalfNormal("σ", sd=2.0)

    y_ = pm.StudentT('y', mu=f, sd=σ, nu=1, observed=split_vals)
    #     y_ = pm.Normal('y', mu = f, sigma = σ, observed = y)

    # this line calls an optimizer to find the MAP
    #mp = pm.find_MAP(include_transformed=True)

    trace = pm.sample(1000, chains=1)

# New 12x5 figure; `ax` is its current axes.
fig = plt.figure(figsize=(12, 5))
ax = fig.gca()
# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist

# Samples of the trace variable "f", drawn against `freqs`
plot_gp_dist(ax, trace["f"], freqs)

# Plot `split_vals` against `freqs`.
# NOTE(review): the legend label is "True f", but `split_vals` is the name
# passed as `observed=` in the model fragment above -- confirm the label.
ax.plot(freqs, split_vals, "dodgerblue", lw=3, label="True f")

ax.set_xlabel("X")
ax.set_ylabel("y")
plt.legend()
# axis labels and title
# NOTE(review): these pyplot calls presumably act on the same axes, so the
# y label set above is replaced by "True f(x)" and the legend is built a
# second time -- confirm which label is intended.
plt.xlabel("X")
plt.ylabel("True f(x)")
plt.title("Posterior distribution over $f(x)$ at the observed values")
plt.legend()

#create the posterior/trace plots of the variables.
lines = [
Exemple #12
0
#%% Predict new values from x=0 to x=20
# 600 evenly spaced inputs, shaped as a column vector
X_new = np.linspace(0, 20, 600)[:, None]

# add the GP conditional to the model, given the new X values
with model:
    f_pred = gp.conditional("f_pred", X_new)

# Draw 1000 samples of f_pred. A length-1 list `[mp0]` is passed where a
# trace would normally go (presumably a MAP point estimate -- confirm).
with model:
    pred_samples = pm.sample_posterior_predictive([mp0],
                                                  vars=[f_pred],
                                                  samples=1000)

#%% Plot the results

fig = plt.figure(figsize=(12, 5))
ax = fig.gca()

# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
plot_gp_dist(ax, pred_samples["f_pred"], X_new)

# plot the data and the true latent function
plt.plot(X, f_true, "dodgerblue", lw=3, label="True f")
plt.plot(X, y, 'ok', ms=3, alpha=0.5, label="Observed data")

# axis labels and title; the y range is fixed to [-13, 13]
# NOTE(review): the title says "at the observed values", but the samples are
# drawn at X_new -- confirm the wording.
plt.xlabel("X")
plt.ylim([-13, 13])
plt.title("Posterior distribution over $f(x)$ at the observed values")
plt.legend()
        sigma = pm.HalfCauchy("sigma", beta=5)
        nu = pm.Gamma("nu", alpha=2, beta=0.1)
        y_ = pm.StudentT("y", mu=f, lam=1.0 / sigma, nu=nu, observed=y)

        trace = pm.sample(200, n_init=100, tune=100, chains=2, cores=2, return_inferencedata=True)
        az.to_netcdf(trace, 'src/experiments/results/lat_gp_trace')

    # check Rhat, values above 1 may indicate convergence issues
    n_nonconverged = int(np.sum(az.rhat(trace)[["eta", "l", "f_rotated_"]].to_array() > 1.03).values)
    print("%i variables MCMC chains appear not to have converged." % n_nonconverged)

    # plot the results
    fig = plt.figure(figsize=(12, 5))
    ax = fig.gca()

    # plot the samples from the gp posterior with samples and shading
    from pymc3.gp.util import plot_gp_dist

    plot_gp_dist(ax, trace.posterior["f"][0, :, :], X)

    # plot the data and the true latent function
    ax.plot(X, f_true, "dodgerblue", lw=3, label="True generating function 'f'")
    ax.plot(X, y, "ok", ms=3, label="Observed data")

    # axis labels and title
    plt.xlabel("X")
    plt.ylabel("True f(x)")
    plt.title("Posterior distribution over $f(x)$ at the observed values")
    plt.legend()
    plt.show()
Exemple #14
0
    # The Gaussian process is a sum of these three components
    σ = pm.HalfNormal("σ", sd=2.0)

    y_ = pm.StudentT('y', mu=f, sd=σ, nu=1, observed=y)
    #     y_ = pm.Normal('y', mu = f, sigma = σ, observed = y)

    # this line calls an optimizer to find the MAP
    #mp = pm.find_MAP(include_transformed=True)

    trace = pm.sample(1000, chains=1)

# New 12x5 figure; `ax` is its current axes.
fig = plt.figure(figsize=(12, 5))
ax = fig.gca()
# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
# Samples of the trace variable "f", drawn against all but the last entry
# of `freqs`
plot_gp_dist(ax, trace["f"], freqs[:-1])

# plot the data and the true latent function
ax.plot(freqs[:-1], vals, "dodgerblue", lw=3, label="True f")
ax.plot(freqs[:-1], y, 'ok', ms=3, label="Data")
ax.set_xlabel("X")
ax.set_ylabel("y")
plt.legend()
# axis labels and title
# NOTE(review): these pyplot calls presumably act on the same axes, so the
# y label set above is replaced by "True f(x)" and the legend is built a
# second time -- confirm which label is intended.
plt.xlabel("X")
plt.ylabel("True f(x)")
plt.title("Posterior distribution over $f(x)$ at the observed values")
plt.legend()

#create the posterior/trace plots of the variables.
lines = [
Exemple #15
0
    trace = pm.sample(1000, chains=1)

# Add the GP conditional to the model, evaluated at X with pred_noise=True.
with temp_model:
    f_pred = gp.conditional("f_pred", X, pred_noise=True)

# Draw 10 samples of f_pred using the trace.
# To use the MAP values, you can just replace the trace with a length-1 list with `mp`
# NOTE(review): `pm.sample_ppc` is the older PyMC3 name for
# `pm.sample_posterior_predictive` -- confirm it exists in the pinned version.
with temp_model:
    pred_samples = pm.sample_ppc(trace, vars=[f_pred], samples=10)

# 1-D copy of the inputs for plotting
x = X.flatten()
# plot the results
fig = plt.figure(figsize=(12, 5))
ax = fig.gca()

# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
plot_gp_dist(ax, pred_samples["f_pred"], x)

# plot the observed data, and mark each row of Xu at a constant height of 10
# (Xu is not defined in this fragment; presumably the inducing points)
plt.plot(x, y, 'ok', ms=3, alpha=0.5, label="Observed data")
plt.plot(Xu,
         10 * np.ones(Xu.shape[0]),
         "co",
         ms=10,
         label="Inducing point locations")

# axis label, title and legend
plt.xlabel("X")
plt.title("Posterior distribution over $f(x)$ at the observed values")
plt.legend()