def out_diagnostic_plots(self):
    with plt.style.context(lk.MPLSTYLE):
        # Amplitude fit: fitted amplitude trends vs. per-mode medians from the trace.
        fig, ax = plt.subplots()
        res = [np.median(self.trace[label]) for label in ['numax', 'w', 'A', 'V1', 'V2']]
        resls = [np.median(self.trace[label], axis=0) for label in ['a0', 'a1', 'a2']]
        resfs = [np.median(self.trace[label], axis=0) for label in ['f0', 'f1', 'f2']]
        # Note: the original referenced undefined globals f0_, f1_, f2_ here;
        # the median frequencies in resfs are used instead.
        ax.plot(resfs[0], self.mod.A0(resfs[0], res, theano=False), label='0 Trend', lw=2, zorder=1)
        ax.plot(resfs[1], self.mod.A1(resfs[1], res, theano=False), label='1 Trend', lw=2, zorder=1)
        ax.plot(resfs[2], self.mod.A2(resfs[2], res, theano=False), label='2 Trend', lw=2, zorder=1)
        ax.scatter(resfs[0], resls[0], marker='^', label='0 mod', s=10, zorder=3)
        ax.scatter(resfs[1], resls[1], marker='*', label='1 mod', s=10, zorder=3)
        ax.scatter(resfs[2], resls[2], marker='o', label='2 mod', s=10, zorder=3)
        ax.legend(loc='upper center', ncol=4, bbox_to_anchor=(0.5, 1.3))
        ax.set_xlabel('Frequency')
        ax.set_ylabel('Amplitude')
        plt.savefig(self.dir + 'amplitudefit.png')
        plt.close()

        # Frequency fit: echelle-style plot of frequency mod deltanu vs. overtone order.
        fig, ax = plt.subplots()
        res = [np.median(self.trace[label]) for label in ['numax', 'alpha', 'epsilon', 'd01', 'd02']]
        resls = [np.median(self.trace[label], axis=0) for label in ['f0', 'f1', 'f2']]
        ax.plot(self.mod.f0(res) % self.mod.deltanu, self.mod.n0, label='0 Trend', lw=2, zorder=1)
        ax.plot(self.mod.f1(res) % self.mod.deltanu, self.mod.n1, label='1 Trend', lw=2, zorder=1)
        ax.plot(self.mod.f2(res) % self.mod.deltanu, self.mod.n2, label='2 Trend', lw=2, zorder=1)
        ax.scatter(resls[0] % self.mod.deltanu, self.mod.n0, marker='^', label='0 mod', s=10, zorder=3)
        ax.scatter(resls[1] % self.mod.deltanu, self.mod.n1, marker='*', label='1 mod', s=10, zorder=3)
        ax.scatter(resls[2] % self.mod.deltanu, self.mod.n2, marker='o', label='2 mod', s=10, zorder=3)
        ax.set_xlabel(r'Frequency mod $\Delta\nu$')
        ax.set_ylabel('Overtone order n')
        ax.legend(loc='upper center', ncol=4, bbox_to_anchor=(0.5, 1.3))
        plt.savefig(self.dir + 'frequencyfit.png')
        plt.close()

        # Width fit: GP distribution over the linewidths, with per-mode medians
        # and standard deviations from the trace overplotted.
        fig, ax = plt.subplots()
        resls = [np.median(self.trace[label], axis=0) for label in ['f0', 'f1', 'f2']]
        nflin = np.linspace(self.nf_.min(), self.nf_.max(), 100)
        plot_gp_dist(ax, self.trace['g0'], resls[0], palette='viridis', fill_alpha=.05)
        ax.scatter(resls[0], np.median(self.trace['g0'], axis=0), marker='^', label='mod', s=10, zorder=5)
        ax.scatter(resls[1], np.median(self.trace['g1'], axis=0), marker='*', label='mod 1', s=10, zorder=5)
        ax.scatter(resls[2], np.median(self.trace['g2'], axis=0), marker='o', label='mod 2', s=10, zorder=5)
        ax.errorbar(resls[0], np.median(self.trace['g0'], axis=0), yerr=np.std(self.trace['g0'], axis=0), fmt='|', c='k', lw=3, alpha=.5)
        ax.errorbar(resls[1], np.median(self.trace['g1'], axis=0), yerr=np.std(self.trace['g1'], axis=0), fmt='|', c='k', lw=3, alpha=.5)
        ax.errorbar(resls[2], np.median(self.trace['g2'], axis=0), yerr=np.std(self.trace['g2'], axis=0), fmt='|', c='k', lw=3, alpha=.5)
        ax.legend(loc='upper center', ncol=2, bbox_to_anchor=(0.5, 1.3))
        plt.savefig(self.dir + 'widthfit.png')
        plt.close()
def plot_posterior_predictive(pred_samples, X_new, X, y):
    fig = plt.figure(figsize=(24, 10))
    ax = fig.gca()
    plot_gp_dist(ax, pred_samples["f_pred"], X_new)
    plt.plot(X, y, 'ob', markersize=9, alpha=0.5, label="Observed data")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.legend(loc="best")
    plt.savefig(os.path.join(OUTPUT_DIR_PATH, "pred_samples.png"))
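# A minimal, self-contained sketch of how `pred_samples` above might be
# produced before calling plot_posterior_predictive. The model, data, and
# names (X, y, X_new) are illustrative assumptions, not taken from the
# surrounding code; the `vars=`/`samples=` keywords follow the pymc3 3.x
# usage seen elsewhere in these snippets.
import numpy as np
import pymc3 as pm

X = np.linspace(0, 10, 50)[:, None]
y = np.sin(X).ravel() + 0.3 * np.random.randn(50)
X_new = np.linspace(0, 12, 100)[:, None]

with pm.Model() as model:
    ls = pm.Gamma("ls", alpha=2, beta=1)
    cov = pm.gp.cov.ExpQuad(1, ls=ls)
    gp = pm.gp.Marginal(cov_func=cov)
    sigma = pm.HalfCauchy("sigma", beta=5)
    gp.marginal_likelihood("y", X=X, y=y, noise=sigma)
    trace = pm.sample(500, chains=1)
    # condition the GP on the new grid, then draw posterior predictive samples
    f_pred = gp.conditional("f_pred", X_new)
    pred_samples = pm.sample_posterior_predictive(trace, vars=[f_pred], samples=500)

# plot_posterior_predictive(pred_samples, X_new, X.ravel(), y)  # assumes OUTPUT_DIR_PATH is set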
def plotPrediction(ax, X, y, N, pred, mindate, lag=None, prev_mult=1, plot_gp=False):
    """Predictive time series plot with (by default) the prediction summarised
    as [0.01, 0.05, 0.5, 0.95, 0.99] quantiles, and observations colour-coded
    by tail-probability.

    Parameters
    ==========
    ax        -- a set of axes on which to plot
    X         -- 1D array-like of times of length n
    y         -- 1D array-like of observed number of cases at each time of length n
    N         -- 1D array-like of total number at each time of length n
    pred      -- 2D m x n array with numerical draws from posterior
    mindate   -- a pandas.Timestamp representing the time origin wrt X
    lag       -- how many days prior to max(X) to plot
    prev_mult -- prevalence multiplier (to get, e.g., prevalence per 1000 population)
    plot_gp   -- plot a GP smudge-o-gram rather than 95% and 99% quantiles

    Returns
    =======
    Nothing. Just modifies ax.
    """
    from pymc3.gp.util import plot_gp_dist

    # Time slice
    ts = slice(0, X.shape[0])
    if lag is not None:
        ts = slice(X.shape[0] - lag, X.shape[0])

    # Data
    x = np.array([mindate + pd.Timedelta(d, unit='D') for d in X[ts]])
    pbar = np.array(y / N)[ts] * prev_mult

    # Prediction quantiles
    phat = pm.invlogit(pred[:, ts]).eval() * prev_mult
    pctiles = np.percentile(phat, [1, 5, 50, 95, 99], axis=0)

    # Tail probabilities for observed p
    prp = np.sum(pbar > phat, axis=0) / phat.shape[0]
    prp[prp > .5] = 1. - prp[prp > .5]

    # Risk masks
    red = prp <= 0.01
    orange = (0.01 < prp) & (prp <= 0.05)
    green = 0.05 < prp

    # Construct plot
    if plot_gp is True:
        plot_gp_dist(ax, phat, x, plot_samples=False, palette="Blues")
    else:
        ax.fill_between(x, pctiles[4, :], pctiles[0, :], color='lightgrey',
                        alpha=.5, label="99% credible interval")
        ax.fill_between(x, pctiles[3, :], pctiles[1, :], color='lightgrey',
                        alpha=1, label='95% credible interval')
        ax.plot(x, pctiles[2, :], c='grey', ls='-', label="Predicted prevalence")
    ax.scatter(x[green], pbar[green], c='green', s=8, alpha=0.5, label='0.05<p')
    ax.scatter(x[orange], pbar[orange], c='orange', s=8, alpha=0.5, label='0.01<p<=0.05')
    ax.scatter(x[red], pbar[red], c='red', s=8, alpha=0.5, label='p<=0.01')
def plot_posterior(trc, xs, ys):
    # plot the results
    fig = plt.figure(figsize=(12, 5))
    ax = fig.gca()

    # plot the samples from the gp posterior with samples and shading
    # (draw the posterior distribution of f)
    plot_gp_dist(ax, trc["f"], xs)

    # plot the data and the true latent function
    plt.plot(xs, ys, 'ok', ms=3, alpha=0.5, label="Observed data")

    # axis labels and title
    plt.xlabel("x")
    plt.ylabel("y")
    plt.legend(loc="best")
    plt.savefig(os.path.join(OUTPUT_DIR_PATH, "posterior.png"))
def plot_linewidth(self, thin=10):
    """Plots the estimated line width as a function of scaled n."""
    fig, ax = plt.subplots(1, 2, figsize=[16, 9])
    if self.gp0 != []:
        from pymc3.gp.util import plot_gp_dist
        n_new = np.linspace(-0.2, 1.2, 100)[:, None]
        with self.pm_model:
            f_pred0 = self.gp0.conditional("f_pred0", n_new)
            f_pred2 = self.gp2.conditional("f_pred2", n_new)
            self.pred_samples = pm.sample_posterior_predictive(
                self.samples, vars=[f_pred0, f_pred2], samples=1000)
        plot_gp_dist(ax[0], self.pred_samples["f_pred0"], n_new)
        plot_gp_dist(ax[1], self.pred_samples["f_pred2"], n_new)
        for i in range(0, len(self.samples), thin):
            ax[0].scatter(self.n, self.samples['ln_width0'][i, :], c='k', alpha=0.3)
            ax[1].scatter(self.n, self.samples['ln_width2'][i, :], c='k', alpha=0.3)
    else:
        for i in range(0, len(self.samples), thin):
            ax[0].scatter(self.n, np.log(self.samples['width0'][i, :]), c='k', alpha=0.3)
            ax[1].scatter(self.n, np.log(self.samples['width2'][i, :]), c='k', alpha=0.3)
    ax[0].set_xlabel('normalised order')
    ax[1].set_xlabel('normalised order')
    ax[0].set_ylabel('ln line width')
    ax[1].set_ylabel('ln line width')
    ax[0].set_title('Radial modes')
    ax[1].set_title('Quadrupole modes')
    return fig
mulin = nflin * np.median(trace['m']) + np.median(trace['c'])

with pm_model:
    f_pred = gp.conditional("f_pred", nflin[:, None])
    expf_pred = pm.Deterministic('expf_pred', tt.exp(f_pred))
    pred_samples = pm.sample_posterior_predictive(trace, vars=[expf_pred], samples=1000)

with plt.style.context(lk.MPLSTYLE):
    fig, ax = plt.subplots()
    plot_gp_dist(ax, pred_samples['expf_pred'], fslin, palette='viridis', fill_alpha=.05)
    ax.plot(fslin, np.exp(mulin), label='Mean Trend', lw=2, ls='-.', alpha=.5, zorder=0)
    ax.scatter(f0_, widths[0], label='truth', ec='k', s=50, zorder=5)
    ax.scatter(f1_, widths[1], label='truth 1', ec='k', s=50, zorder=5)
    ax.scatter(f2_, widths[2], label='truth 2', ec='k', s=50, zorder=5)
Xu = pm.gp.util.kmeans_inducing_points(15, X)

σ = pm.HalfCauchy("σ", beta=5)
obs = gp.marginal_likelihood("obs", X=X, Xu=Xu, y=y, noise=σ)

trace = pm.sample(1000, chains=1)

# add the GP conditional to the model, given the new X values
with temp_model:
    f_pred = gp.conditional("f_pred", X, pred_noise=True)

# To use the MAP values, you can just replace the trace with a length-1 list with `mp`
with temp_model:
    # pm.sample_ppc is the pre-3.7 name for pm.sample_posterior_predictive
    pred_samples = pm.sample_ppc(trace, vars=[f_pred], samples=10)

x = X.flatten()

# plot the results
fig = plt.figure(figsize=(12, 5))
ax = fig.gca()

# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
plot_gp_dist(ax, pred_samples["f_pred"], x)

# plot the data and the true latent function
plt.plot(x, y, 'ok', ms=3, alpha=0.5, label="Observed data")
plt.plot(Xu, 10 * np.ones(Xu.shape[0]), "co", ms=10, label="Inducing point locations")

# axis labels and title
plt.xlabel("X")
plt.title("Posterior distribution over $f(x)$ at the observed values")
plt.legend()
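# For reference, a standalone demonstration of plot_gp_dist itself (pymc3 3.x):
# `samples` must be an (n_draws, n_points) array and `x` a matching grid. The
# toy sine "draws" here are fabricated purely to exercise the keywords used
# throughout these snippets.
import numpy as np
import matplotlib.pyplot as plt
from pymc3.gp.util import plot_gp_dist

x_demo = np.linspace(0, 10, 100)
samples_demo = np.sin(x_demo) + 0.2 * np.random.randn(200, 100)  # 200 fake draws

fig, ax = plt.subplots()
plot_gp_dist(ax, samples_demo, x_demo,
             plot_samples=True,   # overlay individual posterior draws
             palette="Reds",      # matplotlib colormap for the percentile bands
             fill_alpha=0.8,      # opacity of the percentile shading
             samples_alpha=0.1)   # opacity of the overlaid draws
plt.show()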
def main():
    # Seed the random number generators
    np.random.seed(0)
    torch.manual_seed(0)

    # Create some toy data
    n = 500
    x = np.sort(np.random.uniform(0, 1, n))
    f = true_f(x)
    y = scipy.stats.bernoulli.rvs(scipy.special.expit(f))

    ## Uncomment to show raw data
    # plt.scatter(x, y, alpha=0.5)
    # plt.xlabel('$x$')
    # plt.ylabel('$y$')
    # plt.yticks([0, 1])
    # plt.show()

    ## Uncomment to show logits ("f")
    # fig, ax = plt.subplots()
    # x_plot = np.linspace(0, 1, 100)
    # ax.plot(x_plot, true_f(x_plot), alpha=0.5)
    # ax.scatter(x, f, alpha=0.5)
    # plt.show()

    train_x = torch.from_numpy(x.astype(np.float32))
    train_y = torch.from_numpy(y.astype(np.float32))

    # Set initial inducing points
    inducing_points = torch.rand(50)

    # Initialize model and likelihood
    model = GPClassificationModel(inducing_points=inducing_points)
    likelihood = BernoulliLikelihood()

    # Set number of epochs
    training_iter = 1000

    # Use the adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    # num_data refers to the number of training datapoints
    mll = VariationalELBO(likelihood, model, train_y.numel())

    iterator = tqdm(range(training_iter))
    for _ in iterator:
        # Zero backpropped gradients from previous iteration
        optimizer.zero_grad()
        # Get predictive output
        output = model(train_x)
        # Calc loss and backprop gradients
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()
        iterator.set_postfix(loss=loss.item())

    # Show results. plot_gp_dist only needs an (n_draws, n_points) numpy array,
    # so the pymc3 utility works on GPyTorch samples too (imported elsewhere in
    # this file).
    test_x = torch.linspace(0, 1, 101)
    f_preds = model(test_x)
    pred = f_preds.sample(torch.Size((1000,))).numpy()

    fig, ax = plt.subplots()
    plot_gp_dist(ax, pred, test_x)
    ax.plot(test_x, true_f(test_x), alpha=0.5)
    plt.show()
def plotPrediction(ax, data, prev_mult=1, plot_gp=False):
    """Predictive time series plot with (by default) the prediction summarised
    as [0.01, 0.05, 0.5, 0.95, 0.99] quantiles, and observations colour-coded
    by tail-probability.

    Parameters
    ==========
    ax        -- a set of axes on which to plot
    data      -- dict with 'pred', an M x T array-like of M posterior draws at
                 T times, and 'data', a frame with 'cases' and 'N' columns
    prev_mult -- prevalence multiplier (to get, e.g., prevalence per 1000 population)
    plot_gp   -- plot a GP smudge-o-gram rather than 95% and 99% quantiles

    Returns
    =======
    Nothing. Just modifies ax.
    """
    # Prediction quantiles
    phat = data['pred'].transpose() * prev_mult  # TxM for T times and M mcmc iterations
    pctiles = np.percentile(phat, [1, 5, 50, 95, 99], axis=1)

    # Data
    pbar = data['data']['cases'] / data['data']['N'] * prev_mult
    pbar = clip_to_range(pbar, phat.index)

    # Tail probabilities for observed p
    phat_interp = phat.apply(lambda x: np.interp(pbar.index, phat.index, x))
    prp = np.sum(pbar[:, None] > phat_interp, axis=1) / phat_interp.shape[1]
    prp[prp > .5] = 1. - prp[prp > .5]

    # Risk masks
    red = prp <= 0.01
    orange = (0.01 < prp) & (prp <= 0.05)
    green = 0.05 < prp

    # Construct plot
    if plot_gp is True:
        from pymc3.gp.util import plot_gp_dist
        plot_gp_dist(ax, phat, phat.columns, plot_samples=False, palette="Blues")
    else:
        ax.fill_between(phat.index, pctiles[4, :], pctiles[0, :], color='lightgrey',
                        alpha=.5, label="99% credible interval")
        ax.fill_between(phat.index, pctiles[3, :], pctiles[1, :], color='lightgrey',
                        alpha=1, label='95% credible interval')
        ax.plot(phat.index, pctiles[2, :], c='grey', ls='-', label="Predicted prevalence")
    ax.scatter(pbar.index, pbar, s=8, alpha=0.5)
    ax.scatter(pbar.index[green], np.asarray(pbar)[green], c='green', s=8, alpha=0.5, label='0.05<p')
    ax.scatter(pbar.index[orange], np.asarray(pbar)[orange], c='orange', s=8, alpha=0.5, label='0.01<p<=0.05')
    ax.scatter(pbar.index[red], np.asarray(pbar)[red], c='red', s=8, alpha=0.5, label='p<=0.01')
# The Gaussian process is a sum of these three components
σ = pm.HalfNormal("σ", sd=2.0)
y_ = pm.StudentT('y', mu=f, sd=σ, nu=1, observed=y)

# this line calls an optimizer to find the MAP
#mp = pm.find_MAP(include_transformed=True)
trace = pm.sample(1000, chains=1)

fig = plt.figure(figsize=(12, 5))
ax = fig.gca()

# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
plot_gp_dist(ax, trace["f"], X_obs)

# plot the data and the true latent function
ax.plot(X, mean_func_true, "dodgerblue", lw=3, label="True f")
ax.plot(X_obs, y, 'ok', ms=3, label="Data")
ax.set_xlabel("X")
ax.set_ylabel("y")

# axis labels and title
plt.xlabel("X")
plt.ylabel("True f(x)")
plt.title("Posterior distribution over $f(x)$ at the observed values")
plt.legend()

# create the posterior/trace plots of the variables
lines = [
σ = pm.HalfNormal("σ", sd=2.0)
y_ = pm.StudentT('y', mu=f, sd=σ, nu=1, observed=split_vals)
# y_ = pm.Normal('y', mu=f, sigma=σ, observed=y)

# this line calls an optimizer to find the MAP
#mp = pm.find_MAP(include_transformed=True)
trace = pm.sample(1000, chains=1)

fig = plt.figure(figsize=(12, 5))
ax = fig.gca()

# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
plot_gp_dist(ax, trace["f"], freqs)

# plot the data and the true latent function
ax.plot(freqs, split_vals, "dodgerblue", lw=3, label="True f")
ax.set_xlabel("X")
ax.set_ylabel("y")

# axis labels and title
plt.xlabel("X")
plt.ylabel("True f(x)")
plt.title("Posterior distribution over $f(x)$ at the observed values")
plt.legend()

# create the posterior/trace plots of the variables
lines = [
#%% Predict new values from x=0 to x=20
X_new = np.linspace(0, 20, 600)[:, None]

# add the GP conditional to the model, given the new X values
with model:
    f_pred = gp.conditional("f_pred", X_new)

# To use the MAP values, you can just replace the trace with a length-1 list with `mp`
with model:
    pred_samples = pm.sample_posterior_predictive([mp0], vars=[f_pred], samples=1000)

#%% Plot the results
fig = plt.figure(figsize=(12, 5))
ax = fig.gca()

# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
plot_gp_dist(ax, pred_samples["f_pred"], X_new)

# plot the data and the true latent function
plt.plot(X, f_true, "dodgerblue", lw=3, label="True f")
plt.plot(X, y, 'ok', ms=3, alpha=0.5, label="Observed data")

# axis labels and title
plt.xlabel("X")
plt.ylim([-13, 13])
plt.title("Posterior distribution over $f(x)$ at the observed values")
plt.legend()
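# The block above passes `[mp0]`, a length-1 list holding a point estimate, in
# place of a full trace, exactly as its comment describes. A sketch of how such
# a point is typically obtained, assuming the same pymc3 `model` (the name
# `mp0` is carried over from the snippet):
with model:
    mp0 = pm.find_MAP()  # dict of MAP values; usable wherever a trace is expected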
sigma = pm.HalfCauchy("sigma", beta=5)
nu = pm.Gamma("nu", alpha=2, beta=0.1)
y_ = pm.StudentT("y", mu=f, lam=1.0 / sigma, nu=nu, observed=y)

trace = pm.sample(200, n_init=100, tune=100, chains=2, cores=2,
                  return_inferencedata=True)
az.to_netcdf(trace, 'src/experiments/results/lat_gp_trace')

# check Rhat; values above 1 may indicate convergence issues
n_nonconverged = int(np.sum(az.rhat(trace)[["eta", "l", "f_rotated_"]].to_array() > 1.03).values)
print("%i variables' MCMC chains appear not to have converged." % n_nonconverged)

# plot the results
fig = plt.figure(figsize=(12, 5))
ax = fig.gca()

# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
plot_gp_dist(ax, trace.posterior["f"][0, :, :], X)

# plot the data and the true latent function
ax.plot(X, f_true, "dodgerblue", lw=3, label="True generating function 'f'")
ax.plot(X, y, "ok", ms=3, label="Observed data")

# axis labels and title
plt.xlabel("X")
plt.ylabel("True f(x)")
plt.title("Posterior distribution over $f(x)$ at the observed values")
plt.legend()
plt.show()
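# The call above plots only chain 0 of the InferenceData. A sketch for pooling
# draws from all chains instead, using plain xarray stacking on the same
# `trace` object (assumes a reasonably recent xarray with Ellipsis support in
# transpose):
f_all = trace.posterior["f"].stack(sample=("chain", "draw")).transpose("sample", ...)
plot_gp_dist(ax, f_all.values, X)  # (n_chains * n_draws, n_points) array of draws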
# The Gaussian process is a sum of these three components
σ = pm.HalfNormal("σ", sd=2.0)
y_ = pm.StudentT('y', mu=f, sd=σ, nu=1, observed=y)
# y_ = pm.Normal('y', mu=f, sigma=σ, observed=y)

# this line calls an optimizer to find the MAP
#mp = pm.find_MAP(include_transformed=True)
trace = pm.sample(1000, chains=1)

fig = plt.figure(figsize=(12, 5))
ax = fig.gca()

# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
plot_gp_dist(ax, trace["f"], freqs[:-1])

# plot the data and the true latent function
ax.plot(freqs[:-1], vals, "dodgerblue", lw=3, label="True f")
ax.plot(freqs[:-1], y, 'ok', ms=3, label="Data")
ax.set_xlabel("X")
ax.set_ylabel("y")

# axis labels and title
plt.xlabel("X")
plt.ylabel("True f(x)")
plt.title("Posterior distribution over $f(x)$ at the observed values")
plt.legend()

# create the posterior/trace plots of the variables
lines = [
trace = pm.sample(1000, chains=1)

# add the GP conditional to the model, given the new X values
with temp_model:
    f_pred = gp.conditional("f_pred", X, pred_noise=True)

# To use the MAP values, you can just replace the trace with a length-1 list with `mp`
with temp_model:
    pred_samples = pm.sample_ppc(trace, vars=[f_pred], samples=10)

x = X.flatten()

# plot the results
fig = plt.figure(figsize=(12, 5))
ax = fig.gca()

# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
plot_gp_dist(ax, pred_samples["f_pred"], x)

# plot the data and the true latent function
plt.plot(x, y, 'ok', ms=3, alpha=0.5, label="Observed data")
plt.plot(Xu, 10 * np.ones(Xu.shape[0]), "co", ms=10, label="Inducing point locations")

# axis labels and title
plt.xlabel("X")
plt.title("Posterior distribution over $f(x)$ at the observed values")
plt.legend()