                       shape=2)
    prior = sample_prior(samples=1000)
    x = pm.NormalMixture('x_obs', w, mus, sigma=sigmas, observed=data[:, channel])

# Sample:
# with model:
#     %time hmc_trace = pm.sample(draws=300, tune=700, cores=10)

# Fit:
with model:
    advi_fit = pm.fit(n=3000, obj_optimizer=pm.adagrad(learning_rate=1e-1),
                      method='advi')

# Show results MCMC
# pm.traceplot(hmc_trace)
# pm.summary(hmc_trace, include_transformed=False)

# # Show results advi:
# f = plt.figure()
# advi_elbo = pd.DataFrame(
#     {'log-ELBO': -np.log(advi_fit.hist),
#      'n': np.arange(advi_fit.hist.shape[0])})
# _ = sns.lineplot(y='log-ELBO', x='n', data=advi_elbo)
# f.savefig("figures/" + slideNames[slide] + "/" + "adviElbo_Section" +
#           str(int(section)) + "_channel" + str(channel) + ".png",
#           bbox_inches='tight')
# plt.close(f)

advi_trace = advi_fit.sample(10000)
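# `sample_prior` is not defined in this listing. A minimal sketch of what it is
# assumed to do (return a dictionary of prior draws for the variables defined so
# far), using PyMC3's built-in prior predictive sampler; the author's helper may
# differ in naming and behaviour:
def sample_prior(samples=1000):
    # Must be called inside the model context, as above.
    return pm.sample_prior_predictive(samples=samples)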
# +
# Algorithm settings
nuts_kwargs = {
    "draws": 5000,
    "tune": 5000,
    "init": "adapt_diag",
    "target_accept": 0.9,
    "cores": 2,
    "random_seed": [1, 2],
}

advi_kwargs = {
    "n": 20_000,
    "method": "advi",
    "obj_n_mc": 10,
    "obj_optimizer": pm.adagrad(),
}
# -

# Models to build
grid = [
    {
        "algorithm": ["ADVI", "NUTS"],
        "hidden": [1],
        "width": [50],
        "sigma": [0.1, 0.5, 0.75, 1.0, 1.5],
        "noise": [0.5],
    },
    {
        "algorithm": ["ADVI", "NUTS"],
        "hidden": [1],
        chol = pm.expand_packed_triangular(n=D, packed=packed_chol)
        invchol = solve_lower_triangular(chol, np.eye(D))
        taus.append(tt.dot(invchol.T, invchol))

    # Mixture density
    pi = pm.Dirichlet('pi', a=np.ones(K), shape=(K,))
    B = pm.DensityDist('B', logp_gmix(mus, pi, taus), shape=(n_samples, D))
    Y_hat = tt.sum(X[:, :, np.newaxis] * B.reshape((n_samples, D // 2, 2)), axis=1)

    # Model error
    err = pm.HalfCauchy('err', beta=10)

    # Data likelihood
    Y_logp = pm.MvNormal('Y_logp', mu=Y_hat, cov=err * np.eye(2), observed=Y)

with model:
    approx = pm.variational.inference.fit(
        n=1000, obj_optimizer=pm.adagrad(learning_rate=0.1)
    )

plt.figure()
plt.plot(approx.hist)
plt.savefig('../images/Mo_' + str(K) + 'G_ADVI' + data + '_lag' + str(lag) +
            'convergence.png')

gbij = approx.gbij
means = gbij.rmap(approx.mean.eval())

with open('../data/Mo' + str(K) + 'G_results' + data + '_lag' + str(lag) +
          '.pickle', 'wb') as f:
    pickle.dump(means, f, protocol=pickle.HIGHEST_PROTOCOL)
    x = pm.DensityDist('x', logp_gmix(mus, w, sigmas), observed=data[:, 1:3])

# Plot prior for some parameters:
# plt.hist(prior['mu'][:,:,0])
# plt.show()
# plt.hist(prior['taus_0'][:,1])
# plt.show()
plt.hist(prior['sigma_signal'])
plt.show()

# Fit:
with model:
    advi_fit = pm.fit(n=5000, obj_optimizer=pm.adagrad(learning_rate=1e-1),
                      method='advi')

# Sample:
with model:
    %time hmc_trace = pm.sample(draws=50, tune=1000, cores=16, target_accept=0.99)

# Show results advi:
f = plt.figure()
advi_elbo = pd.DataFrame(
    {'log-ELBO': -np.log(advi_fit.hist),
     'n': np.arange(advi_fit.hist.shape[0])})
_ = sns.lineplot(y='log-ELBO', x='n', data=advi_elbo)

advi_trace = advi_fit.sample(10000)
pm.summary(advi_trace, include_transformed=False)

# Plot predicted and actual distribution of intensities for each channel:
f, axis = plt.subplots(1, n_dimensions, figsize=(n_dimensions * 10, 10))
def logp_gmix(mus, pi, sigmas):
    def logp_(value):
        # NOTE: hard-coded for three mixture components
        logps = [tt.log(pi[i]) + logp_normal(mus[i], sigmas[i], value)
                 for i in range(3)]
        return tt.sum(logsumexp(tt.stacklists(logps), axis=0))
    return logp_


with pm.Model() as model:
    w = pm.Dirichlet('w', alpha)
    mus = pm.Normal('mu', mu=mean_priorMean, sigma=mean_priorSigma,
                    shape=(n_components, n_dimensions))
    sigmas = pm.Gamma('sigma', mu=sigma_priorMean, sigma=sigma_priorSigma,
                      shape=(n_components, n_dimensions))
    c = pm.Normal('c', mu=spectralSignature_priorMean,
                  sigma=spectralSignature_priorSigma,
                  shape=(n_dimensions, n_dimensions))
    prior = sample_prior(samples=1000)
    # tt.inv is the elementwise reciprocal (1/c), not a matrix inverse
    data_corrected = tt.log(tt.dot(data, tt.inv(c)))
    x = pm.DensityDist('x', logp_gmix(mus, w, sigmas), observed=data_corrected)

# Plot prior for some parameters:
f = plt.figure()
plt.hist(prior['mu'][:, :, 0])
plt.show()
f.savefig("figures/" + slideNames[slide] + "/" + "muPriorSection" + str(section) +
          "channel" + str(channel) + ".png", bbox_inches='tight')
plt.close(f)

f = plt.figure()
plt.hist(prior['taus_0'][:, 1])
plt.show()
f.savefig("figures/" + slideNames[slide] + "/" + "sigmaPriorSection" + str(section) +
          "channel" + str(channel) + ".png", bbox_inches='tight')
plt.close(f)

f = plt.figure()
plt.hist(prior['w'])
plt.show()
f.savefig("figures/" + slideNames[slide] + "/" + "wPriorSection" + str(section) +
          "channel" + str(channel) + ".png", bbox_inches='tight')
plt.close(f)

# Fit:
with model:
    advi_fit = pm.fit(n=500, obj_optimizer=pm.adagrad(learning_rate=1e-1),
                      method='advi')

# Sample:
with model:
    %time hmc_trace = pm.sample(draws=20, tune=50, cores=15)

# Show results advi:
f = plt.figure()
advi_elbo = pd.DataFrame(
    {'log-ELBO': -np.log(advi_fit.hist),
     'n': np.arange(advi_fit.hist.shape[0])})
_ = sns.lineplot(y='log-ELBO', x='n', data=advi_elbo)
f.savefig("figures/" + slideNames[slide] + "/" + "adviElbo_Section" + str(section) +
          "_channel" + str(channel) + ".png", bbox_inches='tight')
plt.close(f)

advi_trace = advi_fit.sample(10000)
pm.summary(advi_trace, include_transformed=False)

# Plot of all component distributions in each channel
f, axis = plt.subplots(n_components, n_dimensions,
                       figsize=(n_components * 2.5, n_dimensions * 2.5))
plt.rcParams['axes.titlesize'] = 10
plt.rcParams['axes.facecolor'] = 'white'
dotSize = 0.5
colours = ('gold', 'pink', 'green', 'red', 'blue')
x_min = 6
x_max = 12
x = np.linspace(x_min, x_max, 100)
for i in range(n_components):
    for j in range(n_dimensions):
        axis[i, j].plot(x, scipy.stats.norm.pdf(
            x, np.mean(advi_trace.get_values('mu')[:, i, j]),
            np.mean(advi_trace.get_values('sigma')[:, i, j])), color=colours[j])

mean_posteriorMean = np.zeros((n_components, n_dimensions))
for i in range(n_components):
    for j in range(n_dimensions):
        mean_posteriorMean[i, j] = np.mean(advi_trace.get_values('mu')[:, i, j])

# Show results hmc:
f, axis = plt.subplots(n_components, n_dimensions,
                       figsize=(n_components * 2.5, n_dimensions * 2.5))
plt.rcParams['axes.titlesize'] = 10
plt.rcParams['axes.facecolor'] = 'white'
dotSize = 0.5
colours = ('gold', 'pink', 'green', 'red', 'blue')
x_min = 6
x_max = 12
x = np.linspace(x_min, x_max, 100)
for i in range(n_components):
    for j in range(n_dimensions):
        axis[i, j].plot(x, scipy.stats.norm.pdf(
            x, np.mean(hmc_trace.get_values('mu')[:, i, j]),
            np.mean(hmc_trace.get_values('sigma')[:, i, j])), color=colours[j])

mean_posteriorMean = np.zeros((n_components, n_dimensions))
for i in range(n_components):
    for j in range(n_dimensions):
        mean_posteriorMean[i, j] = np.mean(hmc_trace.get_values('mu')[:, i, j])

# Save trace means:
advi_mus = np.array([[np.mean(advi_trace.get_values('mu')[:, i, j])
                      for i in range(n_components)]
                     for j in range(n_dimensions)])
advi_sigmas = np.array([[np.mean(advi_trace.get_values('sigma')[:, i, j])
                         for i in range(n_components)]
                        for j in range(n_dimensions)])
advi_w = np.array([np.mean(advi_trace.get_values('w')[:, i])
                   for i in range(n_components)])
advi_data = {"advi_mu": advi_mus, "advi_sigma": advi_sigmas, "advi_w": advi_w}
pickle_out = open("data/" + slideNames[slide] + '_AdviFitResults.pickle', "wb")
pickle.dump(advi_data, pickle_out)
pickle_out.close()

# Calculate class membership, using advi_trace and the logp_normal function:
confidenceThreshold = 0.66
class0LogProb = [logp_normal(np.mean(advi_trace.get_values('mu')[:, 0]),
                             np.mean(advi_trace.get_values('sigma')[:, 0]),
                             data[k, channel])
                 for k in np.where(sectionNumber == section)]
class1LogProb = [logp_normal(np.mean(advi_trace.get_values('mu')[:, 1]),
                             np.mean(advi_trace.get_values('sigma')[:, 1]),
                             data[k, channel])
                 for k in np.where(sectionNumber == section)]
normalizedProbs = [exp_normalize(np.array((class0LogProb[0][i], class1LogProb[0][i])))
                   for i in range(len(class0LogProb[0]))]
maxProbs = [max(normalizedProbs[i]) for i in range(len(normalizedProbs))]
classMembership = [np.argmax(normalizedProbs[i]) for i in range(len(normalizedProbs))]
confidentClass = [2 if maxProbs[i] < confidenceThreshold else classMembership[i]
                  for i in range(len(classMembership))]

# Class membership probability:
pickle_out = open("data/" + slideNames[slide] + "Probability-" +
                  celltypeOrder[channel] + '.pickle', "wb")
pickle.dump(normalizedProbs, pickle_out)
pickle_out.close()

### Plot results:

# Histograms:
if sum(np.array(confidentClass) == 1) > 0:
    boundary1 = min(data[sectionNumber == section, channel][np.array(confidentClass) == 1])
else:
    boundary1 = np.inf
if sum(np.array(confidentClass) == 2) > 0:
    boundary2 = min(data[sectionNumber == section, channel][np.array(confidentClass) == 2])
else:
    boundary2 = 0

fig = plt.figure()
fig, ax = plt.subplots()
N, bins, patches = ax.hist(data[sectionNumber == section, channel],
                           edgecolor='white', linewidth=1, bins=100)
for i in range(0, len(patches)):
    if bins[i] < boundary2:
        patches[i].set_facecolor('b')
    elif bins[i] < boundary1:
        patches[i].set_facecolor('black')
    else:
        patches[i].set_facecolor('r')
plt.gca().set_title('Log Intensity and Classification Channel ' + channelOrder[channel])
plt.show()
fig.savefig("figures/" + slideNames[slide] + "/" +
            "HistogramIntensityAndClassification" + "Section" + str(section) +
            "channel" + str(channel) + ".png", bbox_inches='tight')
plt.close(fig)

# Scatterplots:
colours = np.repeat('black', sum(sectionNumber == section))
if sum(np.array(confidentClass) == 1) > 0:
    colours[np.array(confidentClass) == 1] = 'red'

fig = plt.figure()
plt.scatter(kptn_data[sectionNumber == section, 0],
            np.exp(data[sectionNumber == section, channel]), c=colours, s=0.1)
plt.gca().set_title('Intensity and Classification Channel ' + channelOrder[channel])
plt.show()
fig.savefig("figures/" + slideNames[slide] + "/" +
            "ScatterPlotIntensityAndClassification" + "Section" + str(section) +
            "channel" + channelOrder[channel] + ".png", bbox_inches='tight')
plt.close(fig)

fig = plt.figure()
plt.scatter(kptn_data[sectionNumber == section, 0],
            data[sectionNumber == section, channel], c=colours, s=0.1)
plt.gca().set_title('Log Intensity and Classification Channel ' + channelOrder[channel])
plt.show()
fig.savefig("figures/" + slideNames[slide] + "/" +
            "ScatterPlotLOGIntensityAndClassification" + "Section" + str(section) +
            "channel" +
channelOrder[channel] + ".png", bbox_inches='tight') plt.close(fig) # Slide location of each cell type (including unclassified): fig = plt.figure() plt.scatter(kptn_data[sectionNumber == section,0][np.array(confidentClass) == 1], kptn_data[sectionNumber == section,1][np.array(confidentClass) == 1], s = 0.05) plt.gca().set_title('Nuclei Positive Classification Slide ' + str(slide) + " Section " + str(section) + " Channel " + channelOrder[channel] + ".png") plt.show() fig.savefig("figures/" + slideNames[slide] + "/" + "PositiveClassificationPosition" + str(slide) + "section" + str(section) + "channel" + channelOrder[channel] + ".png", bbox_inches='tight') plt.close(fig)
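# `logp_normal` and `exp_normalize` are helpers that are not defined in this
# listing. Below is a minimal NumPy sketch consistent with how they are called
# in the class-membership step above; the names match the calls, but the exact
# implementations are assumptions (the model code presumably uses a Theano
# analogue of logp_normal):
import numpy as np


def logp_normal(mu, sigma, value):
    # Elementwise log-density of a Normal(mu, sigma) distribution.
    return -0.5 * np.log(2 * np.pi) - np.log(sigma) - 0.5 * ((value - mu) / sigma) ** 2


def exp_normalize(log_probs):
    # Convert log-probabilities to probabilities that sum to one,
    # subtracting the maximum first for numerical stability.
    shifted = np.exp(log_probs - log_probs.max())
    return shifted / shifted.sum()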
# + {"slideshow": {"slide_type": "fragment"}} with model: # Sample from the posterior using the No-U-Turn sampler trace = pm.sample(draws=5000, tune=5000, init="adapt_diag", target_accept=0.9, cores=2, random_seed=[1, 2]) # Approximate the posterior using variational inference mean_field = pm.fit(20_000, method='advi', obj_n_mc=10, obj_optimizer=pm.adagrad()) trace_advi = mean_field.sample(10_000) # + {"slideshow": {"slide_type": "slide"}, "cell_type": "markdown"} # Finally, we simulate the posterior predictive for 1000 equally spaced values of $x$ and plot the results. # # First, the effect of the prior. A reasonably well-selected prior allows a BNN to adequately reflect both epistemic and aleatoric uncertainty: # # <img src="fig/NUTS_hidden_1_width_50_sigma_1.0_noise_0.5.png"> # + {"slideshow": {"slide_type": "slide"}, "cell_type": "markdown"} # Higher prior variance results in a significantly larger epistemic uncertainty. Calibration will not help in this case, since there is no data: # # <img src="fig/NUTS_hidden_1_width_50_sigma_1.5_noise_0.5.png"> # + {"slideshow": {"slide_type": "slide"}, "cell_type": "markdown"}
        tt.eye(n_dimensions, n_dimensions) * sigmas[i, :]
        for i in range(n_components)
    ]
    taus = [
        tt.nlinalg.matrix_inverse(covs[i])**2 for i in range(n_components)
    ]

    # Gaussian Mixture Model:
    x = DensityDist('x', logp_gmix(mus, w, taus, n_components),
                    observed=pm.floatX(data))

with model:
    advi_fit = pm.fit(n=100000, obj_optimizer=pm.adagrad(learning_rate=0.01))
    advi_trace = advi_fit.sample(10000)
    advi_summary = pm.summary(advi_trace, include_transformed=False)

pickle_out = open(
    "advi_summaries/advi_summary_slide" + str(slide) + 'hemisphere_' +
    str(hemisphere) + ".pickle", "wb")
pickle.dump(advi_summary, pickle_out)
pickle_out.close()

# advi_elbo = pd.DataFrame(
#     {'log-ELBO': -np.log(advi_fit.hist),
#      'n': np.arange(advi_fit.hist.shape[0])})
# _ = sns.lineplot(y='log-ELBO', x='n', data=advi_elbo)
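# `logp_gmix` (four-argument form) and its helpers are defined elsewhere in this
# notebook. The sketch below shows what a mixture log-density over precision
# matrices `taus` could look like; the helper names `logp_mvnormal` and
# `logsumexp` are assumptions, not the author's implementation.
import numpy as np
import theano.tensor as tt


def logsumexp(x, axis=None):
    # Numerically stable log-sum-exp along the given axis.
    x_max = tt.max(x, axis=axis, keepdims=True)
    return tt.log(tt.sum(tt.exp(x - x_max), axis=axis, keepdims=True)) + x_max


def logp_mvnormal(mu, tau, value):
    # Multivariate normal log-density parameterized by a precision matrix.
    k = mu.shape[-1]
    delta = value - mu
    return 0.5 * (tt.log(tt.nlinalg.det(tau)) - k * tt.log(2 * np.pi)
                  - (delta.dot(tau) * delta).sum(axis=-1))


def logp_gmix(mus, w, taus, n_components):
    # Returns a closure computing the summed log-density of a Gaussian mixture,
    # as expected by pm.DensityDist above.
    def logp_(value):
        logps = [tt.log(w[i]) + logp_mvnormal(mus[i], taus[i], value)
                 for i in range(n_components)]
        return tt.sum(logsumexp(tt.stacklists(logps), axis=0))
    return logp_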
    packed_chol = pm.LKJCholeskyCov('packed_chol', n=D, eta=1, sd_dist=sd_dist)
    chol = pm.expand_packed_triangular(n=D, packed=packed_chol)
    invchol = solve_lower_triangular(chol, np.eye(D))
    tau = tt.dot(invchol.T, invchol)

    # Mixture density
    B = pm.DensityDist('B', logp_g(mu, tau), shape=(n_samples, D))
    Y_hat = tt.sum(X[:, :, np.newaxis] * B.reshape((n_samples, D // 2, 2)), axis=1)

    # Model error
    err = pm.HalfCauchy('err', beta=10)

    # Data likelihood
    Y_logp = pm.MvNormal('Y_logp', mu=Y_hat, cov=err * np.eye(2), observed=Y)

with model:
    approx = pm.variational.inference.fit(
        n=1000, obj_optimizer=pm.adagrad(learning_rate=0.1))

plt.figure()
plt.plot(approx.hist)
plt.savefig('../images/1G_ADVI' + data + '_lag' + str(lag) + 'convergence.png')

gbij = approx.gbij
means = gbij.rmap(approx.mean.eval())

with open('../data/1G_results' + data + '_lag' + str(lag) + '.pickle', 'wb') as f:
    pickle.dump(means, f, protocol=pickle.HIGHEST_PROTOCOL)
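# A minimal sketch of reading the saved posterior means back in later. The file
# name mirrors the dump above; `means` is a dict of posterior mean values keyed
# by the model's (transformed) free-variable names, as produced by gbij.rmap:
import pickle

with open('../data/1G_results' + data + '_lag' + str(lag) + '.pickle', 'rb') as f:
    means = pickle.load(f)
print(means.keys())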
        pm.expand_packed_triangular(n_dimensions, packed_L[i])
        for i in range(n_components)
    ]
    covs = [
        pm.Deterministic('cov_%d' % i, tt.dot(L[i], L[i].T))
        for i in range(n_components)
    ]
    taus = [tt.nlinalg.matrix_inverse(covs[i]) for i in range(n_components)]

    # Gaussian Mixture Model:
    xs = DensityDist('x', logp_gmix(mus, w, taus, n_components),
                     observed=pm.floatX(data))

with model:
    advi_fit = pm.fit(n=25000, obj_optimizer=pm.adagrad(learning_rate=1e-1))
    advi_trace = advi_fit.sample(10000)
    advi_summary = pm.summary(advi_trace, include_transformed=False)

advi_elbo = pd.DataFrame({
    'log-ELBO': -np.log(advi_fit.hist),
    'n': np.arange(advi_fit.hist.shape[0])
})
_ = sns.lineplot(y='log-ELBO', x='n', data=advi_elbo)
plt.savefig('books_read.png')

pickle_out = open("advi_summary1.pickle", "wb")
pickle.dump(advi_summary, pickle_out)
pickle_out.close()
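# A minimal sketch of reloading the pickled summary (file name taken from the
# dump above). pm.summary returns a pandas DataFrame, so the posterior means sit
# in its 'mean' column:
import pickle

with open("advi_summary1.pickle", "rb") as f:
    advi_summary = pickle.load(f)
print(advi_summary['mean'].head())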
print(f'Max Gelman-Rubin: {tests.r_hat.max():.2f}')
tests.T

# Plot selected weights for illustration
az.plot_trace(data, coords={'weight': range(2, 5)});

# +
# Optionally plot autocorrelation (the list may be long).
# The effective sample size above is a good alternative.
# az.plot_autocorr(data, combined=True);
# -

# # Automatic differentiation variational inference

with model:
    mean_field = pm.fit(20_000, method='advi', obj_n_mc=10,
                        obj_optimizer=pm.adagrad())
    trace_advi = mean_field.sample(10_000)

# # Posterior predictive

# +
# Simulate data from the posterior predictive using the NUTS posterior
x_test = np.linspace(df.x.min(), df.x.max(), num=1000)
posterior_predictive = simulate_posterior_predictive(trace, nn, x_test,
                                                     n_samples=10_000)

# Plot the results: truth vs prediction
plot_posterior_predictive(x_test, posterior_predictive, func=func, df=df,
                          title=f'NUTS, Weight Uncertainty {sigma}, Noise {noise},\n'
                                f'{width} Nodes in 1 Hidden Layer')

diagnostics = (f'Minimum ESS: {tests.ess_mean.min():,.2f}\n'
               f'Max Gelman-Rubin: {tests.r_hat.max():.2f}')
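# The same helpers can presumably be reused for the mean-field approximation by
# swapping in `trace_advi` drawn above; this sketch simply mirrors the NUTS call
# signatures already used in this notebook:
posterior_predictive_advi = simulate_posterior_predictive(trace_advi, nn, x_test,
                                                          n_samples=10_000)
plot_posterior_predictive(x_test, posterior_predictive_advi, func=func, df=df,
                          title=f'ADVI, Weight Uncertainty {sigma}, Noise {noise},\n'
                                f'{width} Nodes in 1 Hidden Layer')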