#ax.legend(loc=1); # Calculate class membership, by using hmc_trace and logp_normal function: #confidenceThreshold = 0.95 #class0LogProb = [logp_normal(np.mean(hmc_trace.get_values('mu_0')), np.mean(hmc_trace.get_values('sigma')[:,0]) , data[k,channel]) for k in np.where(sectionNumber == section)] #class1LogProb = [logp_normal(np.mean(hmc_trace.get_values('mu_1')), np.mean(hmc_trace.get_values('sigma')[:,1]) , data[k,channel]) for k in np.where(sectionNumber == section)] #normalizedProbs = [exp_normalize(np.array((class0LogProb[0][i], class1LogProb[0][i]))) for i in range(len(class0LogProb[0]))] #maxProbs = [max(normalizedProbs[i]) for i in range(len(normalizedProbs))] #classMembership = [np.argmax(normalizedProbs[i]) for i in range(len(normalizedProbs))] #confidentClass = [2 if maxProbs[i] < confidenceThreshold else classMembership[i] for i in range(len(classMembership))] # Calculate class membership, by using advi_trace and logp_normal function: confidenceThreshold = 0.95 class0LogProb = [ logp_normal(np.mean(advi_trace.get_values('mu')[:, 0]), np.mean(advi_trace.get_values('sigma')[:, 0]), data[k, channel]) for k in range(np.shape(data)[0]) ] class1LogProb = [ logp_normal(np.mean(advi_trace.get_values('mu')[:, 1]), np.mean(advi_trace.get_values('sigma')[:, 1]), data[k, channel]) for k in range(np.shape(data)[0]) ] normalizedProbs = [ exp_normalize(np.array((class0LogProb[i], class1LogProb[i]))) for i in range(len(class0LogProb)) ] maxProbs = [ max(normalizedProbs[i]) for i in range(len(normalizedProbs)) ] classMembership = [
def logp_(value):
    """Log-density of `value` under a 3-component Gaussian mixture.

    Builds each component's weighted log-density, stacks them, applies a
    log-sum-exp over components, and sums over observations.  `pi`, `mus`,
    `sigmas`, `logp_normal` and `logsumexp` come from the enclosing scope.
    """
    per_component = []
    for idx in range(3):
        per_component.append(tt.log(pi[idx]) + logp_normal(mus[idx], sigmas[idx], value))
    stacked = tt.stacklists(per_component)
    return tt.sum(logsumexp(stacked, axis=0))
def logp_gmix(mus, pi, sigmas): def logp_(value): logps = [tt.log(pi[i]) + logp_normal(mus[i], sigmas[i], value) for i in range(3)] return tt.sum(logsumexp(tt.stacklists(logps), axis=0)) return logp_ with pm.Model() as model: w = pm.Dirichlet('w', alpha) mus = pm.Normal('mu', mu = mean_priorMean, sigma = mean_priorSigma, shape = (n_components, n_dimensions)) sigmas = pm.Gamma('sigma', mu = sigma_priorMean, sigma = sigma_priorSigma, shape = (n_components, n_dimensions)) c = pm.Normal('c', mu = spectralSignature_priorMean, sigma = spectralSignature_priorSigma, shape = (n_dimensions, n_dimensions)) prior = sample_prior(samples = 1000) data_corrected = tt.log(tt.dot(data,tt.inv(c))) x = pm.DensityDist('x', logp_gmix(mus, w, sigmas), observed=data_corrected) # Plot prior for some parameters: f = plt.figure() plt.hist(prior['mu'][:,:,0]) plt.show() f.savefig("figures/" + slideNames[slide] + "/" + "muPriorSection" + str(section) + "channel" + str(channel) + ".png", bbox_inches='tight') plt.close(f) f = plt.figure() plt.hist(prior['taus_0'][:,1]) plt.show() f.savefig("figures/" + slideNames[slide] + "/" + "sigmaPriorSection" + str(section) + "channel" + str(channel) + ".png", bbox_inches='tight') plt.close(f) f = plt.figure() plt.hist(prior['w']) plt.show() f.savefig("figures/" + slideNames[slide] + "/" + "wPriorSection" + str(section) + "channel" + str(channel) + ".png", bbox_inches='tight') plt.close(f) # Fit: with model: advi_fit = pm.fit(n=500, obj_optimizer=pm.adagrad(learning_rate=1e-1), method = 'advi') # Sample: with model: %time hmc_trace = pm.sample(draws=20, tune=50, cores=15) # Show results advi: f = plt.figure() advi_elbo = pd.DataFrame( {'log-ELBO': -np.log(advi_fit.hist), 'n': np.arange(advi_fit.hist.shape[0])}) _ = sns.lineplot(y='log-ELBO', x='n', data=advi_elbo) f.savefig("figures/" + slideNames[slide] + "/" + "adviElbo_Section" + str(section) + "_channel" + str(channel) + ".png", bbox_inches='tight') plt.close(f) advi_trace = advi_fit.sample(10000) 
pm.summary(advi_trace, include_transformed=False) # Plot of all component distributions in each channel f, axis = plt.subplots(n_components,n_dimensions, figsize=(n_components*2.5,n_dimensions*2.5)) plt.rcParams['axes.titlesize'] = 10 plt.rcParams['axes.facecolor'] = 'white' dotSize = 0.5 colours = ('gold', 'pink','green', 'red', 'blue') x_min = 6 x_max = 12 x = np.linspace(x_min, x_max, 100) for i in range(n_components): for j in range(n_dimensions): axis[i,j].plot(x, scipy.stats.norm.pdf(x,np.mean(advi_trace.get_values('mu')[:,i,j]), np.mean(advi_trace.get_values('sigma')[:,i,j])), color=colours[j]) mean_posteriorMean = np.zeros((n_components,n_dimensions)) for i in range(n_components): for j in range(n_dimensions): mean_posteriorMean[i,j] = np.mean(advi_trace.get_values('mu')[:,i,j]) # Show results hmc: f, axis = plt.subplots(n_components,n_dimensions, figsize=(n_components*2.5,n_dimensions*2.5)) plt.rcParams['axes.titlesize'] = 10 plt.rcParams['axes.facecolor'] = 'white' dotSize = 0.5 colours = ('gold', 'pink','green', 'red', 'blue') x_min = 6 x_max = 12 x = np.linspace(x_min, x_max, 100) for i in range(n_components): for j in range(n_dimensions): axis[i,j].plot(x, scipy.stats.norm.pdf(x,np.mean(hmc_trace.get_values('mu')[:,i,j]), np.mean(hmc_trace.get_values('sigma')[:,i,j])), color=colours[j]) mean_posteriorMean = np.zeros((n_components,n_dimensions)) for i in range(n_components): for j in range(n_dimensions): mean_posteriorMean[i,j] = np.mean(hmc_trace.get_values('mu')[:,i,j]) # Save trace means: advi_mus = np.array([[np.mean(advi_trace.get_values('mu')[:,i,j]) for i in range(n_components)] for j in range(n_dimensions)]) advi_sigmas = np.array([[np.mean(advi_trace.get_values('sigma')[:,i,j]) for i in range(n_components)] for j in range(n_dimensions)]) advi_w = np.array([np.mean(advi_trace.get_values('w')[:,i]) for i in range(n_components)]) advi_data = {"advi_mu": advi_mus, "advi_sigma": advi_sigmas, "advi_w": advi_w} pickle_out = open("data/" + 
slideNames[slide] + '_AdviFitResults.pickle',"wb")
pickle.dump(advi_data, pickle_out)
pickle_out.close()

# Calculate class membership, by using advi_trace and logp_normal function:
confidenceThreshold = 0.66
# NOTE(review): np.where(cond) returns a 1-tuple of index arrays, so each of these
# lists holds a single array of per-cell log-likelihoods — hence the [0][i] below.
class0LogProb = [logp_normal(np.mean(advi_trace.get_values('mu')[:,0]), np.mean(advi_trace.get_values('sigma')[:,0]) , data[k,channel]) for k in np.where(sectionNumber == section)]
class1LogProb = [logp_normal(np.mean(advi_trace.get_values('mu')[:,1]), np.mean(advi_trace.get_values('sigma')[:,1]) , data[k,channel]) for k in np.where(sectionNumber == section)]
# Two-class probabilities per cell in this section.
normalizedProbs = [exp_normalize(np.array((class0LogProb[0][i], class1LogProb[0][i]))) for i in range(len(class0LogProb[0]))]
maxProbs = [max(normalizedProbs[i]) for i in range(len(normalizedProbs))]
classMembership = [np.argmax(normalizedProbs[i]) for i in range(len(normalizedProbs))]
# Class 2 marks cells whose best class probability is below the confidence threshold.
confidentClass = [2 if maxProbs[i] < confidenceThreshold else classMembership[i] for i in range(len(classMembership))]

# Class membership probability:
pickle_out = open("data/" + slideNames[slide] + "Probability-" + celltypeOrder[channel] + '.pickle',"wb")
pickle.dump(normalizedProbs, pickle_out)
pickle_out.close()

### Plot results:
# Histograms:
# Intensity boundaries separating the confident-positive (1) and unsure (2) classes.
if sum(np.array(confidentClass) == 1) > 0:
    boundary1 = min(data[sectionNumber == section,channel][np.array(confidentClass) == 1])
else:
    boundary1 = np.inf
if sum(np.array(confidentClass) == 2) > 0:
    boundary2 = min(data[sectionNumber == section,channel][np.array(confidentClass) == 2])
else:
    boundary2 = 0
fig = plt.figure()
fig, ax = plt.subplots()
N, bins, patches = ax.hist(data[sectionNumber == section,channel], edgecolor='white', linewidth=1, bins = 100)
# Colour bars by classification region: blue below boundary2, black below boundary1, red above.
for i in range(0, len(patches)):
    if bins[i] < boundary2:
        patches[i].set_facecolor('b')
    elif bins[i] < boundary1:
        patches[i].set_facecolor('black')
    else:
        patches[i].set_facecolor('r')
plt.gca().set_title('Log Intensity and Classification Channel ' + channelOrder[channel])
plt.show()
# Statement continues on the next chunk (the savefig() call is split across lines).
fig.savefig("figures/" + slideNames[slide] + "/" +
"HistogramIntensityAndClassification" + "Section" + str(section) + "channel" + str(channel) + ".png", bbox_inches='tight')
plt.close(fig)

# Scatterplots:
# Colour confidently-positive cells (class 1) red, everything else black.
colours = np.repeat('black', sum(sectionNumber == section))
if sum(np.array(confidentClass) == 1) > 0:
    colours[np.array(confidentClass) == 1] = 'red'
fig = plt.figure()
# Raw (exponentiated) intensity vs x-position.
plt.scatter(kptn_data[sectionNumber == section,0], np.exp(data[sectionNumber == section,channel]), c = colours, s = 0.1)
plt.gca().set_title('Intensity and Classification Channel ' + channelOrder[channel])
plt.show()
fig.savefig("figures/" + slideNames[slide] + "/" + "ScatterPlotIntensityAndClassification" + "Section" + str(section) + "channel" + channelOrder[channel] + ".png", bbox_inches='tight')
plt.close(fig)
fig = plt.figure()
# Log intensity vs x-position.
plt.scatter(kptn_data[sectionNumber == section,0], data[sectionNumber == section,channel], c = colours, s = 0.1)
plt.gca().set_title('Log Intensity and Classification Channel ' + channelOrder[channel])
plt.show()
fig.savefig("figures/" + slideNames[slide] + "/" + "ScatterPlotLOGIntensityAndClassification" + "Section" + str(section) + "channel" + channelOrder[channel] + ".png", bbox_inches='tight')
plt.close(fig)

# Slide location of each cell type (including unclassified):
# Spatial positions of the confidently-positive cells only.
fig = plt.figure()
plt.scatter(kptn_data[sectionNumber == section,0][np.array(confidentClass) == 1], kptn_data[sectionNumber == section,1][np.array(confidentClass) == 1], s = 0.05)
plt.gca().set_title('Nuclei Positive Classification Slide ' + str(slide) + " Section " + str(section) + " Channel " + channelOrder[channel] + ".png")
plt.show()
fig.savefig("figures/" + slideNames[slide] + "/" + "PositiveClassificationPosition" + str(slide) + "section" + str(section) + "channel" + channelOrder[channel] + ".png", bbox_inches='tight')
plt.close(fig)
def logp_(value):
    """Mixture log-density of `value` over `n_components` Gaussian components.

    Stacks the weighted per-component log-densities, restricts the stacked
    tensor to the first `n_samples` observations, then applies a log-sum-exp
    over components and sums the result.  `pi`, `mus`, `taus`, `n_components`,
    `n_samples`, `logp_normal` and `logsumexp` come from the enclosing scope.
    """
    weighted_logps = []
    for comp in range(n_components):
        weighted_logps.append(tt.log(pi[comp]) + logp_normal(mus[comp, :], taus[comp], value))
    stacked = tt.stacklists(weighted_logps)[:, :n_samples]
    return tt.sum(logsumexp(stacked, axis=0))
# Fit: with model: advi_fit = pm.fit(n=2000, obj_optimizer=pm.adagrad(learning_rate=1e-1), method = 'advi') # Show results advi: advi_elbo = pd.DataFrame( {'log-ELBO': -np.log(advi_fit.hist), 'n': np.arange(advi_fit.hist.shape[0])}) _ = sns.lineplot(y='log-ELBO', x='n', data=advi_elbo) advi_trace = advi_fit.sample(10000) pm.summary(advi_trace, include_transformed=False) # Calculate class membership, by using advi_trace and logp_normal function: confidenceThreshold = 0.95 class0LogProb = [logp_normal(np.mean(advi_trace.get_values('mu')[:,0]), np.mean(advi_trace.get_values('sigma')[:,0]) , data[k,channel]) for k in np.where(sectionNumber == section)] class1LogProb = [logp_normal(np.mean(advi_trace.get_values('mu')[:,1]), np.mean(advi_trace.get_values('sigma')[:,1]) , data[k,channel]) for k in np.where(sectionNumber == section)] normalizedProbs = [exp_normalize(np.array((class0LogProb[0][i], class1LogProb[0][i]))) for i in range(len(class0LogProb[0]))] maxProbs = [max(normalizedProbs[i]) for i in range(len(normalizedProbs))] classMembership = [np.argmax(normalizedProbs[i]) for i in range(len(normalizedProbs))] confidentClass = [2 if maxProbs[i] < confidenceThreshold else classMembership[i] for i in range(len(classMembership))] # i.e. a value of 2 corresponds to a classification below our confidence threshold, unlike values 0 or 1 ### Plot results: # Histograms: if sum(np.array(confidentClass) == 1) > 0: boundary1 = min(data[sectionNumber == section,channel][np.array(confidentClass) == 1]) # i.e. what's the minimum value to confidently classify a cell as positive for this marker? else: boundary1 = np.inf
#ax.legend(loc=1); # Calculate class membership, by using hmc_trace and logp_normal function: #confidenceThreshold = 0.95 #class0LogProb = [logp_normal(np.mean(hmc_trace.get_values('mu_0')), np.mean(hmc_trace.get_values('sigma')[:,0]) , data[k,channel]) for k in np.where(sectionNumber == section)] #class1LogProb = [logp_normal(np.mean(hmc_trace.get_values('mu_1')), np.mean(hmc_trace.get_values('sigma')[:,1]) , data[k,channel]) for k in np.where(sectionNumber == section)] #normalizedProbs = [exp_normalize(np.array((class0LogProb[0][i], class1LogProb[0][i]))) for i in range(len(class0LogProb[0]))] #maxProbs = [max(normalizedProbs[i]) for i in range(len(normalizedProbs))] #classMembership = [np.argmax(normalizedProbs[i]) for i in range(len(normalizedProbs))] #confidentClass = [2 if maxProbs[i] < confidenceThreshold else classMembership[i] for i in range(len(classMembership))] # Calculate class membership, by using advi_trace and logp_normal function: confidenceThreshold = 0.95 class0LogProb = [ logp_normal(np.mean(advi_trace.get_values('mu')[:, 0]), np.mean(advi_trace.get_values('sigma')[:, 0]), data[k, channel]) for k in np.where(sectionNumber == section) ] class1LogProb = [ logp_normal(np.mean(advi_trace.get_values('mu')[:, 1]), np.mean(advi_trace.get_values('sigma')[:, 1]), data[k, channel]) for k in np.where(sectionNumber == section) ] normalizedProbs = [ exp_normalize( np.array((class0LogProb[0][i], class1LogProb[0][i]))) for i in range(len(class0LogProb[0])) ] maxProbs = [