# NOTE(review): the code line below has lost its original line breaks and
# indentation, and it opens with the tail of a call whose start is outside
# this view.  It is kept byte-for-byte; block nesting (for example, which
# statements belong to the `for x in res:` loops) cannot be recovered from
# here -- restore the layout from version control before running.
#
# Statements, in order of appearance:
#   1. Finish a %-formatted message that reports `correlation` with three
#      decimals.
#   2. Deep-copy `gene_dataset` and call update_cells(np.concatenate(cells))
#      on the copy, leaving the name `gene_dataset` bound to the original.
#   3. Build a VAE from the copy's nb_genes / n_batches / n_labels
#      (n_hidden=128, dispersion='gene'), wrap it in VariationalInference
#      and call train(n_epochs=250).
#   4. Read latent, batch_indices and labels from
#      infer.get_latent('sequential').
#   5. Print entropy_batch_mixing(latent, batch_indices).
#   6. Choose indices with select_indices_evenly(1000, labels), call
#      knn_purity_avg(..., acc=True) on that subset, print every entry and
#      then the mean of element [1] of each entry.
#   7. Call clustering_scores(..., 'knn', <number of distinct sampled
#      labels>) on the same subset and print each key x together with
#      res[x].
"correlation between the cell-type composition of the subsampled dataset is %.3f" % correlation) sub_dataset = deepcopy(gene_dataset) sub_dataset.update_cells(np.concatenate(cells)) vae = VAE(sub_dataset.nb_genes, n_batch=sub_dataset.n_batches, n_labels=sub_dataset.n_labels, n_hidden=128, dispersion='gene') infer = VariationalInference(vae, sub_dataset, use_cuda=use_cuda) infer.train(n_epochs=250) latent, batch_indices, labels = infer.get_latent('sequential') keys = sub_dataset.cell_types batch_entropy = entropy_batch_mixing(latent, batch_indices) print("Entropy batch mixing :", batch_entropy) sample = select_indices_evenly(1000, labels) res = knn_purity_avg(latent[sample, :], labels[sample].astype('int'), keys=keys, acc=True) print('average classification accuracy per cluster') for x in res: print(x) knn_acc = np.mean([x[1] for x in res]) print("average KNN accuracy:", knn_acc) res = clustering_scores( np.asarray(latent)[sample, :], labels[sample], 'knn', len(np.unique(labels[sample]))) for x in res: print(x, res[x])
# NOTE(review): the code line below has lost its original line breaks and
# indentation and ends inside the still-open `key_color_order` list, which
# continues past this view.  It is kept byte-for-byte.
#
# Statements, in order of appearance:
#   1. Import entropy_batch_mixing.  knn_purity_avg, ro and the dataset /
#      SEURAT classes are used but not imported here -- presumably imported
#      earlier in the file; TODO confirm.
#   2. Instantiate MacoskoDataset and RegevDataset.
#   3. `SEURAT = SEURAT()` rebinds the name SEURAT from the class to an
#      instance, so the class is no longer reachable under that name
#      afterwards.
#   4. Call create_seurat on each dataset (second argument 0 and 1) and
#      assign the results into R as "seurat1" / "seurat2" via ro.r.assign.
#   5. Evaluate 'hvg_CCA(seurat1,seurat2)' through ro.r and bind the result
#      to `combined`.  NOTE(review): the very next statement rebinds
#      `combined` to SEURAT.combine_seurat(dataset1, dataset2), so the first
#      value is never read in the visible code -- confirm whether the R call
#      is kept only for its side effects or is leftover.
#   6. Unpack latent, batch_indices, labels and cell_types from
#      SEURAT.get_cca(combined).
#   7. Compute entropy_batch_mixing(latent, batch_indices) and
#      knn_purity_avg(latent, labels.astype('int'), cell_types, acc=True);
#      neither result is printed in the visible code.
#   8. Start `key_color_order`, a list of lists of label strings (the name
#      suggests a plotting colour order -- TODO confirm against its use).
from scvi.metrics.clustering import entropy_batch_mixing dataset1 = MacoskoDataset() dataset2 = RegevDataset() SEURAT = SEURAT() seurat1 = SEURAT.create_seurat(dataset1,0) seurat2 = SEURAT.create_seurat(dataset2,1) ro.r.assign("seurat1", seurat1) ro.r.assign("seurat2", seurat2) combined = ro.r('hvg_CCA(seurat1,seurat2)') combined = SEURAT.combine_seurat(dataset1,dataset2) latent, batch_indices, labels, cell_types = SEURAT.get_cca(combined) batch_entropy = entropy_batch_mixing(latent, batch_indices) res = knn_purity_avg(latent, labels.astype('int'), cell_types, acc=True) key_color_order = [['Pvalb low', 'Pvalb', 'Pvalb 1', 'Pvalb 2'], ['Pvalb Ex_1','Pvalb Ex_2','Pvalb Ex'], ['Pvalb Astro_1','Pvalb Astro_2'], ['L2/3 IT Astro', 'L2/3 IT Macc1', 'L2/3 IT Sla_Astro', 'L2/3 IT', 'L2/3 IT Sla', 'L2/3 IT Sla_Inh'], ['Sst Tac2', 'Sst Myh8', 'Sst Etv1', 'Sst Chodl', 'Sst'], ['L5 PT_2', 'L5 PT IT', 'L5 PT_1'], ['L5 IT Tcap_1_3', 'L5 IT Tcap_2', 'L5 IT Tcap_Astro', 'L5 IT Tcap_1', 'L5 IT Tcap_L2/3', 'L5 IT Tcap_Foxp2', 'L5 IT Tcap_3'], ['L5 IT Aldh1a7_2','L5 IT Aldh1a7', 'L5 IT Aldh1a7_1'], ['L5 NP', 'L5 NP Slc17a8'], ['L6 IT Car3','L6 CT Olig','L6 IT Maf','L6 IT Ntn5 Mgp', 'L6 IT Ntn5 Inpp4b'], ['L6 CT Nxph2', 'L6 CT Astro','L6 CT', 'L6 CT Grp'], ['L6b', 'L6b F2r'], ['Lamp5 Sncg', 'Lamp5 Egln3', 'Lamp5 Slc35d3'],
# NOTE(review): the code line below has lost its original line breaks and
# indentation; the `if` that this `elif` continues is outside this view, and
# it is not recoverable from here where the 'Combat' branch body ends.  The
# line is kept byte-for-byte.
#
# Statements, in order of appearance:
#   1. Branch taken when model_type == 'Combat'.
#   2. `COMBAT = COMBAT()` rebinds the name COMBAT from the class to an
#      instance.  NOTE(review): if this code sits inside a function, the
#      assignment makes COMBAT a local name and the call raises
#      UnboundLocalError; at module level it only hides the class.  Confirm
#      the enclosing scope and consider a distinct instance name.
#   3. latent = COMBAT.combat_pca(gene_dataset), then transposed with `.T`.
#   4. batch_indices / labels are np.concatenate of the dataset's
#      batch_indices / labels; keys is gene_dataset.cell_types.
#   5. select_indices_evenly(2000, batch_indices) picks the subset used for
#      entropy_batch_mixing, whose value is printed.
#   6. select_indices_evenly(1000, labels) picks a second subset for
#      knn_purity_avg(..., acc=True).  `keys` is indexed with
#      np.unique(labels) (all labels), whereas clustering_scores below counts
#      np.unique(labels[sample]) (sampled labels only).
#   7. The print labelled 'average classification accuracy per cluster'
#      shows one number, the mean of element [1] over all entries; the
#      per-entry values are printed by the loop that follows.
#   8. clustering_scores(..., 'knn', ...) runs on the sampled subset and each
#      key x is printed with res[x].
#   9. infer.show_t_sne(color_by="batches and labels") -- `infer` is not
#      assigned anywhere in this visible line; presumably it is set in an
#      earlier branch.  TODO confirm it is defined when this branch runs.
elif model_type == 'Combat': COMBAT = COMBAT() latent = COMBAT.combat_pca(gene_dataset) latent = latent.T batch_indices = np.concatenate(gene_dataset.batch_indices) labels = np.concatenate(gene_dataset.labels) keys = gene_dataset.cell_types sample = select_indices_evenly(2000,batch_indices) batch_entropy = entropy_batch_mixing(latent[sample, :], batch_indices[sample]) print("Entropy batch mixing :", batch_entropy) sample = select_indices_evenly(1000,labels) res = knn_purity_avg( latent[sample, :], labels[sample], keys=keys[np.unique(labels)], acc=True ) print('average classification accuracy per cluster',np.mean([x[1] for x in res])) for x in res: print(x) res = clustering_scores(np.asarray(latent)[sample,:],labels[sample],'knn',len(np.unique(labels[sample]))) for x in res: print(x,res[x]) infer.show_t_sne(color_by="batches and labels")
# NOTE(review): the code line below has lost its original line breaks and
# indentation.  Everything after the first `#` on it is commented-out code
# that is cut off at the end of this view.  The line is kept byte-for-byte;
# where the `if` body and the `for` loop bodies end cannot be recovered
# from here.
#
# Statements, in order of appearance:
#   1. Slice latent, batch_indices and labels by `sample` into latent_s,
#      batch_s and label_s.  label_s is not read again in the visible code.
#   2. If latent_s.shape[1] != 2, replace latent_s with
#      TSNE().fit_transform(latent_s).
#   3. Draw a 10x10 figure: scatter of the first two columns of latent_s
#      coloured by batch_s, axes off, tight layout, saved to
#      '../' + plotname + '.batch.png'.
#   4. Shuffle `sample_2batch` with np.random.permutation and keep the first
#      1000 entries.  Its initial definition is not visible here --
#      presumably set earlier in the file; TODO confirm.
#   5. Call knn_purity_avg twice on that subset with
#      gene_dataset.cell_types, once with acc=False (res1) and once with
#      acc=True (res2), then print every entry of each under the headings
#      'average KNN purity' and 'average classification accuracy'.
latent_s = latent[sample, :] batch_s = batch_indices[sample] label_s = labels[sample] if latent_s.shape[1] != 2: latent_s = TSNE().fit_transform(latent_s) plt.figure(figsize=(10, 10)) plt.scatter(latent_s[:, 0], latent_s[:, 1], c=batch_s, edgecolors='none') plt.axis("off") plt.tight_layout() plt.savefig('../' + plotname + '.batch.png') sample_2batch = np.random.permutation(sample_2batch)[:1000] res1 = knn_purity_avg(latent[sample_2batch, :], labels[sample_2batch].astype('int'), gene_dataset.cell_types, acc=False) res2 = knn_purity_avg(latent[sample_2batch, :], labels[sample_2batch].astype('int'), gene_dataset.cell_types, acc=True) print('average KNN purity') for x in res1: print(x) print('average classification accuracy') for x in res2: print(x) # res1 = knn_purity( # latent[sample_2batch, :], labels[sample_2batch].astype('int'), batch_indices[sample_2batch], pop1, pop2,