예제 #1
0
파일: Prop_cor.py 프로젝트: Edouard360/scVI
        "correlation between the cell-type composition of the subsampled dataset is %.3f"
        % correlation)
    sub_dataset = deepcopy(gene_dataset)
    sub_dataset.update_cells(np.concatenate(cells))
    vae = VAE(sub_dataset.nb_genes,
              n_batch=sub_dataset.n_batches,
              n_labels=sub_dataset.n_labels,
              n_hidden=128,
              dispersion='gene')
    infer = VariationalInference(vae, sub_dataset, use_cuda=use_cuda)
    infer.train(n_epochs=250)
    latent, batch_indices, labels = infer.get_latent('sequential')
    keys = sub_dataset.cell_types
    batch_entropy = entropy_batch_mixing(latent, batch_indices)
    print("Entropy batch mixing :", batch_entropy)
    sample = select_indices_evenly(1000, labels)
    res = knn_purity_avg(latent[sample, :],
                         labels[sample].astype('int'),
                         keys=keys,
                         acc=True)
    print('average classification accuracy per cluster')
    for x in res:
        print(x)
    knn_acc = np.mean([x[1] for x in res])
    print("average KNN accuracy:", knn_acc)
    res = clustering_scores(
        np.asarray(latent)[sample, :], labels[sample], 'knn',
        len(np.unique(labels[sample])))
    for x in res:
        print(x, res[x])
예제 #2
0
from scvi.metrics.clustering import entropy_batch_mixing

# Instantiate the two datasets that are combined below.
dataset1 = MacoskoDataset()
dataset2 = RegevDataset()

# NOTE(review): this rebinds the name SEURAT from the callable to the instance
# it returns, so SEURAT() cannot be called a second time after this line.
SEURAT = SEURAT()
# One Seurat object per dataset; the second argument (0 / 1) is presumably the
# batch index assigned to that dataset -- TODO confirm against create_seurat.
seurat1 = SEURAT.create_seurat(dataset1,0)
seurat2 = SEURAT.create_seurat(dataset2,1)
# Publish both objects into the R session under the names used by the R
# expression evaluated next.
ro.r.assign("seurat1", seurat1)
ro.r.assign("seurat2", seurat2)
# NOTE(review): this value of `combined` is overwritten by the next assignment
# before it is ever read here -- confirm whether the hvg_CCA call is still
# needed (e.g. for R-side effects) or is leftover code.
combined = ro.r('hvg_CCA(seurat1,seurat2)')

combined = SEURAT.combine_seurat(dataset1,dataset2)
# Unpack the latent representation and the per-cell annotations from the
# combined object.
latent, batch_indices, labels, cell_types = SEURAT.get_cca(combined)
# Score the latent space: batch mixing entropy, then knn_purity_avg with
# acc=True on the labels cast to int.
batch_entropy = entropy_batch_mixing(latent, batch_indices)
res = knn_purity_avg(latent, labels.astype('int'), cell_types, acc=True)


key_color_order = [['Pvalb low', 'Pvalb', 'Pvalb 1', 'Pvalb 2'],
                    ['Pvalb Ex_1','Pvalb Ex_2','Pvalb Ex'],
                    ['Pvalb Astro_1','Pvalb Astro_2'],
                    ['L2/3 IT Astro', 'L2/3 IT Macc1', 'L2/3 IT Sla_Astro', 'L2/3 IT', 'L2/3 IT Sla', 'L2/3 IT Sla_Inh'],
                    ['Sst Tac2', 'Sst Myh8', 'Sst Etv1', 'Sst Chodl', 'Sst'],
                    ['L5 PT_2', 'L5 PT IT',  'L5 PT_1'],
                    ['L5 IT Tcap_1_3', 'L5 IT Tcap_2', 'L5 IT Tcap_Astro', 'L5 IT Tcap_1', 'L5 IT Tcap_L2/3', 'L5 IT Tcap_Foxp2', 'L5 IT Tcap_3'],
                    ['L5 IT Aldh1a7_2','L5 IT Aldh1a7', 'L5 IT Aldh1a7_1'],
                    ['L5 NP', 'L5 NP Slc17a8'],
                    ['L6 IT Car3','L6 CT Olig','L6 IT Maf','L6 IT Ntn5 Mgp', 'L6 IT Ntn5 Inpp4b'],
                    ['L6 CT Nxph2',  'L6 CT Astro','L6 CT', 'L6 CT Grp'],
                    ['L6b', 'L6b F2r'],
                    ['Lamp5 Sncg', 'Lamp5 Egln3', 'Lamp5 Slc35d3'],
예제 #3
0
elif model_type == 'Combat':
    COMBAT = COMBAT()
    latent = COMBAT.combat_pca(gene_dataset)
    latent = latent.T
    batch_indices = np.concatenate(gene_dataset.batch_indices)
    labels = np.concatenate(gene_dataset.labels)
    keys = gene_dataset.cell_types



# Batch-mixing score on a subsample of cells chosen from batch_indices
# (presumably balanced across batches -- verify in select_indices_evenly).
sample = select_indices_evenly(2000,batch_indices)
batch_entropy = entropy_batch_mixing(latent[sample, :], batch_indices[sample])
print("Entropy batch mixing :", batch_entropy)


# `sample` is rebound here: everything below uses a subsample chosen from
# `labels` instead of from `batch_indices`.
sample = select_indices_evenly(1000,labels)
# `keys` is indexed by the unique values of the full `labels` array, not only
# by the labels present in the subsample.
res = knn_purity_avg(
    latent[sample, :], labels[sample],
    keys=keys[np.unique(labels)], acc=True
)

# The number printed next to this caption is the mean of element [1] over all
# entries of `res`; the individual entries are printed by the loop below.
print('average classification accuracy per cluster',np.mean([x[1] for x in res]))
for x in res:
    print(x)

# `res` is rebound to the clustering scores; it is iterated by key and indexed
# with that key, i.e. used as a mapping from score name to value.
res = clustering_scores(np.asarray(latent)[sample,:],labels[sample],'knn',len(np.unique(labels[sample])))
for x in res:
    print(x,res[x])

# NOTE(review): `infer` is not assigned anywhere in the visible fragment --
# confirm it is defined on every code path that reaches this line.
infer.show_t_sne(color_by="batches and labels")
예제 #4
0
# Restrict the latent representation and the annotations to the sampled cells.
latent_s = latent[sample, :]
batch_s = batch_indices[sample]
label_s = labels[sample]
# Anything that is not already two columns wide is projected with TSNE()
# using its default arguments.
if latent_s.shape[1] != 2:
    latent_s = TSNE().fit_transform(latent_s)

# Scatter plot of the two columns, colored by batch, with the axes hidden.
plt.figure(figsize=(10, 10))
plt.scatter(latent_s[:, 0], latent_s[:, 1], c=batch_s, edgecolors='none')
plt.axis("off")
plt.tight_layout()
# The image is written one directory above the current working directory as
# "<plotname>.batch.png".
plt.savefig('../' + plotname + '.batch.png')

# Randomly permute sample_2batch and keep at most the first 1000 entries.
# No seed is set in this fragment, so the selection differs between runs
# unless the caller seeds NumPy beforehand.
sample_2batch = np.random.permutation(sample_2batch)[:1000]

# Same inputs for both calls; only the `acc` flag differs. The acc=False
# result is reported below as "KNN purity", the acc=True result as
# "classification accuracy".
res1 = knn_purity_avg(latent[sample_2batch, :],
                      labels[sample_2batch].astype('int'),
                      gene_dataset.cell_types,
                      acc=False)
res2 = knn_purity_avg(latent[sample_2batch, :],
                      labels[sample_2batch].astype('int'),
                      gene_dataset.cell_types,
                      acc=True)
# Print every entry of each result on its own line under its caption.
print('average KNN purity')
for x in res1:
    print(x)

print('average classification accuracy')
for x in res2:
    print(x)

# res1 = knn_purity(
#     latent[sample_2batch, :], labels[sample_2batch].astype('int'), batch_indices[sample_2batch], pop1, pop2,