def RunClusterAcc(dataset1, dataset2, gene_dataset, plotname):
    """Score several harmonization methods with computeARI and save the table.

    For each method a joint latent space is obtained from ``run_model`` and
    passed to ``computeARI`` together with two per-dataset reference latent
    spaces and one KMeans instance per dataset (sized by the number of cell
    types of the datasets as passed in).

    * 'readSeurat', 'MNN' and 'PCA' are scored against the reference latents
      read from ``../harmonization/Seurat_data/<plotname>.{1,2}.CCA.txt``.
    * The datasets are then replaced by the output of ``SubsetGenes`` and
      'vae', 'vae_nb', 'scanvi1' and 'scanvi2' are scored against latents
      from a 'vae' run on each dataset alone.

    The results are written to ``<plotname>.clusterScore.csv`` with one row
    per method, in the order: vae, vae_nb, scanvi1, scanvi2, readSeurat,
    MNN, PCA.  Nothing is returned.

    :param dataset1: first dataset (must expose ``cell_types``)
    :param dataset2: second dataset (must expose ``cell_types``)
    :param gene_dataset: combined dataset handed to ``run_model``
    :param plotname: prefix used for input and output file names
    """
    kmeans_1 = KMeans(len(dataset1.cell_types))
    kmeans_2 = KMeans(len(dataset2.cell_types))
    scores = {}

    # Baseline methods, compared against the precomputed CCA latent spaces.
    reference_1 = np.genfromtxt(
        '../harmonization/Seurat_data/' + plotname + '.1.CCA.txt')
    reference_2 = np.genfromtxt(
        '../harmonization/Seurat_data/' + plotname + '.2.CCA.txt')
    for method in ('readSeurat', 'MNN', 'PCA'):
        joint, batches, _labels, _keys, _stats = run_model(
            method, gene_dataset, dataset1, dataset2, filename=plotname)
        scores[method] = computeARI(
            reference_1, reference_2, joint, kmeans_1, kmeans_2, batches)

    # From here on every model sees the gene-subsetted datasets.
    dataset1, dataset2, gene_dataset = SubsetGenes(
        dataset1, dataset2, gene_dataset, plotname)

    # Single-dataset 'vae' runs provide the reference latents for the
    # remaining methods.
    reference_1, _, _, _, _ = run_model(
        'vae', dataset1, 0, 0, filename=plotname, rep='vae1')
    reference_2, _, _, _, _ = run_model(
        'vae', dataset2, 0, 0, filename=plotname, rep='vae2')
    for method in ('vae', 'vae_nb', 'scanvi1', 'scanvi2'):
        joint, batches, _labels, _keys, _stats = run_model(
            method, gene_dataset, dataset1, dataset2, filename=plotname)
        scores[method] = computeARI(
            reference_1, reference_2, joint, kmeans_1, kmeans_2, batches)

    # Row order of the output file differs from the execution order above.
    row_order = ('vae', 'vae_nb', 'scanvi1', 'scanvi2',
                 'readSeurat', 'MNN', 'PCA')
    res = np.asarray([scores[method] for method in row_order])
    np.savetxt("%s.clusterScore.csv" % (plotname), res, "%.4f", ',')
import pickle as pkl
import time

from scvi.dataset.dataset import GeneExpressionDataset
from scvi.harmonization.utils_chenling import SubsetGenes
from scvi.harmonization.utils_chenling import run_model

# Load the pickled (all_dataset, dataset1, dataset2) triple.  The context
# manager closes the file even if unpickling raises.
# NOTE(review): pickle can execute arbitrary code while loading; only point
# this at files produced by a trusted pipeline.
with open('../%s/gene_dataset.pkl'%plotname, 'rb') as f:
    all_dataset, dataset1, dataset2 = pkl.load(f)

# The pickled combined dataset is discarded and rebuilt from the two parts.
all_dataset = GeneExpressionDataset.concat_datasets(dataset1, dataset2)
dataset1, dataset2, gene_dataset = SubsetGenes(
    dataset1, dataset2, all_dataset, plotname)

# Time a single 'scmap' run and print the elapsed wall-clock seconds.
start = time.time()
latent, batch_indices, labels, keys, stats = run_model(
    'scmap', gene_dataset, dataset1, dataset2, filename=plotname)
end = time.time()
print(end - start)

batch = gene_dataset.batch_indices.ravel()
# Overrides the labels returned by run_model with the dataset's own labels.
labels = gene_dataset.labels.ravel()

# Divide every row of X by its mean over axis 1, then split the rows by
# batch index and apply log(1 + x).
# NOTE(review): assumes gene_dataset.X is a dense 2-D array - confirm.
scaling_factor = gene_dataset.X.mean(axis=1)
norm_X = gene_dataset.X / scaling_factor.reshape(len(scaling_factor), 1)
index_0 = np.where(batch == 0)[0]
index_1 = np.where(batch == 1)[0]
X1 = np.log(1 + norm_X[index_0])
X2 = np.log(1 + norm_X[index_1])

coral = CORAL()
pbmc2.filter_cell_types(newCellType) gene_dataset = GeneExpressionDataset.concat_datasets(pbmc, pbmc2) # _,_,_,_,_ = run_model('writedata', gene_dataset, pbmc, pbmc2,filename=plotname+'.' # +celltype1.replace(' ','')+'.' # +celltype2.replace(' ','')) rmCellTypes = '.' + celltype1.replace( ' ', '') + '.' + celltype2.replace(' ', '') latent1 = np.genfromtxt('../harmonization/Seurat_data/' + plotname + rmCellTypes.replace(' ', '') + '.1.CCA.txt') latent2 = np.genfromtxt('../harmonization/Seurat_data/' + plotname + rmCellTypes.replace(' ', '') + '.2.CCA.txt') latent, batch_indices, labels, keys, stats = run_model( 'readSeurat', gene_dataset, pbmc, pbmc2, filename=plotname + rmCellTypes.replace(' ', '')) acc, cell_type = KNNpurity(latent1, latent2, latent, batch_indices.ravel(), labels, keys) f.write('Seurat' + '\t' + rmCellTypes + ("\t%.4f" * 8 + "\t%s" * 8 + "\n") % tuple(list(acc) + list(cell_type))) be, temp1 = BEbyType(keys, latent, labels, batch_indices, celltype1) g.write('Seurat' + '\t' + rmCellTypes + ("\t%.4f" * 8 + "\t%s" * 8 + "\n") % tuple(be + list(temp1))) plotUMAP(latent, plotname, 'Seurat', gene_dataset.cell_types, rmCellTypes, gene_dataset.batch_indices.ravel()) pbmc, pbmc2, gene_dataset = SubsetGenes(