def runDESC(adata, batch, res=0.8, ncores=None, tmp_dir='/localscratch/tmp_desc/', use_gpu=False):
    """
    Convenience function to run DESC.
    Parametrization was taken from https://github.com/eleozzr/desc/issues/28
    as suggested by the developer (rather than from the tutorial notebook).
    """
    import os
    import desc

    # Default to all available CPUs
    if ncores is None:
        ncores = os.cpu_count()

    adata_out = adata.copy()
    adata_out = desc.scale_bygroup(adata_out, groupby=batch, max_value=6)

    adata_out = desc.train(adata_out,
                           dims=[adata.shape[1], 128, 32],
                           tol=0.001,
                           n_neighbors=10,
                           batch_size=256,
                           louvain_resolution=res,
                           save_encoder_weights=False,
                           save_dir=tmp_dir,
                           do_tsne=False,
                           use_GPU=use_gpu,
                           num_Cores=ncores,
                           use_ae_weights=False,
                           do_umap=False)

    adata.obsm['X_emb'] = adata_out.obsm['X_Embeded_z' + str(res)]
    return adata
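# A minimal usage sketch for runDESC, assuming an AnnData whose .obs contains
# a 'batch' column; the file path and column name below are illustrative
# assumptions, not taken from the function above.
import scanpy as sc

adata = sc.read_h5ad('input.h5ad')             # hypothetical input file
adata = runDESC(adata, batch='batch', res=0.8)
print(adata.obsm['X_emb'].shape)               # integrated embedding, cells x 32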
desc.scale(adata, zero_center=True, max_value=3)

# In[13]:

sc.pp.scale(adata, max_value=6)  # if the dataset has two or more batches you can use `adata=desc.scale(adata,groupby="BatchID")`
save_dir = "test_DESC"
adata = desc.train(adata,
                   dims=[adata.shape[1], 64, 32],
                   tol=0.005,
                   n_neighbors=10,
                   batch_size=256,
                   louvain_resolution=[1.0],  # not necessarily a list; you can also set a single value, e.g. louvain_resolution=1.0
                   save_dir=str(save_dir),
                   do_tsne=True,
                   learning_rate=200,  # the learning rate of t-SNE
                   use_GPU=False,
                   num_Cores=1,  # for reproducibility, use only 1 CPU
                   num_Cores_tsne=4,
                   save_encoder_weights=False,
                   save_encoder_step=3,  # ignored when save_encoder_weights is False
                   use_ae_weights=False,
                   do_umap=True)  # if do_umap is False, UMAP coordinates will not be computed

# In[14]:

adata
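# After training, DESC stores its outputs under resolution-suffixed keys
# (cluster labels in .obs, embeddings and coordinates in .obsm). A quick
# sketch of how to inspect them for louvain_resolution=1.0:
print(adata.obs['desc_1.0'].value_counts())  # cluster assignments
print(adata.obsm['X_Embeded_z1.0'].shape)    # bottleneck embedding
print(adata.obsm['X_tsne1.0'][:5])           # t-SNE coordinates (do_tsne=True)
print(adata.obsm['X_umap1.0'][:5])           # UMAP coordinates (do_umap=True)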
sc.pp.highly_variable_genes(adata,
                            min_disp=0.5,
                            subset=True)
adata = adata[:, adata.var['highly_variable']]
desc.scale(adata, zero_center=True, max_value=3)  # if the dataset has two or more batches you can use `adata=desc.scale(adata,groupby="BatchID")`
save_dir = "h5_result"
adata = desc.train(adata,
                   dims=[adata.shape[1], 128, 32],
                   tol=0.005,
                   n_neighbors=10,
                   batch_size=256,
                   louvain_resolution=[1.0],
                   save_dir=str(save_dir),
                   do_tsne=True,
                   learning_rate=200,
                   use_GPU=False,
                   num_Cores=2,
                   num_Cores_tsne=4,
                   save_encoder_weights=False,
                   save_encoder_step=3,
                   use_ae_weights=False,
                   do_umap=True,
                   max_iter=100,
                   pretrain_epochs=5)
adata.obs['max.prob'] = adata.uns["prob_matrix1.0"].max(1)
sc.pl.scatter(adata, basis="tsne1.0", color=['desc_1.0', 'max.prob'])
sc.pl.scatter(adata, basis="umap1.0", color=['desc_1.0', 'max.prob'])
import desc
import scanpy as sc
from scbean.tools import utils as tl

file1 = 'dropviz/mouse_brain_dropviz_filtered.h5ad'
file2 = 'nuclei/adata_nuclei_filtered.h5ad'
desc_path = 'results/desc_mouse.h5ad'

adata1 = sc.read_h5ad(file1)
adata2 = sc.read_h5ad(file2)
adata_all = tl.davae_preprocessing([adata1, adata2], n_top_genes=2000)
adata_all.X = adata_all.X.A

adata_out = desc.train(adata_all,
                       dims=[adata_all.shape[1], 32, 16],
                       tol=0.03,
                       n_neighbors=10,
                       batch_size=256,
                       save_dir="result",
                       do_tsne=False,
                       learning_rate=300,
                       do_umap=True,
                       pretrain_epochs=50,
                       louvain_resolution=0.8,
                       save_encoder_weights=False,
                       use_GPU=True)
adata_out.write_h5ad(desc_path)
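# A hedged follow-up sketch: reading the integration result back and plotting
# the DESC clusters on the DESC-computed UMAP. The obsm/obs key suffixes
# follow the louvain_resolution=0.8 convention used in these snippets.
adata_desc = sc.read_h5ad(desc_path)
sc.pl.scatter(adata_desc, basis="umap0.8", color='desc_0.8')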
# batch_size=256, louvain_resolution=[0.8],
# save_dir="result", do_tsne=False, learning_rate=300,
# do_umap=False,
# save_encoder_weights=False)
# result = fn_thread.result()
#
# finally:
#     monitor.keep_measuring = False
#     max_usage = mem_thread.result()
#
# print(f"Peak memory usage: {max_usage/1024/1024/1024} GB")

desc.normalize_per_cell(adata, counts_per_cell_after=1e4)
adata_out = desc.train(adata,
                       dims=[adata.shape[1], 32, 16],
                       tol=0.03,
                       n_neighbors=10,
                       batch_size=256,
                       save_dir="result",
                       do_tsne=False,
                       learning_rate=100,
                       do_umap=False,
                       save_encoder_weights=False,
                       use_earlyStop=True,
                       use_GPU=True)
t1 = time.time()
# info = psutil.virtual_memory()
# print('Memory used:', psutil.Process(os.getpid()).memory_info().rss/1024/1024/1024, 'GB')
# print('Total memory:', info.total/1024/1024/1024, 'GB')
# print('Memory percent:', info.percent)
# print('CPU count:', psutil.cpu_count())
t = (t1 - t0) / 60
print("Total time running: %s min" % str(t))
# result = [psutil.Process(os.getpid()).memory_info().rss/1024/1024/1024, t1-t0]
# result = np.array(result)
# np.savetxt(base_path+'desc_memo.txt', result)
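# A self-contained sketch of the resource reporting the commented-out lines
# above gesture at, using psutil (assumed installed) to report the current
# process's resident memory alongside elapsed wall time.
import os
import time
import psutil

t0 = time.time()
# ... run desc.train(...) here ...
t1 = time.time()

rss_gb = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 / 1024
print("Resident memory: %.2f GB" % rss_gb)
print("Elapsed: %.1f min" % ((t1 - t0) / 60))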
import sys
import desc
import pandas as pd
import scanpy as sc  # scanpy.api is deprecated; import scanpy directly

args = sys.argv
adata = sc.read_loom(args[1], sparse=True)
type = args[2]
latent = int(args[3])

if type == 'counts':
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    adata.raw = adata
    desc.scale(adata, zero_center=True, max_value=3)
    # desc.normalize_per_cell(adata, counts_per_cell_after=1e4)
    # desc.log1p(adata)
    # adata.raw = adata
    # desc.scale(adata, zero_center=True, max_value=3)

adata_train = desc.train(adata,
                         dims=[adata.shape[1], 128, 8],
                         n_neighbors=10,
                         tol=0.1,
                         save_dir="desc_result",
                         do_tsne=False,
                         do_umap=False,
                         use_GPU=True,
                         max_iter=100,
                         save_encoder_weights=True)
obsm_data = pd.DataFrame(adata.obsm["X_Embeded_z0.8"])  # 0.8 is DESC's default louvain_resolution
obsm_data.to_csv(args[1] + ".csv", sep=",")
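# Hypothetical invocation of the script above (the script name is
# illustrative): positional arguments are the input .loom file, the data
# type, and the latent dimensionality.
#
#   python run_desc.py data/pbmc.loom counts 8
#
# The embedding is then written next to the input as data/pbmc.loom.csv.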
import desc
import matplotlib
import scanpy as sc
import ttools as tool
from scbean.tools import utils as tl

matplotlib.use('TkAgg')

base_path = '/Users/zhongyuanke/data/'
file_rna = '/Users/zhongyuanke/data/dann_vae/multimodal/rna.h5ad'
file_atac = '/Users/zhongyuanke/data/dann_vae/multimodal/atac.h5ad'
seurat_celltype_path = base_path + 'multimodal/atac_pbmc_10k/celltype_filt.csv'
batch_size = 128

adata1 = sc.read_h5ad(file_atac)
adata2 = sc.read_h5ad(file_rna)
print(adata1)
print(adata2)
# sc.pp.filter_genes(adata1, min_cells=100)
# sc.pp.filter_genes(adata2, min_cells=100)
# sc.pp.log1p(adata1)
# sc.pp.log1p(adata2)
# sc.pp.scale(adata2)
# adata2.obs['celltype'] = adata1.obs['celltype']
# adata2.write_h5ad(base_path + 'multimodal/atac_pbmc_10k/activaty_matrix_label.h5ad')

adata_all = tl.davae_preprocessing([adata1, adata2], n_top_genes=2000, hvg=False, lognorm=False)
adata_all.X = adata_all.X.A
adata_out = desc.train(adata_all,
                       dims=[adata_all.shape[1], 32, 16],
                       tol=0.005,
                       n_neighbors=20,
                       batch_size=64,
                       save_encoder_weights=False)
print(adata_out)
adata_out.write_h5ad(base_path + 'desc/desc_multimodal.h5ad')
flavor="seurat_v3", n_top_genes=2000, batch_key="batch", subset=True) adata = desc.scale_bygroup(adata, groupby='batch', max_value=10) adata = desc.train(adata, dims=[adata.shape[1], 128, 32], tol=0.001, n_neighbors=10, batch_size=256, louvain_resolution=[0.8], save_dir=outdir, do_tsne=False, learning_rate=300, use_GPU=True, num_Cores=1, do_umap=False, num_Cores_tsne=4, use_ae_weights=False, save_encoder_weights=False) sc.pp.neighbors(adata, use_rep="X_Embeded_z0.8") sc.tl.umap(adata, min_dist=0.1) time2 = time.time() # UMAP sc.settings.figdir = outdir plt.rcParams['figure.figsize'] = (6, 8)
sc.pp.highly_variable_genes(adata,
                            max_mean=3,
                            min_disp=0.5,
                            subset=True)
adata = adata[:, adata.var['highly_variable']]
desc.scale(adata, zero_center=True, max_value=3)  # max_value can be adjusted

print("Training")
adata = desc.train(adata,
                   dims=[adata.shape[1], 32, 16],
                   tol=0.005,
                   n_neighbors=10,
                   batch_size=256,
                   louvain_resolution=[0.8],
                   do_tsne=False,
                   learning_rate=300,
                   do_umap=False,
                   num_Cores_tsne=4,
                   save_encoder_weights=True)  # save encoder weights so they are not lost if the run fails

#%%
import numpy as np
from lime.lime_tabular import LimeTabularExplainer
# LimeTabularExplainer.explain_instance is the method of interest here

clusters = adata.obs['desc_0.8'].astype(int)  # cast categorical labels to int for the arithmetic below
num_genes = adata.X.shape[1]
num_cells = adata.X.shape[0]
num_clusters = max(clusters) + 1
y_clusters = np.zeros(shape=(num_cells, num_clusters))
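# A hedged sketch of where the snippet is headed: fill the one-hot cluster
# matrix and build a LimeTabularExplainer over the (dense, scaled) expression
# matrix. The feature and class names are illustrative assumptions.
for i, c in enumerate(clusters):
    y_clusters[i, c] = 1.0

explainer = LimeTabularExplainer(adata.X,
                                 mode='classification',
                                 feature_names=list(adata.var_names),
                                 class_names=[str(c) for c in range(num_clusters)])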
adata1 = sc.read_h5ad(base_path + 'blood_5w.h5ad')
adata2 = sc.read_h5ad(base_path + 'bone_5w.h5ad')
print(adata1)
print(adata2)

t0 = time.time()
adata = adata1.concatenate(adata2)
adata = anndata.AnnData(X=adata.X.A, obs=adata.obs, var=adata.var)
desc.normalize_per_cell(adata, counts_per_cell_after=1e4)
adata_out = desc.train(adata,
                       dims=[adata.shape[1], 32, 16],
                       tol=0.005,
                       n_neighbors=10,
                       batch_size=256,
                       louvain_resolution=[0.8],
                       save_dir="result",
                       do_tsne=False,
                       learning_rate=300,
                       do_umap=False,
                       save_encoder_weights=False)
t1 = time.time()
print("Total time running DESC on 100k cells: %s seconds" % (str(t1 - t0)))
time_list.append(t1 - t0)

adata1 = sc.read_h5ad(base_path + 'blood_10w.h5ad')
adata2 = sc.read_h5ad(base_path + 'bone_10w.h5ad')
t0 = time.time()
adata = adata1.concatenate(adata2)
adata = anndata.AnnData(X=adata.X.A, obs=adata.obs, var=adata.var)
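# A hedged sketch of the continuation, mirroring the 100k-cell timing block
# above for the two 10w (100k-cell) datasets, i.e. 200k cells in total:
desc.normalize_per_cell(adata, counts_per_cell_after=1e4)
adata_out = desc.train(adata,
                       dims=[adata.shape[1], 32, 16],
                       tol=0.005,
                       n_neighbors=10,
                       batch_size=256,
                       louvain_resolution=[0.8],
                       save_dir="result",
                       do_tsne=False,
                       learning_rate=300,
                       do_umap=False,
                       save_encoder_weights=False)
t1 = time.time()
print("Total time running DESC on 200k cells: %s seconds" % (str(t1 - t0)))
time_list.append(t1 - t0)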