Example no. 1
def runDESC(adata,
            batch,
            res=0.8,
            ncores=None,
            tmp_dir='/localscratch/tmp_desc/',
            use_gpu=False):
    """
    Convenience function to run DESC. Parametrization was taken from:
    https://github.com/eleozzr/desc/issues/28
    as suggested by the developer (rather than from the tutorial notebook).
    """
    import os

    import desc

    # Set number of CPUs to all available
    if ncores is None:
        ncores = os.cpu_count()

    adata_out = adata.copy()

    adata_out = desc.scale_bygroup(adata_out, groupby=batch, max_value=6)

    adata_out = desc.train(adata_out,
                           dims=[adata.shape[1], 128, 32],
                           tol=0.001,
                           n_neighbors=10,
                           batch_size=256,
                           louvain_resolution=res,
                           save_encoder_weights=False,
                           save_dir=tmp_dir,
                           do_tsne=False,
                           use_GPU=use_gpu,
                           num_Cores=ncores,
                           use_ae_weights=False,
                           do_umap=False)

    adata.obsm['X_emb'] = adata_out.obsm['X_Embeded_z' + str(res)]

    return adata
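
# A minimal usage sketch for the helper above; the input path and the
# 'batch' column name are hypothetical placeholders, not from the source:
import scanpy as sc

adata = sc.read_h5ad('my_data.h5ad')            # hypothetical input file
adata = runDESC(adata, batch='batch', res=0.8)

# the integrated embedding is stored under 'X_emb'
sc.pp.neighbors(adata, use_rep='X_emb')
sc.tl.umap(adata)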
Example no. 2
desc.scale(adata, zero_center=True, max_value=3)


# In[13]:


sc.pp.scale(adata, max_value=6)  # if the dataset has two or more batches you can use `adata=desc.scale(adata, groupby="BatchID")`
save_dir = "test_DESC"
adata = desc.train(adata,
                   dims=[adata.shape[1], 64, 32],
                   tol=0.005,
                   n_neighbors=10,
                   batch_size=256,
                   louvain_resolution=[1.0],  # need not be a list; a single value such as louvain_resolution=1.0 also works
                   save_dir=str(save_dir),
                   do_tsne=True,
                   learning_rate=200,  # learning rate for t-SNE
                   use_GPU=False,
                   num_Cores=1,  # for reproducibility, use only 1 CPU
                   num_Cores_tsne=4,
                   save_encoder_weights=False,
                   save_encoder_step=3,  # ignored when save_encoder_weights is False
                   use_ae_weights=False,
                   do_umap=True)  # if do_umap is False, UMAP coordinates are not computed


# In[14]:


adata
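
# DESC stores its outputs under keys suffixed with the clustering
# resolution, which is how the examples in this collection read them back.
# A small sketch for resolution 1.0, matching the call above:
res = 1.0
clusters = adata.obs['desc_' + str(res)]            # cluster assignments
embedding = adata.obsm['X_Embeded_z' + str(res)]    # low-dimensional embedding
probs = adata.uns['prob_matrix' + str(res)]         # soft assignment probabilities
print(clusters.value_counts())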

Example no. 3
sc.pp.highly_variable_genes(adata,
                            min_disp=0.5,
                            subset=True)
adata = adata[:, adata.var['highly_variable']]

desc.scale(
    adata, zero_center=True, max_value=3
)  # if the dataset has two or more batches you can use `adata=desc.scale(adata, groupby="BatchID")`
save_dir = "h5_result"

adata = desc.train(adata,
                   dims=[adata.shape[1], 128, 32],
                   tol=0.005,
                   n_neighbors=10,
                   batch_size=256,
                   louvain_resolution=[1.0],
                   save_dir=str(save_dir),
                   do_tsne=True,
                   learning_rate=200,
                   use_GPU=False,
                   num_Cores=2,
                   num_Cores_tsne=4,
                   save_encoder_weights=False,
                   save_encoder_step=3,
                   use_ae_weights=False,
                   do_umap=True,
                   max_iter=100,
                   pretrain_epochs=5)
adata.obs['max.prob'] = adata.uns["prob_matrix1.0"].max(1)
sc.pl.scatter(adata, basis="tsne1.0", color=['desc_1.0', 'max.prob'])
sc.pl.scatter(adata, basis="umap1.0", color=['desc_1.0', 'max.prob'])
Example no. 4
import desc
import scanpy as sc
from scbean.tools import utils as tl

file1 = 'dropviz/mouse_brain_dropviz_filtered.h5ad'
file2 = 'nuclei/adata_nuclei_filtered.h5ad'
desc_path = 'results/desc_mouse.h5ad'
adata1 = sc.read_h5ad(file1)
adata2 = sc.read_h5ad(file2)

adata_all = tl.davae_preprocessing([adata1, adata2], n_top_genes=2000)

adata_all.X = adata_all.X.A  # convert the sparse matrix to a dense array
adata_out = desc.train(adata_all,
                       dims=[adata_all.shape[1], 32, 16],
                       tol=0.03,
                       n_neighbors=10,
                       batch_size=256,
                       save_dir="result",
                       do_tsne=False,
                       learning_rate=300,
                       do_umap=True,
                       pretrain_epochs=50,
                       louvain_resolution=0.8,
                       save_encoder_weights=False,
                       use_GPU=True)
adata_out.write_h5ad(desc_path)
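
# Note: adata_all.X.A (used above) assumes X is a scipy sparse matrix and
# raises AttributeError when X is already dense; a guarded variant, as a
# sketch rather than part of the original script:
import scipy.sparse as sp

if sp.issparse(adata_all.X):      # densify only when X is actually sparse
    adata_all.X = adata_all.X.A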
Example no. 5
#                    batch_size=256, louvain_resolution=[0.8],
#                    save_dir="result", do_tsne=False, learning_rate=300,
#                    do_umap=False,
#                    save_encoder_weights=False)
#         result = fn_thread.result()
#
#     finally:
#         monitor.keep_measuring = False
#         max_usage = mem_thread.result()
#
#     print(f"Peak memory usage: {max_usage/1024/1024/1024} GB")
desc.normalize_per_cell(adata, counts_per_cell_after=1e4)

adata_out = desc.train(adata, dims=[adata.shape[1], 32, 16], tol=0.03, n_neighbors=10,
                       batch_size=256,
                       save_dir="result", do_tsne=False, learning_rate=100,
                       do_umap=False,
                       save_encoder_weights=False,
                       use_earlyStop=True, use_GPU=True)
t1 = time.time()
# info = psutil.virtual_memory()
# print('memory used:', psutil.Process(os.getpid()).memory_info().rss/1024/1024/1024, 'GB')
# print('total memory:', info.total/1024/1024/1024, 'GB')
# print('memory percent:', info.percent)
# print('CPU count:', psutil.cpu_count())
t = (t1-t0)/60
print("Total time running: %s min" % (str(t)))
#
# result = [psutil.Process(os.getpid()).memory_info().rss/1024/1024/1024, t1-t0]
# result = np.array(result)
# np.savetxt(base_path+'desc_memo.txt', result)
Example no. 6
import sys

import desc
import pandas as pd
import scanpy as sc

args = sys.argv

adata = sc.read_loom(args[1], sparse=True)
type = args[2]
latent = int(args[3])
if type == 'counts':
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    adata.raw = adata
    desc.scale(adata, zero_center=True, max_value=3)
# desc.normalize_per_cell(adata, counts_per_cell_after=1e4)
# desc.log1p(adata)
# adata.raw = adata
# desc.scale(adata, zero_center=True, max_value=3)

adata_train = desc.train(adata,
                         dims=[adata.shape[1], 128, 8],
                         n_neighbors=10,
                         tol=0.1,
                         save_dir="desc_result",
                         do_tsne=False,
                         do_umap=False,
                         use_GPU=True,
                         max_iter=100,
                         save_encoder_weights=True)

obsm_data = pd.DataFrame(adata_train.obsm["X_Embeded_z0.8"])
obsm_data.to_csv(args[1] + ".csv", sep=",")
Example no. 7
import matplotlib
import desc
import scanpy as sc
from scbean.tools import utils as tl

matplotlib.use('TkAgg')

base_path = '/Users/zhongyuanke/data/'
file_rna = '/Users/zhongyuanke/data/dann_vae/multimodal/rna.h5ad'
file_atac = '/Users/zhongyuanke/data/dann_vae/multimodal/atac.h5ad'
seurat_celltype_path = base_path + 'multimodal/atac_pbmc_10k/celltype_filt.csv'
batch_size = 128

adata1 = sc.read_h5ad(file_atac)
adata2 = sc.read_h5ad(file_rna)
print(adata1)
print(adata2)
# sc.pp.filter_genes(adata1, min_cells=100)
# sc.pp.filter_genes(adata2, min_cells=100)
# sc.pp.log1p(adata1)
# sc.pp.log1p(adata2)
# sc.pp.scale(adata2)
# adata2.obs['celltype'] = adata1.obs['celltype']

# adata2.write_h5ad(base_path + 'multimodal/atac_pbmc_10k/activaty_matrix_label.h5ad')
adata_all = tl.davae_preprocessing([adata1, adata2], n_top_genes=2000, hvg=False, lognorm=False)
adata_all.X = adata_all.X.A

adata_out = desc.train(adata_all, dims=[adata_all.shape[1], 32, 16], tol=0.005, n_neighbors=20,
                       batch_size=64,
                       save_encoder_weights=False)
print(adata_out)
adata_out.write_h5ad(base_path+'desc/desc_multimodal.h5ad')
Example no. 8
                                flavor="seurat_v3",
                                n_top_genes=2000,
                                batch_key="batch",
                                subset=True)


    adata = desc.scale_bygroup(adata, groupby='batch', max_value=10)

    adata = desc.train(adata, 
                       dims=[adata.shape[1], 128, 32], 
                       tol=0.001, 
                       n_neighbors=10,
                       batch_size=256, 
                       louvain_resolution=[0.8],
                       save_dir=outdir, 
                       do_tsne=False, 
                       learning_rate=300,
                       use_GPU=True,
                       num_Cores=1,
                       do_umap=False, 
                       num_Cores_tsne=4,
                       use_ae_weights=False,
                       save_encoder_weights=False)
    
    sc.pp.neighbors(adata, use_rep="X_Embeded_z0.8")
    sc.tl.umap(adata, min_dist=0.1)
    time2 = time.time()
    
    # UMAP
    sc.settings.figdir = outdir
    plt.rcParams['figure.figsize'] = (6, 8)
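
    # The snippet ends before the plot call; a plausible continuation
    # (a sketch, assuming the clusters live under adata.obs['desc_0.8'],
    # the key matching louvain_resolution=[0.8] above):
    sc.pl.umap(adata, color=['desc_0.8'], save='_desc.png')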
Example no. 9
sc.pp.highly_variable_genes(adata,
                            max_mean=3,
                            min_disp=0.5,
                            subset=True)
adata = adata[:, adata.var['highly_variable']]

desc.scale(adata, zero_center=True, max_value=3)
# max_value can be adjusted if desired

print("Training")
# save the encoder weights (note: this overwrites any previously saved weights)
adata = desc.train(adata,
                   dims=[adata.shape[1], 32, 16],
                   tol=0.005,
                   n_neighbors=10,
                   batch_size=256,
                   louvain_resolution=[0.8],
                   do_tsne=False,
                   learning_rate=300,
                   do_umap=False,
                   num_Cores_tsne=4,
                   save_encoder_weights=True)

#%%
import numpy as np
from lime.lime_tabular import LimeTabularExplainer  # LimeTabularExplainer.explain_instance will be used below

clusters = adata.obs['desc_0.8']
num_genes = adata.X.shape[1]
num_cells = adata.X.shape[0]
num_clusters = clusters.astype(int).max() + 1  # labels are integer-like categories
y_clusters = np.zeros(shape=(num_cells, num_clusters))
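
# y_clusters is allocated above but the snippet ends before it is filled;
# the natural next step is a one-hot encoding of the cluster labels,
# sketched here under the assumption that the labels are integer-like
# categories (as DESC produces):
labels = clusters.astype(int).to_numpy()
y_clusters[np.arange(num_cells), labels] = 1   # one row per cell, 1 in its cluster's column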
Example no. 10
adata1 = sc.read_h5ad(base_path + 'blood_5w.h5ad')
adata2 = sc.read_h5ad(base_path + 'bone_5w.h5ad')
print(adata1)
print(adata2)

t0 = time.time()
adata = adata1.concatenate(adata2)
adata = anndata.AnnData(X=adata.X.A, obs=adata.obs, var=adata.var)
desc.normalize_per_cell(adata, counts_per_cell_after=1e4)

adata_out = desc.train(adata,
                       dims=[adata.shape[1], 32, 16],
                       tol=0.005,
                       n_neighbors=10,
                       batch_size=256,
                       louvain_resolution=[0.8],
                       save_dir="result",
                       do_tsne=False,
                       learning_rate=300,
                       do_umap=False,
                       save_encoder_weights=False)
t1 = time.time()
print("Total time running DAVAE 10w cells: %s seconds" % (str(t1 - t0)))
time_list.append(t1 - t0)

adata1 = sc.read_h5ad(base_path + 'blood_10w.h5ad')
adata2 = sc.read_h5ad(base_path + 'bone_10w.h5ad')

t0 = time.time()
adata = adata1.concatenate(adata2)
adata = anndata.AnnData(X=adata.X.A, obs=adata.obs, var=adata.var)