def integrate_spatial_rna(adata_spatial, adata_rna, type='anterior'):
    """Integrate spatial and scRNA-seq data with DAVAE and transfer labels.

    The two datasets are preprocessed jointly, embedded with
    ``davae.fit_integration``, and the ``X_davae`` embedding is split back
    into a spatial part and an RNA part.  One minus the cosine distance
    between every RNA cell and every spatial spot is handed to
    ``label_transfer`` together with the RNA ``cell_subclass`` annotation;
    the per-spot argmax over the returned class scores becomes the
    transferred label.

    Args:
        adata_spatial: spatial AnnData object.  Modified in place: gains
            ``obsm['davae_embedding']`` and ``obs['celltype']``.
        adata_rna: scRNA-seq AnnData object with ``obs.cell_subclass``.
            Modified in place: gains ``obsm['davae_embedding']``.
        type: tag inserted into the output file name.  (The name shadows
            the builtin but is kept for keyword-argument compatibility.)

    Side effects:
        Shows a UMAP plot and a spatial plot, prints the label counts and
        writes ``adata_spatial`` to
        ``/Users/zhongyuanke/data/dann_vae/spatial/<type>_label_02.h5ad``.
    """
    from collections import Counter

    adata_all = tl.spatial_rna_preprocessing(
        adata_spatial, adata_rna, n_top_genes=10000)
    adata_integrate = davae.fit_integration(
        adata_all,
        epochs=100,
        batch_size=2,
        domain_lambda=5,
        sparse=True,
        hidden_layers=[128, 64, 32],
        split_by='batch',
    )
    sc.pp.neighbors(adata_integrate, use_rep='X_davae')
    sc.tl.umap(adata_integrate)
    sc.pl.umap(adata_integrate, color='batch')

    # Split the joint embedding back into its two sources.
    # NOTE(review): this assumes the preprocessing step stacks the spatial
    # observations first and the RNA observations second - confirm.
    len_spatial = adata_spatial.shape[0]
    len_rna = adata_rna.shape[0]
    davae_emb = adata_integrate.obsm['X_davae']
    adata_spatial.obsm["davae_embedding"] = davae_emb[0:len_spatial, :]
    adata_rna.obsm['davae_embedding'] = davae_emb[len_spatial:len_rna + len_spatial, :]

    # 1 - cosine distance, i.e. a similarity despite the variable name.
    distances = 1 - cosine_distances(
        adata_rna.obsm["davae_embedding"],
        adata_spatial.obsm['davae_embedding'])
    class_prob_anterior = label_transfer(distances, adata_rna.obs.cell_subclass)

    # Columns are labelled with the sorted unique subclasses.
    # NOTE(review): presumably this matches the column order produced by
    # label_transfer - verify against its implementation.
    cp_spatial_df = pd.DataFrame(
        class_prob_anterior,
        columns=np.sort(adata_rna.obs.cell_subclass.unique()))
    label = cp_spatial_df.idxmax(axis='columns').values
    cp_spatial_df.index = adata_spatial.obs.index

    # Plot on a copy so the class-score columns do not end up in the
    # object that is written to disk.
    adata_transfer = adata_spatial.copy()
    adata_transfer.obs = pd.concat([adata_spatial.obs, cp_spatial_df], axis=1)
    sc.pl.spatial(
        adata_transfer,
        img_key="hires",
        # Fixed: the keyword was misspelled ``colot`` and therefore never
        # reached scanpy as the colouring key.
        color=['Hpca'],
        size=1.5,
        color_map='Blues',
        ncols=2,
        legend_fontsize='xx-small')

    adata_spatial.obs['celltype'] = label
    print(Counter(list(label)))
    adata_spatial.write_h5ad(
        '/Users/zhongyuanke/data/dann_vae/spatial/' + type + '_label_02.h5ad')
def deep_label_transfer(adata_spatial, adata_rna, type='anterior'):
    """Transfer RNA cell-type labels to spatial data with a classifier.

    The datasets are preprocessed jointly and embedded with
    ``davae.fit_integration``.  A ``dc.CLASSIFIER`` network is trained on
    the RNA part of the ``X_davae`` embedding against the one-hot encoded
    ``cell_subclass`` labels, then used to predict a label for every
    spatial observation.

    Args:
        adata_spatial: spatial AnnData object.  Modified in place: gains
            ``obs['celltype']`` holding the predicted labels.
        adata_rna: scRNA-seq AnnData object with ``obs.cell_subclass``.
        type: tag inserted into the output file name.  (The name shadows
            the builtin but is kept for keyword-argument compatibility.)

    Side effects:
        Shows a UMAP plot, prints intermediate labels and the predicted
        label counts, and writes ``adata_spatial`` to
        ``base_path + 'dann_vae/spatial/<type>_label_02.h5ad'`` where
        ``base_path`` is a module-level global.
    """
    # Imported locally so the function does not depend on a module-level
    # import being present.
    from collections import Counter

    adata_all = tl.spatial_rna_preprocessing(adata_spatial, adata_rna)
    adata_integrate = davae.fit_integration(
        adata_all,
        epochs=45,
        hidden_layers=[128, 64, 32, 5],
        sparse=True,
        domain_lambda=3.0,
    )
    sc.pp.neighbors(adata_integrate, use_rep='X_davae')
    sc.tl.umap(adata_integrate)
    sc.pl.umap(adata_integrate, color='batch')

    rna_celltype = adata_rna.obs.cell_subclass
    print(rna_celltype)
    encoder = LabelEncoder()
    # astype() converts the values; assigning to ``.dtype`` would merely
    # reinterpret the underlying bytes and corrupt any non-int64 result.
    orig_label = encoder.fit_transform(rna_celltype).astype('int64')
    print(orig_label)

    # NOTE(review): the slicing assumes spatial observations come first in
    # the joint embedding, followed by the RNA observations - confirm.
    davae_emb = adata_integrate.obsm['X_davae']
    len_spatial = adata_spatial.shape[0]
    len_rna = adata_rna.shape[0]
    test_set = davae_emb[0:len_spatial]
    train_set = davae_emb[len_spatial:len_spatial + len_rna]

    label = to_categorical(orig_label)
    print(label)
    class_num = label.shape[1]

    net_x = dc.CLASSIFIER(input_size=train_set.shape[1], class_num=class_num)
    net_x.build()
    net_x.compile()
    net_x.train(x=train_set, label=label, epochs=25, batch_size=128)

    # Same reasoning as above: convert, do not reinterpret.
    pred_label = net_x.prediction(test_set).astype('int64')
    pred_type = encoder.inverse_transform(pred_label)
    print(pred_type)
    print(Counter(list(pred_type)))

    adata_spatial.obs['celltype'] = pred_type
    adata_spatial.write_h5ad(
        base_path + 'dann_vae/spatial/' + type + '_label_02.h5ad')
help="base path") parser.add_argument("--epoch", type=int, default=15, help="epochs") opt = parser.parse_args() base_path = opt.base_path epoch = opt.epoch # time_list = [] adata1 = sc.read_h5ad(base_path + 'blood_5w.h5ad') adata2 = sc.read_h5ad(base_path + 'bone_5w.h5ad') print(adata1) print(adata2) adata_all = tl.davae_preprocessing([adata1, adata2]) t0 = time.time() adata_out = davae.fit_integration(adata_all, batch_size=256, epochs=epoch, sparse=True) t1 = time.time() print("Total time running DAVAE 10w cells: %s seconds" % (str(t1 - t0))) time_list.append(t1 - t0) info = psutil.virtual_memory() print('内存使用:', psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 / 1024, 'GB') print('总内存:', info.total / 1024 / 1024 / 1024, 'GB') print('内存占比:', info.percent) print('cpu个数:', psutil.cpu_count()) adata1 = sc.read_h5ad(base_path + 'blood_10w.h5ad') adata2 = sc.read_h5ad(base_path + 'bone_10w.h5ad') print(adata1) print(adata2)
# sc.pp.highly_variable_genes(adata2, n_top_genes=6000)
# adata1 = adata1[:, adata1.var.highly_variable]
# adata2 = adata2[:, adata2.var.highly_variable]
#
# adata1.write_h5ad(file1_p)
# adata2.write_h5ad(file2_p)
# del adata1.var['highly_variable']
# del adata2.var['highly_variable']
# del adata1.var['means']
# del adata2.var['means']
# del adata1.var['dispersions']
# del adata2.var['dispersions']
# del adata1.var['dispersions_norm']
# del adata2.var['dispersions_norm']

# Show both inputs before they are merged.
for _adata in (adata1, adata2):
    print(_adata)

# Joint preprocessing followed by DAVAE integration.
adata_all = tl.davae_preprocessing([adata1, adata2], n_top_genes=4000)
_davae_options = dict(
    epochs=25,
    hidden_layers=[128, 64, 32, 5],
    sparse=True,
    domain_lambda=0.5,
)
adata_integrate = davae.fit_integration(adata_all, **_davae_options)

# import umap
# adata_integrate.obsm['X_umap']=umap.UMAP().fit_transform(adata_integrate.obsm['X_davae'])

# Neighbour graph and UMAP on the DAVAE embedding, coloured by '_batch'.
sc.pp.neighbors(adata_integrate, use_rep='X_davae', n_neighbors=8)
sc.tl.umap(adata_integrate)
sc.pl.umap(adata_integrate, color=['_batch'], s=3)

# Persist the integrated object.
_result_file = base_path + out_path
adata_integrate.write_h5ad(_result_file)
import scbean.model.davae as davae
import scbean.tools.utils as tl
import scanpy as sc
import matplotlib
from numpy.random import seed

# Seed NumPy's global RNG and select the Tk backend before any plotting.
seed(2021)
matplotlib.use('TkAgg')

# Load the ifnb dataset and split it on the "stim" annotation.
adata = tl.read_sc_data('/Users/zhongyuanke/data/seurat_data/ifnb/ifnb.h5ad')
datasets = tl.split_object(adata, by="stim")
for _idx in (0, 1):
    print(datasets[_idx])

# Joint preprocessing and DAVAE integration of the split datasets.
adata_all = tl.davae_preprocessing(datasets, n_top_genes=8000)
_davae_options = dict(
    epochs=30,
    hidden_layers=[128, 64, 32, 5],
    domain_lambda=3.0,
)
# The spelling of ``adata_intagrate`` (sic) is kept because it is a
# module-level name.
adata_intagrate = davae.fit_integration(adata_all, **_davae_options)
print(adata_intagrate)

# Neighbour graph on the DAVAE embedding, Louvain clustering, then UMAP.
sc.pp.neighbors(adata_intagrate, use_rep='X_davae', n_neighbors=15)
sc.tl.louvain(adata_intagrate)
sc.tl.umap(adata_intagrate)
sc.pl.umap(adata_intagrate, color='louvain', cmap='tab20c')
matplotlib.use('TkAgg')

# NOTE(review): ``epochs`` is not referenced in the visible part of this
# script; the integration call below trains with a literal 1000 epochs.
epochs = 40
base_path = '/Users/zhongyuanke/data/'
_benchmark_dir = base_path + 'dann_vae/benchmark1/'
file1 = _benchmark_dir + 'dc_batch1.h5ad'
file2 = _benchmark_dir + 'dc_batch2.h5ad'
orig_path = _benchmark_dir + 'orig.h5ad'

# -------------train---------------------
adata1 = tl.read_sc_data(file1, fmt='h5ad')
adata2 = tl.read_sc_data(file2, fmt='h5ad')
adata_orig = tl.read_sc_data(orig_path, fmt='h5ad')
# orig_label =adata_orig.obs['label']
for _adata in (adata1, adata2):
    print(_adata)

# Dense preprocessing and integration into a 2-dimensional last layer.
adata_all = tl.davae_preprocessing(
    [adata1, adata2],
    n_top_genes=4000,
    sparse=False,
)
adata_integrate = davae.fit_integration(
    adata_all,
    split_by='batch',
    epochs=1000,
    hidden_layers=[128, 64, 32, 2],
    sparse=False,
    domain_lambda=6,
)

# Copy the reference labels over.
# NOTE(review): this Series assignment aligns on the obs index - confirm
# that both objects share the same observation names.
_reference_labels = adata_orig.obs['label']
adata_integrate.obs['label'] = _reference_labels

sc.pp.neighbors(adata_integrate, use_rep='X_davae')
sc.tl.umap(adata_integrate)
sc.pl.umap(
    adata_integrate,
    color=['batch', 'label'],
    s=10,
    cmap='Dark2',
)
# print(adata_integrate)
# adata_integrate.write_h5ad(base_path+'dann_vae/benchmark1/dc_davae_temp.h5ad')
type=str, default='/Users/zhongyuanke/data/', help="base path") parser.add_argument("--epoch", type=int, default=10, help="epoch of training") opt = parser.parse_args() base_path = opt.base_path out_path = 'dann_vae/hca/davae_01.h5ad' file1 = base_path + 'HCA/ica_cord_blood_h5.h5' file2 = base_path + 'HCA/ica_bone_marrow_h5.h5' adata1 = tl.read_sc_data(file1, fmt='10x_h5') adata2 = tl.read_sc_data(file2, fmt='10x_h5') adata1.var_names_make_unique() adata2.var_names_make_unique() print(adata1) adata_all = tl.davae_preprocessing([adata1, adata2], hvg=False, lognorm=False) adata_integrate = davae.fit_integration( adata_all, split_by='batch', domain_lambda=5, epochs=1, hidden_layers=[128, 64, 32, 5], sparse=True, ) sc.pp.neighbors(adata_integrate, use_rep='X_davae') sc.tl.umap(adata_integrate) sc.pl.umap(adata_integrate, color='batch') print(adata_integrate)
matplotlib.use('TkAgg')

base_path = "/Users/zhongyuanke/data/vipcca/mixed_cell_lines/"

# Read the three batches, tagging each with its batch name.
adata_b1 = tl.read_sc_data(base_path + "293t.h5ad", batch_name="293t")
adata_b2 = tl.read_sc_data(base_path + "jurkat.h5ad", batch_name="jurkat")
adata_b3 = tl.read_sc_data(base_path + "mixed.h5ad", batch_name="mixed")

# Save a plain concatenation of the three batches.
adata = adata_b1.concatenate(adata_b2).concatenate(adata_b3)
adata.write_h5ad('/Users/zhongyuanke/data/pbmc/zheng/mcl.h5ad')

# Joint preprocessing; the summary is printed twice.
adata_all = tl.davae_preprocessing(
    [adata_b1, adata_b2, adata_b3],
    n_top_genes=3000,
)
for _ in range(2):
    print(adata_all)

adata_integrate = davae.fit_integration(
    adata_all,
    batch_num=3,
    split_by='batch_label',
    domain_lambda=3.0,
    epochs=25,
    sparse=True,
    hidden_layers=[128, 64, 32, 5],
)
# sc.pp.neighbors(adata_integrate, use_rep='X_davae', n_neighbors=10)
# sc.tl.umap(adata_integrate)

# Compute the UMAP directly from the DAVAE embedding with umap-learn and
# store it where scanpy's plotting expects it.
import umap
_latent = adata_integrate.obsm['X_davae']
adata_integrate.obsm['X_umap'] = umap.UMAP().fit_transform(_latent)
sc.pl.umap(adata_integrate, color=['_batch', 'celltype'], s=3)
# adata_integrate.write_h5ad('/Users/zhongyuanke/data/dann_vae/pbmc/davae_save02.h5ad')
# loss_weight = np.array(loss_weight)
# print(adata1.shape)
# print(adata2.shape)
# print(orig_data.shape)
# data, batches, batch_label, loss_weight = shuffle(orig_data, orig_batches, orig_batch_label, loss_weight,
#                                                   random_state=0)
# net_x = DACVAE(input_size=data.shape[1], batches=2, latent_size=10)
# net_x.build()
# net_x.compile()
# his = net_x.train(data, batches, loss_weight, epochs=epochs, batch_size=batch_size)
#
# mid = net_x.embedding(orig_data, orig_batches)
# adata_mid = anndata.AnnData(X=mid)
# adata_mid.obs['batch'] = orig_batch_label
# adata.obsm['davae'] = mid

# Dense preprocessing, then integration in 'DACVAE' mode.
adata_all = tl.davae_preprocessing(adata_list, sparse=False)
_davae_options = dict(
    split_by='batch',
    mode='DACVAE',
    domain_lambda=5.0,
    epochs=3,
    hidden_layers=[128, 64, 32, 5],
    sparse=False,
)
adata_out = davae.fit_integration(adata_all, **_davae_options)

# Neighbour graph and UMAP on the DAVAE embedding, coloured by cell type.
sc.pp.neighbors(adata_out, use_rep='X_davae')
sc.tl.umap(adata_out)
sc.pl.umap(adata_out, color='celltype')
print(adata_out)
# adata.write_h5ad(base_path+out_path)
import scbean.model.davae as davae
import scbean.tools.utils as tl
import scanpy as sc
import matplotlib
from numpy.random import seed
import umap

# Seed NumPy's global RNG and select the Tk backend before any plotting.
seed(2021)
matplotlib.use('TkAgg')

# Input files for the three batches.
_cell_line_dir = "./data/mixed_cell_lines/"
r1 = _cell_line_dir + "mixed.h5ad"
r2 = _cell_line_dir + "293t.h5ad"
r3 = _cell_line_dir + "jurkat.h5ad"

adata_b1 = tl.read_sc_data(r1, batch_name='mix')
adata_b2 = tl.read_sc_data(r2, batch_name='293t')
adata_b3 = tl.read_sc_data(r3, batch_name='jurkat')

# Joint preprocessing followed by three-batch DAVAE integration.
adata_all = tl.davae_preprocessing(
    [adata_b1, adata_b2, adata_b3],
    n_top_genes=2000,
)
_davae_options = dict(
    batch_num=3,
    domain_lambda=3.0,
    epochs=25,
    sparse=True,
    hidden_layers=[64, 32, 6],
)
adata_integrate = davae.fit_integration(adata_all, **_davae_options)

# Compute the UMAP directly from the DAVAE embedding with umap-learn and
# store it where scanpy's plotting expects it.
_latent = adata_integrate.obsm['X_davae']
adata_integrate.obsm['X_umap'] = umap.UMAP().fit_transform(_latent)
sc.pl.umap(adata_integrate, color=['_batch', 'celltype'], s=3)
# sc.pp.log1p(adata2)
# sc.pp.scale(adata2)
# adata2.obs['celltype'] = adata1.obs['celltype']
# adata2.write_h5ad(base_path + 'multimodal/atac_pbmc_10k/activaty_matrix_label.h5ad')

# Joint preprocessing with HVG selection and log-normalisation disabled.
_preprocess_options = dict(n_top_genes=2000, hvg=False, lognorm=False)
adata_all = tl.davae_preprocessing([adata1, adata2], **_preprocess_options)
# sc.pp.scale(adata_all)
print(adata_all)

# Two-batch DAVAE integration.
_davae_options = dict(
    # mode='DAVAE',
    batch_num=2,
    split_by='batch_label',
    domain_lambda=6.0,
    epochs=60,
    sparse=True,
    hidden_layers=[128, 64, 32, 16, 8],
)
adata_integrate = davae.fit_integration(adata_all, **_davae_options)
print(adata_integrate)
# adata_integrate.obs['celltyp']=adata

import umap

# Neighbour graph and UMAP on the DAVAE embedding via scanpy.
sc.pp.neighbors(adata_integrate, use_rep='X_davae')
sc.tl.umap(adata_integrate)
# adata_integrate.obs['celltype']

_result_file = '/Users/zhongyuanke/data/dann_vae/multimodal/davae_multi_temp.h5ad'
adata_integrate.write_h5ad(_result_file)
#
#
# label=tool.get_label_by_txt(seurat_celltype_path)
# batch_categories=[
# k
# for d in [
# adata_spatial_anterior.uns["spatial"],
# adata_spatial_posterior.uns["spatial"],
# ]
# for k, v in d.items()
# ],
# )
print(adata_spatial)

import scbean.model.davae as davae

# DAVAE integration of the combined spatial object, split on 'loss_weight'.
_davae_options = dict(
    epochs=25,
    split_by='loss_weight',
    hidden_layers=[128, 64, 32, 5],
    sparse=True,
    domain_lambda=0.5,
)
adata_integrate = davae.fit_integration(adata_spatial, **_davae_options)
print(adata_spatial)
# embedding_adata = sc.read_h5ad(base_path+'dann_vae/spatial/davae_save02.h5ad')

# Attach the learned embedding to the spatial object and analyse it there.
_latent = adata_integrate.obsm['X_davae']
adata_spatial.obsm["X_davae"] = _latent
sc.pp.neighbors(adata_spatial, use_rep='X_davae', n_neighbors=10)
sc.tl.umap(adata_spatial)
sc.tl.leiden(adata_spatial, key_added="clusters")
sc.pl.umap(
    adata_spatial,
    color=["library_id", "clusters"],
    size=4,
    color_map='Set2',
    frameon=False,
)
# sc.pl.umap(
import scanpy as sc
import argparse
import scbean.model.davae as davae
import scbean.tools.utils as tl

# Command-line options: data directory, dataset size tag and epoch count.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--base_path",
    type=str,
    default='/Users/zhongyuanke/data/hca/',
    help="base path",
)
parser.add_argument("--type", type=str, default='5w', help="cell counts")
parser.add_argument("--epoch", type=int, default=2, help="epochs")
opt = parser.parse_args()

# Input files are named '<tissue>_<type>.h5ad' under the base path.
base_path = opt.base_path
file1, file2 = (
    f"{base_path}{_tissue}_{opt.type}.h5ad" for _tissue in ('blood', 'bone')
)
adata1 = sc.read_h5ad(file1)
adata2 = sc.read_h5ad(file2)
for _adata in (adata1, adata2):
    print(_adata)

# Preprocess with log-normalisation and HVG selection switched off, then
# integrate the two batches for the requested number of epochs.
adata_all = tl.davae_preprocessing([adata1, adata2], lognorm=False, hvg=False)
_davae_options = dict(
    batch_num=2,
    split_by='batch_label',
    domain_lambda=2.0,
    epochs=opt.epoch,
    sparse=True,
    hidden_layers=[64, 32, 6],
)
adata_integrate = davae.fit_integration(adata_all, **_davae_options)