# Batch-correct two mouse-brain datasets with Scanorama, embed the merged
# result with UMAP and write it to disk.
import argparse

import numpy as np
import pandas as pd
import scanorama
import scanpy as sc
from scbean.tools import utils as tl

# ---- input / output locations ----
# NOTE(review): base_path is defined but never joined onto the paths below,
# so they resolve relative to the current working directory -- confirm that
# this is intended.
base_path = '/Users/zhongyuanke/data/'
file1 = 'dropviz/mouse_brain_dropviz_filtered.h5ad'
file2 = 'nuclei/adata_nuclei_filtered.h5ad'
scan_path = 'results/scan_mouse.h5ad'

# -------------train---------------------
adata1 = tl.read_sc_data(file1, fmt='h5ad')
adata2 = tl.read_sc_data(file2, fmt='h5ad')
# orig_label =adata_orig.obs['label']
print(adata1)
print(adata2)

# Run Scanorama on both datasets, asking for a 16-dimensional reduced
# representation alongside the corrected data.
datas = [adata1, adata2]
corrected = scanorama.correct_scanpy(
    datas,
    return_dimred=True,
    dimred=16,
)

# Merge the two corrected objects into a single AnnData.
first_corrected = corrected[0]
second_corrected = corrected[1]
adata_corrected = first_corrected.concatenate(second_corrected)
print(adata_corrected)

# The neighbour graph (and therefore the UMAP) is computed on the
# 'X_scanorama' representation rather than on the expression matrix.
sc.pp.neighbors(adata_corrected, use_rep='X_scanorama')
sc.tl.umap(adata_corrected)

adata_corrected.write_h5ad(scan_path)
# Integrate the ifnb dataset with DAVAE after splitting it on its "stim"
# annotation, then cluster the integrated embedding and plot a UMAP.
import scbean.model.davae as davae
import scbean.tools.utils as tl
import scanpy as sc
import matplotlib
from numpy.random import seed

seed(2021)  # fix NumPy's global random state
matplotlib.use('TkAgg')

adata = tl.read_sc_data('/Users/zhongyuanke/data/seurat_data/ifnb/ifnb.h5ad')

# Split the single object into per-group datasets using the "stim" key.
datasets = tl.split_object(adata, by="stim")
print(datasets[0])
print(datasets[1])

adata_all = tl.davae_preprocessing(datasets, n_top_genes=8000)
adata_intagrate = davae.fit_integration(
    adata_all,
    epochs=30,
    hidden_layers=[128, 64, 32, 5],
    domain_lambda=3.0,
)
print(adata_intagrate)

# Neighbour graph, clustering and UMAP are all computed on the DAVAE
# embedding stored under 'X_davae'.
sc.pp.neighbors(adata_intagrate, use_rep='X_davae', n_neighbors=15)
sc.tl.louvain(adata_intagrate)
sc.tl.umap(adata_intagrate)

# 'louvain' is a categorical annotation. scanpy colours categorical groups
# from `palette`; `cmap`/`color_map` only applies to continuous values, so
# the previous cmap='tab20c' had no effect on the cluster colours.
sc.pl.umap(adata_intagrate, color='louvain', palette='tab20c')
# Load the anterior and posterior 10x mouse-brain matrices and make their
# gene names unique.
# NOTE(review): `tl` is used below but is not imported in the visible part
# of this file -- presumably imported above this fragment; confirm.
import matplotlib
from numpy.random import seed

seed(2021)
matplotlib.use('TkAgg')

# ---- data locations ----
base_path = '/Users/zhongyuanke/data/'
out_path = 'dann_vae/spatial/davae_save02.h5ad'
file1 = (
    base_path
    + 'spatial/mouse_brain/10x_mouse_brain_Anterior/V1_Mouse_Brain_Sagittal_Anterior_filtered_feature_bc_matrix.h5'
)
file2 = (
    base_path
    + 'spatial/mouse_brain/10x_mouse_brain_Posterior/V1_Mouse_Brain_Sagittal_Posterior_filtered_feature_bc_matrix.h5'
)
file1_p = base_path + 'spatial/10x_mouse_brain_Anterior/anterior.h5ad'
file2_p = base_path + 'spatial/10x_mouse_brain_Posterior/posterior.h5ad'

# ---- training settings ----
batch_size = 256
epochs = 25

# ---- load both sections, tagging each with its batch name ----
# adata1 = sc.read_10x_h5(file1)
adata1 = tl.read_sc_data(file1, fmt='10x_h5', batch_name='Anterior')
adata2 = tl.read_sc_data(file2, fmt='10x_h5', batch_name='Posterior')
print(adata1)
# print(adata1)

for _section in (adata1, adata2):
    _section.var_names_make_unique()

# ---- disabled preprocessing, kept for reference ----
# len1 = adata1.shape[0]
# len2 = adata2.shape[0]
# sc.pp.filter_genes(adata1, min_cells=30)
# sc.pp.filter_genes(adata2, min_cells=30)
# sc.pp.log1p(adata1)
# sc.pp.log1p(adata2)
# print(adata1)
# print(adata2)
#
# Integrate the two benchmark1 batches with DAVAE and attach the labels
# stored in the original (unsplit) dataset.
# NOTE(review): `tl` is used below but is not imported in the visible part
# of this file -- presumably imported above this fragment; confirm.
from scbean.model import davae as davae
import anndata
import scanpy as sc
import pandas as pd
import matplotlib

matplotlib.use('TkAgg')

# NOTE(review): `epochs` is set to 40 here, but the fit_integration call
# below passes a literal epochs=1000 -- confirm which value is intended.
epochs = 40

# ---- data locations ----
base_path = '/Users/zhongyuanke/data/'
file1 = base_path + 'dann_vae/benchmark1/dc_batch1.h5ad'
file2 = base_path + 'dann_vae/benchmark1/dc_batch2.h5ad'
orig_path = base_path + 'dann_vae/benchmark1/orig.h5ad'

# -------------train---------------------
adata1 = tl.read_sc_data(file1, fmt='h5ad')
adata2 = tl.read_sc_data(file2, fmt='h5ad')
adata_orig = tl.read_sc_data(orig_path, fmt='h5ad')
# orig_label =adata_orig.obs['label']
print(adata1)
print(adata2)

adata_all = tl.davae_preprocessing(
    [adata1, adata2],
    n_top_genes=4000,
    sparse=False,
)
adata_integrate = davae.fit_integration(
    adata_all,
    split_by='batch',
    epochs=1000,
    hidden_layers=[128, 64, 32, 2],
    sparse=False,
    domain_lambda=6,
)

# Copy the labels from the original dataset onto the integrated object.
# NOTE(review): this assignment aligns on the obs index -- verify that
# adata_integrate and adata_orig share the same obs names.
adata_integrate.obs['label'] = adata_orig.obs['label']
# Integrate the three mixed-cell-line batches with VIPCCA and plot a UMAP of
# the result.
import scbean.model.vipcca as vip
import scbean.tools.utils as tl
import scbean.tools.plotting as pl

# Please choose an appropriate matplotlib backend.
import matplotlib
# matplotlib.use('TkAgg')

# Directory that receives both the VIPCCA results and the figures.
result_dir = './results/CVAE_5/'

# Read the single-cell data, one AnnData per batch.
adata_b1 = tl.read_sc_data("./data/mixed_cell_lines/293t.h5ad", batch_name="293t")
adata_b2 = tl.read_sc_data("./data/mixed_cell_lines/jurkat.h5ad", batch_name="jurkat")
adata_b3 = tl.read_sc_data("./data/mixed_cell_lines/mixed.h5ad", batch_name="mixed")

# tl.preprocessing includes filtering, log-TPM normalization and selection
# of highly variable genes.
batches = [adata_b1, adata_b2, adata_b3]
adata_all = tl.preprocessing(batches)

# Construct VIPCCA with a specific setting.
handle = vip.VIPCCA(
    adata_all,
    res_path=result_dir,
    split_by="_batch",
    epochs=100,
    lambda_regulizer=5,
)

# Train and integrate the datasets. VIPCCA's output includes the cell
# representation in the reduced dimensional space and the recovered gene
# expression.
adata_integrate = handle.fit_integrate()

# Visualization
pl.run_embedding(adata_integrate, path=result_dir, method="umap")
pl.plotEmbedding(
    adata_integrate,
    path=result_dir,
    method='umap',
    group_by="_batch",
    legend_loc="right margin",
)
# Integrate the HCA cord-blood and bone-marrow datasets with DAVAE.
# Command-line options:
#   --base_path  directory prefix for the input files (keep the trailing
#                separator: paths are built by plain string concatenation)
#   --epoch      number of training epochs passed to DAVAE
matplotlib.use('TkAgg')

parser = argparse.ArgumentParser()
parser.add_argument("--base_path", type=str, default='/Users/zhongyuanke/data/', help="base path")
parser.add_argument("--epoch", type=int, default=10, help="epoch of training")
opt = parser.parse_args()

# ---- data locations ----
base_path = opt.base_path
out_path = 'dann_vae/hca/davae_01.h5ad'
file1 = base_path + 'HCA/ica_cord_blood_h5.h5'
file2 = base_path + 'HCA/ica_bone_marrow_h5.h5'

# ---- load both 10x matrices and make gene names unique ----
adata1 = tl.read_sc_data(file1, fmt='10x_h5')
adata2 = tl.read_sc_data(file2, fmt='10x_h5')
adata1.var_names_make_unique()
adata2.var_names_make_unique()
print(adata1)

# Highly-variable-gene selection and log-normalization are switched off.
adata_all = tl.davae_preprocessing([adata1, adata2], hvg=False, lognorm=False)
adata_integrate = davae.fit_integration(
    adata_all,
    split_by='batch',
    domain_lambda=5,
    # Honour --epoch: the call previously hard-coded epochs=1, so the parsed
    # option had no effect on training.
    epochs=opt.epoch,
    hidden_layers=[128, 64, 32, 5],
    sparse=True,
)

# Neighbour graph on the DAVAE embedding stored under 'X_davae'.
sc.pp.neighbors(adata_integrate, use_rep='X_davae')
# Concatenate the three mixed-cell-line batches into one file, then
# integrate them with DAVAE.
import scbean.model.davae as davae
import scbean.tools.utils as tl
import scanpy as sc
import matplotlib
from numpy.random import seed
import umap  # hoisted from below the training code so imports stay together

seed(2021)
matplotlib.use('TkAgg')

base_path = "/Users/zhongyuanke/data/vipcca/mixed_cell_lines/"
adata_b1 = tl.read_sc_data(base_path + "293t.h5ad", batch_name="293t")
adata_b2 = tl.read_sc_data(base_path + "jurkat.h5ad", batch_name="jurkat")
adata_b3 = tl.read_sc_data(base_path + "mixed.h5ad", batch_name="mixed")

# Save the plain concatenation of all three batches. The two-step
# concatenation is kept as-is so the written file is unchanged.
adata = adata_b1.concatenate(adata_b2)
adata = adata.concatenate(adata_b3)
adata.write_h5ad('/Users/zhongyuanke/data/pbmc/zheng/mcl.h5ad')

adata_all = tl.davae_preprocessing([adata_b1, adata_b2, adata_b3], n_top_genes=3000)
# Printed once; the original repeated this statement back-to-back.
print(adata_all)

adata_integrate = davae.fit_integration(
    adata_all,
    batch_num=3,
    split_by='batch_label',
    domain_lambda=3.0,
    epochs=25,
    sparse=True,
    hidden_layers=[128, 64, 32, 5],
)
# sc.pp.neighbors(adata_integrate, use_rep='X_davae', n_neighbors=10)
# sc.tl.umap(adata_integrate)
# Integrate the three mixed-cell-line batches with DAVAE and plot a UMAP of
# the integrated embedding.
import scbean.model.davae as davae
import scbean.tools.utils as tl
import scanpy as sc
import matplotlib
from numpy.random import seed
import umap

seed(2021)
matplotlib.use('TkAgg')

# ---- input files ----
r1 = "./data/mixed_cell_lines/mixed.h5ad"
r2 = "./data/mixed_cell_lines/293t.h5ad"
r3 = "./data/mixed_cell_lines/jurkat.h5ad"

# ---- load each batch with its batch name ----
adata_b1 = tl.read_sc_data(r1, batch_name='mix')
adata_b2 = tl.read_sc_data(r2, batch_name='293t')
adata_b3 = tl.read_sc_data(r3, batch_name='jurkat')

# ---- preprocess and integrate ----
adata_all = tl.davae_preprocessing(
    [adata_b1, adata_b2, adata_b3],
    n_top_genes=2000,
)
adata_integrate = davae.fit_integration(
    adata_all,
    batch_num=3,
    domain_lambda=3.0,
    epochs=25,
    sparse=True,
    hidden_layers=[64, 32, 6],
)

# ---- embed and plot ----
# Fit a UMAP (default settings) on the DAVAE embedding and store it where
# sc.pl.umap looks for its coordinates.
reducer = umap.UMAP()
davae_embedding = adata_integrate.obsm['X_davae']
adata_integrate.obsm['X_umap'] = reducer.fit_transform(davae_embedding)

sc.pl.umap(adata_integrate, color=['_batch', 'celltype'], s=3)