Beispiel #1
0
# Command-line options: directory holding the input files and the epoch count.
parser.add_argument(
    "--base_path",
    type=str,
    default='/Users/zhongyuanke/data/hca/',
    help="base path",
)
parser.add_argument("--epoch", type=int, default=15, help="epochs")
opt = parser.parse_args()

base_path = opt.base_path
epoch = opt.epoch

# Collected wall-clock durations in seconds; one entry is appended below.
time_list = []

# Read both inputs from base_path and print their summaries.
adata1 = sc.read_h5ad(base_path + 'blood_5w.h5ad')
adata2 = sc.read_h5ad(base_path + 'bone_5w.h5ad')
print(adata1)
print(adata2)

adata_all = tl.davae_preprocessing([adata1, adata2])

# Only the fit_integration call is timed; reading and preprocessing are not.
t0 = time.time()
adata_out = davae.fit_integration(
    adata_all,
    batch_size=256,
    epochs=epoch,
    sparse=True,
)
t1 = time.time()
elapsed = t1 - t0
print("Total time running DAVAE 10w cells: %s seconds" % (str(elapsed)))
time_list.append(elapsed)

# Resource report. The printed labels are Chinese for, in order: memory used
# (RSS of this process), total memory, memory usage percent, and CPU count.
# Byte counts are divided by 1024 three times before being labelled 'GB'.
info = psutil.virtual_memory()
rss_bytes = psutil.Process(os.getpid()).memory_info().rss
print('内存使用:', rss_bytes / 1024 / 1024 / 1024, 'GB')
print('总内存:', info.total / 1024 / 1024 / 1024, 'GB')
print('内存占比:', info.percent)
print('cpu个数:', psutil.cpu_count())
Beispiel #2
0
import scbean.model.davae as davae
import scbean.tools.utils as tl
import scanpy as sc
import matplotlib
from numpy.random import seed
# Seed numpy.random and select matplotlib's 'TkAgg' backend up front.
seed(2021)
matplotlib.use('TkAgg')

# Load the input file, split it by the "stim" key and print the first two parts.
adata = tl.read_sc_data('/Users/zhongyuanke/data/seurat_data/ifnb/ifnb.h5ad')
datasets = tl.split_object(adata, by="stim")
print(datasets[0])
print(datasets[1])

# Preprocess all parts together, then run the DAVAE integration.
adata_all = tl.davae_preprocessing(datasets, n_top_genes=8000)
adata_integrated = davae.fit_integration(
    adata_all,
    domain_lambda=3.0,
    epochs=30,
    hidden_layers=[128, 64, 32, 5],
)
print(adata_integrated)

# Downstream steps use the 'X_davae' representation: neighbours, Louvain
# clustering, UMAP, and a UMAP plot coloured by the 'louvain' result.
sc.pp.neighbors(adata_integrated, n_neighbors=15, use_rep='X_davae')
sc.tl.louvain(adata_integrated)
sc.tl.umap(adata_integrated)
sc.pl.umap(adata_integrated, color='louvain', cmap='tab20c')
Beispiel #3
0
# Commented-out code (not executed): highly-variable-gene selection on
# adata1/adata2, writing them back to disk, and removal of the HVG-related
# columns from .var.
# sc.pp.highly_variable_genes(adata2, n_top_genes=6000)
# adata1 = adata1[:, adata1.var.highly_variable]
# adata2 = adata2[:, adata2.var.highly_variable]
#
# adata1.write_h5ad(file1_p)
# adata2.write_h5ad(file2_p)
# del adata1.var['highly_variable']
# del adata2.var['highly_variable']
# del adata1.var['means']
# del adata2.var['means']
# del adata1.var['dispersions']
# del adata2.var['dispersions']
# del adata1.var['dispersions_norm']
# del adata2.var['dispersions_norm']

# Print both inputs before they are combined.
for dataset in (adata1, adata2):
    print(dataset)

adata_all = tl.davae_preprocessing([adata1, adata2], n_top_genes=4000)
adata_integrate = davae.fit_integration(
    adata_all,
    domain_lambda=0.5,
    epochs=25,
    hidden_layers=[128, 64, 32, 5],
    sparse=True,
)

# Commented-out alternative (not executed): compute the UMAP directly with the
# `umap` package from obsm['X_davae'].
# import umap
# adata_integrate.obsm['X_umap']=umap.UMAP().fit_transform(adata_integrate.obsm['X_davae'])
sc.pp.neighbors(adata_integrate, n_neighbors=8, use_rep='X_davae')
sc.tl.umap(adata_integrate)
sc.pl.umap(adata_integrate, color=['_batch'], s=3)

# Persist the integrated object; base_path and out_path are set before this
# excerpt.
output_file = base_path + out_path
adata_integrate.write_h5ad(output_file)
Beispiel #4
0
matplotlib.use('TkAgg')

# NOTE(review): `epochs` is assigned here, but the fit_integration call below
# passes the literal epochs=1000 instead — confirm which value is intended.
epochs = 40
base_path = '/Users/zhongyuanke/data/'

# Input files: the two batches plus the original (un-split) object.
file1 = base_path + 'dann_vae/benchmark1/dc_batch1.h5ad'
file2 = base_path + 'dann_vae/benchmark1/dc_batch2.h5ad'
orig_path = base_path + 'dann_vae/benchmark1/orig.h5ad'

# ------------- train -------------
adata1 = tl.read_sc_data(file1, fmt='h5ad')
adata2 = tl.read_sc_data(file2, fmt='h5ad')
adata_orig = tl.read_sc_data(orig_path, fmt='h5ad')
# orig_label =adata_orig.obs['label']
print(adata1)
print(adata2)

adata_all = tl.davae_preprocessing(
    [adata1, adata2],
    n_top_genes=4000,
    sparse=False,
)
adata_integrate = davae.fit_integration(
    adata_all,
    split_by='batch',
    epochs=1000,
    hidden_layers=[128, 64, 32, 2],
    sparse=False,
    domain_lambda=6,
)

# Copy the 'label' column over from the original object.
# NOTE(review): presumably this assignment aligns on the obs index — verify
# that the obs names of adata_integrate match those of adata_orig.
adata_integrate.obs['label'] = adata_orig.obs['label']

sc.pp.neighbors(adata_integrate, use_rep='X_davae')
sc.tl.umap(adata_integrate)
sc.pl.umap(adata_integrate, color=['batch', 'label'], s=10, cmap='Dark2')
# print(adata_integrate)
# adata_integrate.write_h5ad(base_path+'dann_vae/benchmark1/dc_davae_temp.h5ad')
Beispiel #5
0
base_path = '/Users/zhongyuanke/data/'
file_rna = '/Users/zhongyuanke/data/dann_vae/multimodal/rna.h5ad'
file_atac = '/Users/zhongyuanke/data/dann_vae/multimodal/atac.h5ad'
# NOTE(review): seurat_celltype_path and batch_size are not used anywhere in
# the visible part of this script (model.train below passes batch_size=32).
seurat_celltype_path = base_path + 'multimodal/atac_pbmc_10k/celltype_filt.csv'
batch_size = 128

# adata1 is read from the ATAC file, adata2 from the RNA file.
adata1 = sc.read_h5ad(file_atac)
adata2 = sc.read_h5ad(file_rna)
print(adata1)
print(adata2)
# Commented-out calls below refer to adata_b1/adata_b2/adata_b3, which are not
# defined in the visible part of this script.
# adata_b1.obs_names_make_unique()
# adata_b2.obs_names_make_unique()
# adata_b3.obs_names_make_unique()

adata_all = tl.davae_preprocessing(
    [adata1, adata2],
    n_top_genes=2000,
    hvg=False,
    lognorm=False,
)
adata_all.obs_names_make_unique()

# Register the object with scgen using 'batch_label' as batch key, then train.
adata_all = scgen.setup_anndata(adata_all, batch_key="batch_label", copy=True)
model = scgen.SCGEN(adata_all)
# NOTE(review): early_stopping_patience (25) exceeds max_epochs (15) — confirm
# that early stopping is meant to be effective here.
model.train(
    max_epochs=15,
    batch_size=32,
    early_stopping=True,
    early_stopping_patience=25,
    use_gpu=False,
)

# Batch-corrected output; later steps use its 'corrected_latent' representation.
corrected_adata = model.batch_removal()
sc.pp.neighbors(corrected_adata, use_rep='corrected_latent')
sc.tl.umap(corrected_adata)
sc.pl.umap(corrected_adata, color='batch')

corrected_adata.write('/Users/zhongyuanke/data/scgen/scgen_multimodal.h5ad')
Beispiel #6
0
import scbean.tools.utils as tl
import matplotlib
from numpy.random import seed
# Seed numpy.random and select matplotlib's 'TkAgg' backend up front.
# NOTE(review): `sc` and `scgen` are used below but not imported in the visible
# import lines — presumably imported above this excerpt; verify.
seed(2021)
matplotlib.use('TkAgg')

base_path = "/Users/zhongyuanke/data/vipcca/mixed_cell_lines/"

# Three inputs read from base_path.
adata_b1 = sc.read_h5ad(base_path + "293t.h5ad")
adata_b2 = sc.read_h5ad(base_path + "jurkat.h5ad")
adata_b3 = sc.read_h5ad(base_path + "mixed.h5ad")
# adata_b1.obs_names_make_unique()
# adata_b2.obs_names_make_unique()
# adata_b3.obs_names_make_unique()

adata_all = tl.davae_preprocessing(
    [adata_b1, adata_b2, adata_b3],
    n_top_genes=3000,
)
adata_all.obs_names_make_unique()

# Register the object with scgen using 'batch_label' as batch key, then train.
adata_all = scgen.setup_anndata(adata_all, batch_key="batch_label", copy=True)
model = scgen.SCGEN(adata_all)
model.train(
    max_epochs=25,
    batch_size=32,
    early_stopping=True,
    early_stopping_patience=25,
    use_gpu=False,
)

# Batch-corrected output; later steps use its 'corrected_latent' representation.
corrected_adata = model.batch_removal()
sc.pp.neighbors(corrected_adata, use_rep='corrected_latent')
sc.tl.umap(corrected_adata)
sc.pl.umap(corrected_adata, color='celltype')

# corrected_adata.write('/Users/zhongyuanke/data/scgen/scgen_mcl01.h5ad')
Beispiel #7
0
                    type=str,
                    default='/Users/zhongyuanke/data/',
                    help="base path")
# Number of training epochs; forwarded to davae.fit_integration below.
parser.add_argument("--epoch", type=int, default=10, help="epoch of training")

opt = parser.parse_args()

base_path = opt.base_path
# NOTE(review): out_path is not used in the visible part of this script.
out_path = 'dann_vae/hca/davae_01.h5ad'

# Both inputs are read with fmt='10x_h5' from paths under base_path.
file1 = base_path + 'HCA/ica_cord_blood_h5.h5'
file2 = base_path + 'HCA/ica_bone_marrow_h5.h5'
adata1 = tl.read_sc_data(file1, fmt='10x_h5')
adata2 = tl.read_sc_data(file2, fmt='10x_h5')
adata1.var_names_make_unique()
adata2.var_names_make_unique()
print(adata1)

adata_all = tl.davae_preprocessing([adata1, adata2], hvg=False, lognorm=False)
adata_integrate = davae.fit_integration(
    adata_all,
    split_by='batch',
    domain_lambda=5,
    # Train for the number of epochs requested with --epoch (default 10), so
    # the command-line option actually controls training length.
    epochs=opt.epoch,
    hidden_layers=[128, 64, 32, 5],
    sparse=True,
)

# Downstream steps use the 'X_davae' representation; the plot is coloured by
# the 'batch' column.
sc.pp.neighbors(adata_integrate, use_rep='X_davae')
sc.tl.umap(adata_integrate)
sc.pl.umap(adata_integrate, color='batch')
print(adata_integrate)
Beispiel #8
0
# Commented-out code (not executed): shuffling the raw arrays, then building,
# compiling and training a DACVAE network by hand and storing its embedding.
# loss_weight = np.array(loss_weight)
# print(adata1.shape)
# print(adata2.shape)
# print(orig_data.shape)

# data, batches, batch_label, loss_weight = shuffle(orig_data, orig_batches, orig_batch_label, loss_weight,
#                                             random_state=0)

# net_x = DACVAE(input_size=data.shape[1], batches=2, latent_size=10)
# net_x.build()
# net_x.compile()
# his = net_x.train(data, batches, loss_weight, epochs=epochs, batch_size=batch_size)
#
# mid = net_x.embedding(orig_data, orig_batches)
# adata_mid = anndata.AnnData(X=mid)
# adata_mid.obs['batch'] = orig_batch_label
# adata.obsm['davae'] = mid

# Active path: preprocess adata_list (defined before this excerpt) and run
# fit_integration with mode='DACVAE'.
adata_all = tl.davae_preprocessing(adata_list, sparse=False)
adata_out = davae.fit_integration(
    adata_all,
    split_by='batch',
    mode='DACVAE',
    domain_lambda=5.0,
    epochs=3,
    hidden_layers=[128, 64, 32, 5],
    sparse=False,
)

# Downstream steps use the 'X_davae' representation; the plot is coloured by
# the 'celltype' column.
embedding_key = 'X_davae'
sc.pp.neighbors(adata_out, use_rep=embedding_key)
sc.tl.umap(adata_out)
sc.pl.umap(adata_out, color='celltype')
print(adata_out)
# adata.write_h5ad(base_path+out_path)