def load_data(filename, batch_size):
    """Load a 10x ``h5ad`` dataset and return it with its rows shuffled.

    The project helper module ``preprocess_10x`` is imported lazily after
    ``./lib/pylib/`` has been pushed to the front of ``sys.path``.  The
    function also relies on a module-level ``np`` (NumPy) being in scope.

    Args:
        filename: Path forwarded to ``prep.read_10x_data`` with
            ``format_type="10x_h5ad"``.
        batch_size: Divisor used to derive ``n_batches``.

    Returns:
        A tuple ``(adata, n_batches, input_size)``:
        ``adata`` is the loaded object indexed by a random permutation of
        ``adata.obs.index``; ``n_batches`` is
        ``int(adata.shape[0] / batch_size)``; ``input_size`` is
        ``int(adata.shape[1])``.
    """
    import sys

    # Make the project-local helpers importable before pulling them in.
    sys.path.insert(0, './lib/pylib/')
    import preprocess_10x as prep

    dataset = prep.read_10x_data(filename, format_type="10x_h5ad")

    # Sizes are taken before shuffling; shuffling does not change the shape.
    n_batches = int(dataset.shape[0] / batch_size)
    input_size = int(dataset.shape[1])

    # Reorder the rows by a random permutation of the observation labels.
    shuffled_labels = np.random.permutation(dataset.obs.index)
    return dataset[shuffled_labels, :], n_batches, input_size
def load_data(filename, batch_size, ref=True):
    """Load, split, normalise and shuffle a 10x ``h5ad`` dataset.

    Relies on module-level names ``prep`` (``preprocess_10x``), ``np``
    (NumPy) and ``path`` being defined by the surrounding file.
    NOTE(review): ``path`` is not a parameter here; confirm it is set at
    module level before this function is called.

    Args:
        filename: Path forwarded to ``prep.read_10x_data`` with
            ``format_type="10x_h5ad"``.
        batch_size: Divisor used to derive ``n_batches``.
        ref: When truthy, keep rows whose ``replicate_id`` category code is
            below 4; otherwise keep rows whose code is 4 or above.  The same
            value is forwarded as ``filter_disp_genes`` to
            ``prep.normalization``.

    Returns:
        A tuple ``(adata, n_batches, input_size)``:
        ``adata`` is the normalised subset indexed by a random permutation of
        ``adata.obs.index``; ``n_batches`` is
        ``int(adata.shape[0] / batch_size)``; ``input_size`` is
        ``int(adata.shape[1])``.
    """
    dataset = prep.read_10x_data(filename, format_type="10x_h5ad")

    # Store the integer code of each cell type as its own categorical column.
    type_codes = dataset.obs['cell_type'].cat.codes
    dataset.obs['cell_type_code'] = type_codes.astype("category")

    # Split on the replicate code: low codes for ref, the rest otherwise.
    replicate_codes = dataset.obs.replicate_id.cat.codes
    keep = replicate_codes < 4 if ref else replicate_codes >= 4
    dataset = dataset[keep, :]

    dataset = prep.normalization(
        dataset, path=path, hg=False, filter_disp_genes=ref)

    n_batches = int(dataset.shape[0] / batch_size)
    input_size = int(dataset.shape[1])

    # Reorder the rows by a random permutation of the observation labels.
    shuffled_labels = np.random.permutation(dataset.obs.index)
    return dataset[shuffled_labels, :], n_batches, input_size
# Plot marker-gene t-SNE panels and a QC violin plot for a clustered dataset.
#
# Command line: one positional argument, used as a plain string prefix for
# every file name below (so it must already end with a path separator).
# Reads  "<prefix>ica_markers_t_test.h5ad".
# Writes "<prefix>ica_tsne_marker1.png" ... "<prefix>ica_tsne_marker4.png".
import sys  # needed for sys.path / sys.argv below; it was never imported

sys.path.insert(0, './lib/pylib/')
import preprocess_10x as prep
import zinb_AE as zae
import scanpy as sc
import matplotlib.pyplot as pl
import seaborn as sns
import pandas as pd
import os

sns.set(style='white', rc={'figure.figsize': (8, 6), 'figure.dpi': 150})

# Read projection after louvain and leiden clusters
method = "t_test"
# path="./result/ica_bm_qc3/"
path = sys.argv[1]
cluster_type = "louvain"
h5adfile_after_cluster = path + "ica_markers_" + method + ".h5ad"
adata = prep.read_10x_data(h5adfile_after_cluster, "10x_h5ad")

# Each panel of six genes becomes one 3-column t-SNE figure.  The position of
# a panel in this list (1-based) is the number used in its output file name.
marker_panels = [
    ['LYZ', 'CST3', 'CD14', 'MS4A7', 'FCGR3A', 'FCER1A'],
    ['GNLY', 'NKG7', 'KLRB1', 'IL7R', 'CD8B', 'CD27'],
    ['CD79A', 'MME', 'MS4A1', 'SEPP1', 'SDC1', 'MZB1'],
    ['PPBP', 'IL3RA', 'HBB', 'CD34', 'PF4', 'RPL34'],
]
for panel_no, genes in enumerate(marker_panels, start=1):
    sc.pl.tsne(adata, color=genes, show=False, ncols=3)
    pl.savefig(path + "ica_tsne_marker" + str(panel_no) + ".png")
    # Close the figure so successive panels do not accumulate in memory.
    pl.close()

# NOTE(review): this figure is drawn with show=False but no savefig/close
# follows in the visible code -- confirm whether a save step is missing.
sc.pl.violin(adata, ['n_genes', 'n_counts', 'percent_mito'],
             jitter=0.4, multi_panel=True, show=False)
# Attach clustering results to a 10x count matrix and export it via
# prep.write2mtx.
#
# Positional command-line arguments:
#   1. file read with format "10x_h5"    (the count data)
#   2. file read with format "10x_h5ad"  (the clustered data)
#   3. path handed to prep.write2mtx
import sys

sys.path.insert(0, './lib/pylib/')
import preprocess_10x as prep

ifile, ifile2, path = sys.argv[1], sys.argv[2], sys.argv[3]

############## Read 10x datasets###################
adata = prep.read_10x_data(ifile, "10x_h5")
adata_cluster = prep.read_10x_data(ifile2, "10x_h5ad")

# Keep only the observations that are present in the clustered dataset.
adata = adata[adata_cluster.obs_names, :]

# Carry over the cluster labels and the unstructured / multi-dimensional
# annotations from the clustered dataset.
# (alternative kept from the original: adata.obs = adata.obs.join(adata_cluster.obs))
adata.obs['louvain'] = adata_cluster.obs['louvain']
adata.uns = adata_cluster.uns
adata.obsm = adata_cluster.obsm

prep.write2mtx(adata, path=path)
# Run zae.prediction on a reference panel stored as h5ad.
# (model training lives in ./test/train_model_test.py)
#
# Positional command-line arguments:
#   1. processed h5ad file, e.g.
#      ./data/hca/ica_bm_after_filtered_recipe_zheng_qc1.h5ad
#   2. raw h5ad file, e.g.
#      ./data/hca/ica_bm_after_filtered_raw_recipe_zheng_qc1.h5ad
#   3. output path, e.g. ./result/ica_bm_qc1/
import sys

sys.path.insert(0, './lib/pylib/')
import preprocess_10x as prep
import zinb_AE as zae

ifile, ifile_raw, outputpath = sys.argv[1], sys.argv[2], sys.argv[3]

############## Read reference panel in h5ad###################
# NOTE(review): the third argument 'r' is presumably a file/backing mode for
# the reader -- confirm against prep.read_10x_data.
adata = prep.read_10x_data(ifile, "10x_h5ad", 'r')

# Store a copy of the raw dataset on the processed one.
adata.raw = prep.read_10x_data(ifile_raw, "10x_h5ad").copy()

############# Predict latent layer#####################
zae.prediction(adata, outputpath)
# Merge the 10x matrices found under ./data/zheng/ into one AnnData file,
# labelling every cell with the name of the directory it came from.
#
# Reads  ./data/zheng/<dataset>/hg19/ for each dataset directory that is not
#        in EXCLUDED_DATASETS.
# Writes ./data/zheng/ten_mixed_cell_types.h5ad (gzip-compressed).
import os
import sys

sys.path.insert(0, './lib/pylib/')
import preprocess_10x as prep
import numpy as np
import pandas as pd
from anndata import AnnData

DATA_ROOT = "./data/zheng/"
EXCLUDED_DATASETS = {
    "293t_filtered_gene_bc_matrices_mex",
    "frozen_pbmc_donor_b",
    "frozen_pbmc_donor_c",
    "fresh_68k_pbmc_donor_a",
}

# Only directories are datasets: the output file of this very script is
# written into DATA_ROOT, so a plain listing would pick it up on a re-run.
# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the merged matrix (and the dataset that supplies `var`) reproducible.
# Unlike list.remove, the set filter does not fail if an excluded dataset is
# absent from disk.
datapath = sorted(
    name for name in os.listdir(DATA_ROOT)
    if name not in EXCLUDED_DATASETS
    and os.path.isdir(os.path.join(DATA_ROOT, name))
)
if not datapath:
    raise FileNotFoundError("no dataset directories found in " + DATA_ROOT)

matrices = []
obs_frames = []
var = None
for name in datapath:
    matrix_dir = DATA_ROOT + name + "/hg19/"
    adata = prep.read_10x_data(matrix_dir, "10x_mtx")
    adata.obs['cell_type'] = name
    matrices.append(adata.X.toarray())
    obs_frames.append(adata.obs)
    if var is None:
        # Variable annotations are taken from the first dataset only.
        # NOTE(review): assumes every dataset has the same variables in the
        # same order -- confirm.
        var = adata.var

# Stack once at the end rather than re-copying the growing matrix each pass.
X = np.vstack(matrices)
# DataFrame.append was removed in pandas 2.0; concat is the replacement.
obs = pd.concat(obs_frames)

# NOTE(review): dtype/filemode are passed through unchanged from the original
# call; filemode=True with no filename looks unintended -- confirm.
adata = AnnData(X, obs=obs, var=var, dtype=X.dtype.name, filemode=True)
adata.write("./data/zheng/ten_mixed_cell_types.h5ad", compression="gzip")
# Predict the latent layer for a reference panel, then plot clusters from the
# resulting projection.  All paths are hard-coded below.
import sys  # needed for sys.path below; it was never imported

sys.path.insert(0, './lib/pylib/')
import preprocess_10x as prep
import zinb_AE as zae
import scanpy as sc
# NOTE(review): scanpy.api is a legacy namespace -- confirm the installed
# scanpy release still provides it.
import scanpy.api
import matplotlib.pyplot as pl
import seaborn as sns
import random

sns.set(style='white', rc={'figure.figsize': (8, 6), 'figure.dpi': 150})

############## train model from ./test/train_model_test.py
# ############# Read reference panel in h5ad###################
ifile = './data/hca/ica_bm_after_filtered_recipe_zheng_qc1.h5ad'
ifile_raw = './data/hca/ica_bm_after_filtered_raw_recipe_zheng_qc1.h5ad'
outputpath = "./result/ica_bm_qc1/"
# NOTE(review): the third argument 'r' is presumably a file/backing mode for
# the reader -- confirm against prep.read_10x_data.
adata = prep.read_10x_data(ifile, "10x_h5ad", 'r')
adataraw = prep.read_10x_data(ifile_raw, "10x_h5ad")
# Store a copy of the raw dataset on the processed one.
adata.raw = adataraw.copy()

############# Predict latent layer#####################
zae.prediction(adata, outputpath)

############## Read projection and plot clusters########
# Same directory as `outputpath`, spelled without the leading "./".
filepath = "result/ica_bm_qc1/"
adata = prep.read_10x_data(filepath + "projection.csv", "10x_csv")
# adata = prep.read_10x_data(filepath+"ica_clusters.h5ad","10x_h5ad")
zae.plotCluster(adata, filepath=filepath)

############## Plot Clusters ##############################
# ifile="result/zheng/frozen_pbmc_donor_b/projection.csv"
# h5adfile="./result/zheng/frozen_pbmc_donor_b/frozen_pbmc_donor_b_clusters.h5ad"
# o1="result/figures/plot_umap_frozen_pbmc_donor_b.pdf"
# o2="result/figures/plot_tsne_frozen_pbmc_donor_b.pdf"
# adata = prep.read_10x_data(ifile,"10x_csv")
# Plot clusters from a saved projection.
#
# Command line: one positional argument, used as a plain string prefix; the
# script reads "<prefix>projection.csv" and passes the same prefix to
# zae.plotCluster as `filepath`.
import sys

sys.path.insert(0, './lib/pylib/')
import preprocess_10x as prep
import zinb_AE as zae
import scanpy as sc
import scanpy.api
import matplotlib.pyplot as pl
import seaborn as sns
import random

sns.set(style='white', rc={'figure.figsize': (8, 6), 'figure.dpi': 150})

############## Read projection and plot clusters########
filepath = sys.argv[1]
projection_csv = filepath + "projection.csv"
adata = prep.read_10x_data(projection_csv, "10x_csv")
zae.plotCluster(adata, filepath=filepath, dm_reduction=True)