def _write_mtx(unidata: UnimodalData, output_dir: str, precision: int):
    """ Write Unimodal data to mtx
    """
    try:
        from pegasusio.cylib.io import write_mtx
    except ModuleNotFoundError:
        print("No module named 'pegasusio.cylib.io'")

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    for key in unidata.list_keys():
        matrix = unidata.matrices[key]
        mtx_file = os.path.join(output_dir, ("matrix" if key == "X" else key) + ".mtx.gz")
        fifo_file = mtx_file + ".fifo"
        if os.path.exists(fifo_file):
            os.unlink(fifo_file)
        os.mkfifo(fifo_file)
        # Compress through a named pipe: gzip reads from the FIFO while write_mtx streams into it,
        # so no uncompressed .mtx is ever written to disk.
        pobj = subprocess.Popen(f"gzip < {shlex.quote(fifo_file)} > {shlex.quote(mtx_file)}", shell = True)
        write_mtx(fifo_file, matrix.data, matrix.indices, matrix.indptr, matrix.shape[0], matrix.shape[1], precision = precision) # matrix is cell x gene csr_matrix, will write as gene x cell
        assert pobj.wait() == 0
        os.unlink(fifo_file)
        logger.info(f"{mtx_file} is written.")

    unidata.barcode_metadata.to_csv(os.path.join(output_dir, "barcodes.tsv.gz"), sep = '\t')
    logger.info("barcodes.tsv.gz is written.")
    unidata.feature_metadata.to_csv(os.path.join(output_dir, "features.tsv.gz"), sep = '\t')
    logger.info("features.tsv.gz is written.")

    logger.info(f"Mtx for {unidata.get_uid()} is written.")
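
# The helper below is a minimal sketch (not part of pegasusio) illustrating the same FIFO + gzip
# pattern used by _write_mtx: a gzip subprocess reads from a named pipe while the writer streams
# the matrix into it, so no uncompressed .mtx ever touches the disk. scipy's mmwrite stands in
# for the Cython write_mtx; the function name and default file name are assumptions.
def _fifo_gzip_sketch(matrix, mtx_file: str = "matrix.mtx.gz") -> None:
    import os, shlex, subprocess  # local imports so the sketch is self-contained
    from scipy.io import mmwrite
    from scipy.sparse import csr_matrix

    fifo_file = mtx_file + ".fifo"
    if os.path.exists(fifo_file):
        os.unlink(fifo_file)
    os.mkfifo(fifo_file)
    # gzip blocks until a writer opens the pipe; opening fifo_file below unblocks it.
    pobj = subprocess.Popen(f"gzip < {shlex.quote(fifo_file)} > {shlex.quote(mtx_file)}", shell = True)
    with open(fifo_file, "wb") as fout:
        mmwrite(fout, csr_matrix(matrix).T)  # transpose: write gene x cell, like _write_mtx
    assert pobj.wait() == 0
    os.unlink(fifo_file)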
def deseq2(
    pseudobulk: UnimodalData,
    design: str,
    contrast: Tuple[str, str, str],
    de_key: str = "deseq2",
    replaceOutliers: bool = True,
) -> None:
    """Perform Differential Expression (DE) Analysis using DESeq2 on pseudobulk data.

    This function calls the R package DESeq2, which must be installed in R. DE analysis will be performed on all pseudo-bulk matrices in ``pseudobulk``.

    Parameters
    ----------
    pseudobulk: ``UnimodalData``
        Pseudobulk data with rows for samples and columns for genes. If ``pseudobulk`` contains multiple matrices, DESeq2 will be applied to each of them.

    design: ``str``
        Design formula that will be passed to DESeq2.

    contrast: ``Tuple[str, str, str]``
        A tuple of three elements passed to DESeq2: a factor in the design formula, a level in that factor used as the numerator of the fold change, and a level used as the denominator of the fold change.

    de_key: ``str``, optional, default: ``"deseq2"``
        Key under which the DE analysis results are stored. For a matrix named ``cluster.X``, the stored key will be ``cluster.de_key``.

    replaceOutliers: ``bool``, optional, default: ``True``
        Whether to execute DESeq2's ``replaceOutliers`` step. If set to ``False``, ``minReplicatesForReplace=Inf`` is passed to the ``DESeq`` function and ``cooksCutoff=False`` to the ``results`` function.

    Returns
    -------
    ``None``

    Update ``pseudobulk.varm``:
        ``pseudobulk.varm[de_key]``: DE analysis result for the pseudo-bulk count matrix.
        ``pseudobulk.varm[cluster.de_key]``: DE results for cluster-specific pseudo-bulk count matrices.

    Examples
    --------
    >>> pg.deseq2(pseudobulk, '~gender', ('gender', 'female', 'male'))
    """
    try:
        import rpy2.robjects as ro
        from rpy2.robjects import pandas2ri, numpy2ri, Formula
        from rpy2.robjects.packages import importr
        from rpy2.robjects.conversion import localconverter
    except ModuleNotFoundError as e:
        import sys
        logger.error(f"{e}\nNeed rpy2! Try 'pip install rpy2'.")
        sys.exit(-1)

    try:
        deseq2 = importr('DESeq2')
    except ModuleNotFoundError:
        import sys
        text = """Please install DESeq2 in order to run this function.\n
                To install this package, start R and enter:\n
                if (!require("BiocManager", quietly = TRUE))
                    install.packages("BiocManager")
                BiocManager::install("DESeq2")"""
        logger.error(text)
        sys.exit(-1)

    import math
    to_dataframe = ro.r('function(x) data.frame(x)')

    for mat_key in pseudobulk.list_keys():
        with localconverter(ro.default_converter + numpy2ri.converter + pandas2ri.converter):
            # DESeq2 expects genes x samples, so transpose the sample x gene pseudobulk matrix.
            dds = deseq2.DESeqDataSetFromMatrix(countData=pseudobulk.get_matrix(mat_key).T, colData=pseudobulk.obs, design=Formula(design))

        if replaceOutliers:
            dds = deseq2.DESeq(dds)
            res = deseq2.results(dds, contrast=ro.StrVector(contrast))
        else:
            # Disable outlier replacement and Cook's distance filtering.
            dds = deseq2.DESeq(dds, minReplicatesForReplace=math.inf)
            res = deseq2.results(dds, contrast=ro.StrVector(contrast), cooksCutoff=False)

        with localconverter(ro.default_converter + pandas2ri.converter):
            res_df = ro.conversion.rpy2py(to_dataframe(res))
        res_df.fillna({'log2FoldChange': 0.0, 'lfcSE': 0.0, 'stat': 0.0, 'pvalue': 1.0, 'padj': 1.0}, inplace=True)

        de_res_key = de_key if mat_key.find('.') < 0 else f"{mat_key.partition('.')[0]}.{de_key}"
        pseudobulk.varm[de_res_key] = res_df.to_records(index=False)
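
# Illustrative follow-up (a sketch, not part of the pegasus API): pull the record array that
# deseq2() stored in .varm back out as a pandas DataFrame and keep the significant genes.
# The 'padj' column name comes from DESeq2's results() table; the helper name and the 0.05
# cutoff are assumptions for demonstration only.
def _top_deseq2_hits(pseudobulk: UnimodalData, de_key: str = "deseq2", alpha: float = 0.05):
    import pandas as pd

    # varm[de_key] is the structured array written by deseq2() above; index rows by gene names.
    res_df = pd.DataFrame(pseudobulk.varm[de_key], index=pseudobulk.var.index)
    return res_df.loc[res_df["padj"] < alpha].sort_values("padj")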