def write_multimodal_data(self, data: MultimodalData, overwrite: bool = True) -> None:
    """Write the modalities of ``data`` under ``self.root``.

    Parameters
    ----------
    data: MultimodalData
        Object whose unimodal entries are written.
    overwrite: ``bool``, optional, default: ``True``
        If truthy, every key from ``data.list_data()`` is written and
        ``overwrite`` is forwarded to ``write_unimodal_data``.
        Otherwise an incremental update is done: keys in
        ``data.data.deleted`` are removed from ``self.root`` and keys in
        ``data.data.accessed`` are written, each with ``overwrite`` set to
        whether the key is in ``data.data.modified``.

    Returns
    -------
    ``None``
    """
    if not overwrite:
        # Incremental mode: drop removed entries first, then write the accessed ones.
        for removed_key in data.data.deleted:
            del self.root[removed_key]
        for touched_key in data.data.accessed:
            was_modified = touched_key in data.data.modified
            self.write_unimodal_data(
                self.root,
                touched_key,
                data.get_data(touched_key),
                overwrite=was_modified,
            )
    else:
        for modality_key in data.list_data():
            self.write_unimodal_data(
                self.root,
                modality_key,
                data.get_data(modality_key),
                overwrite=overwrite,
            )

    # Record the selection marker of ``data`` in both modes.
    self.root.attrs['_selected'] = data._selected
def write_mtx_file(data: MultimodalData, output_directory: str, precision: int = 2):
    """Write each modality of ``data`` as mtx output under ``output_directory``.

    Parameters
    ----------
    data: MultimodalData
        Object whose keys (from ``data.list_data()``) are written one by one.
    output_directory: ``str``
        Target directory; converted to an absolute path and created if missing.
    precision: ``int``, optional, default: ``2``
        Forwarded unchanged to ``_write_mtx``.
    """
    target_root = os.path.abspath(output_directory)
    os.makedirs(target_root, exist_ok=True)

    for modality_key in data.list_data():
        unidata = data.get_data(modality_key)
        # Each modality gets its own path named after its key.
        destination = os.path.join(target_root, modality_key)
        _write_mtx(unidata, destination, precision)

    logger.info("Mtx files are written.")
def _run_filter_data(
    data: MultimodalData,
    focus_list: List[str] = None,
    output_filt: str = None,
    plot_filt: str = None,
    plot_filt_figsize: Tuple[int, int] = None,
    min_genes_before_filt: int = 100,
    select_singlets: bool = False,
    remap_string: str = None,
    subset_string: str = None,
    min_genes: int = 500,
    max_genes: int = 6000,
    min_umis: int = None,
    max_umis: int = None,
    mito_prefix: str = "MT-",
    percent_mito: float = 20.0,
    percent_cells: float = 0.05,
) -> None:
    """Run QC, optional reporting, filtration and robust-gene detection.

    This function is for command line use.

    For every key in ``focus_list`` the unimodal data is passed to
    ``qc_metrics``; optionally a filtration spreadsheet is written and
    filtration plots are generated. Afterwards ``filter_data`` is applied to
    ``data`` and ``identify_robust_genes`` is run on each focused key.

    Parameters
    ----------
    data: MultimodalData
        Object to process; it is handed to ``filter_data`` after QC.
    focus_list: ``List[str]``, optional, default: ``None``
        Keys to process. If ``None``, ``[data.current_key()]`` is used.
    output_filt: ``str``, optional, default: ``None``
        If not ``None``, ``{output_filt}.{uid}.filt.xlsx`` is written per key,
        where ``uid`` comes from ``unidata.get_uid()``.
    plot_filt: ``str``, optional, default: ``None``
        If not ``None``, forwarded to ``_generate_filter_plots`` per key.
    plot_filt_figsize: ``Tuple[int, int]``, optional, default: ``None``
        Forwarded to ``_generate_filter_plots``.
    min_genes_before_filt: ``int``, optional, default: ``100``
        Forwarded to ``get_filter_stats`` and ``_generate_filter_plots``.
    select_singlets, remap_string, subset_string, min_genes, max_genes, min_umis, max_umis, percent_mito
        Forwarded positionally to ``qc_metrics`` in this order (with the
        mitochondrial prefix inserted before ``percent_mito``).
    mito_prefix: ``str``, optional, default: ``"MT-"``
        Used to build a ``DictWithDefault``; the value looked up by
        ``unidata.get_genome()`` is forwarded to ``qc_metrics``.
    percent_cells: ``float``, optional, default: ``0.05``
        Forwarded to ``identify_robust_genes``.

    Returns
    -------
    ``None``
    """
    if focus_list is None:
        focus_list = [data.current_key()]

    mito_dict = DictWithDefault(mito_prefix)
    for key in focus_list:
        unidata = data.get_data(key)

        qc_metrics(
            unidata,
            select_singlets,
            remap_string,
            subset_string,
            min_genes,
            max_genes,
            min_umis,
            max_umis,
            mito_dict.get(unidata.get_genome()),
            percent_mito,
        )

        if output_filt is not None:
            group_key = unidata.get_uid()
            df_cells = get_filter_stats(unidata, min_genes_before_filt=min_genes_before_filt)
            # Use the writer as a context manager: ``ExcelWriter.save()`` was
            # removed in pandas 2.0, and this also guarantees the workbook is
            # closed if ``to_excel`` raises.
            with pd.ExcelWriter(f"{output_filt}.{group_key}.filt.xlsx", engine="xlsxwriter") as writer:
                df_cells.to_excel(writer, sheet_name="Cell filtration stats")
            logger.info(f"Filtration results for {group_key} are written.")

        if plot_filt is not None:
            _generate_filter_plots(
                unidata,
                plot_filt,
                plot_filt_figsize=plot_filt_figsize,
                min_genes_before_filt=min_genes_before_filt,
            )

    filter_data(data, focus_list=focus_list)

    # Robust genes are identified only after filtration has been applied.
    for key in focus_list:
        unidata = data.get_data(key)
        identify_robust_genes(unidata, percent_cells=percent_cells)
def add_data(self, data: MultimodalData) -> None:
    """Append each unimodal entry of ``data`` to ``self.aggr`` under its key.

    Parameters
    ----------
    data: MultimodalData
        Object whose entries (one per key from ``data.list_data()``) are
        appended to ``self.aggr[key]``.

    Returns
    -------
    ``None``
    """
    # NOTE(review): ``self.aggr[key]`` must already yield a list-like for every
    # key (presumably a defaultdict(list)) -- confirm against the class setup.
    for modality_key in data.list_data():
        bucket = self.aggr[modality_key]
        bucket.append(data.get_data(modality_key))
def write_scp_file(data: MultimodalData, output_name: str, is_sparse: bool = True, precision: int = 2) -> None:
    """Generate outputs for single cell portal from a MultimodalData object

    Parameters
    ----------
    data: MultimodalData
        A MultimodalData object.
    output_name: ``str``
        Name prefix for output files. It is converted to an absolute path.
    is_sparse: ``bool``, optional, default: ``True``
        If ``True``, write the count matrix in sparse format.
    precision: ``int``, optional, default: ``2``
        Round numbers to ``precision`` decimal places.

    Returns
    -------
    ``None``

    Only modalities that have a non-empty ``"X"`` matrix are written;
    modalities for which looking up ``"X"`` raises ``ValueError`` are skipped.

    If data contains only one such modality, files are generated as follows:
        * ``output_name.scp.basis.coords.txt``, where ``basis`` is for each key in the ``obsm`` field.
        * ``output_name.scp.metadata.txt``.
        * Gene expression files:
            * If in sparse format:
                * ``output_name.scp.features.tsv``, information on genes;
                * ``output_name.scp.barcodes.tsv``, information on cell barcodes;
                * ``output_name.scp.matrix.mtx``, count matrix.
            * If not in sparse:
                * ``output_name.scp.expr.txt``.

    Otherwise, under the directory ``os.path.dirname(output_name)``, a separate
    folder is created per key (if it does not exist yet) and each folder gets
    its own files in the format above.

    Examples
    --------
    >>> io.write_scp_file(data, output_name = "scp_result")
    """
    output_name = os.path.abspath(output_name)

    valid_keys = []
    for key in data.list_data():
        try:
            mat = data.get_data(key).get_matrix("X")
        except ValueError:
            # Deliberate best effort: a modality without an "X" matrix is skipped.
            continue
        if mat.data.size > 0:
            valid_keys.append(key)

    single_modality = len(valid_keys) == 1
    parent_dir = os.path.dirname(output_name)
    base_name = os.path.basename(output_name)

    for key in valid_keys:
        if single_modality:
            out_prefix = output_name
        else:
            subpath = os.path.join(parent_dir, key)
            # ``os.path.mkdir`` does not exist (AttributeError); ``os.makedirs``
            # with ``exist_ok`` creates the folder only when it is missing.
            os.makedirs(subpath, exist_ok=True)
            out_prefix = os.path.join(subpath, base_name)

        unidata = data.get_data(key)
        _write_scp_metadata(unidata, out_prefix, precision=precision)
        _write_scp_coords(unidata, out_prefix, precision=precision)
        _write_scp_expression(unidata, out_prefix, is_sparse, precision=precision)

    logger.info("write_scp_file is done.")