def write_mtx_file(data: MultimodalData, output_directory: str, precision: int = 2):
    """Write each modality held by ``data`` as mtx output under ``output_directory``.

    Parameters
    ----------
    data: MultimodalData
        Container whose modalities (as reported by ``data.list_data()``) are written.
    output_directory: ``str``
        Target directory. It is converted to an absolute path and created if missing.
    precision: ``int``, optional, default: ``2``
        Forwarded unchanged to ``_write_mtx``.

    Returns
    -------
    ``None``
        For every key, ``_write_mtx`` receives ``<output_directory>/<key>`` as its destination.
    """
    target_root = os.path.abspath(output_directory)
    os.makedirs(target_root, exist_ok=True)

    for modality in data.list_data():
        unidata = data.get_data(modality)
        destination = os.path.join(target_root, modality)
        _write_mtx(unidata, destination, precision)

    logger.info("Mtx files are written.")
def write_multimodal_data(self, data: MultimodalData, overwrite: bool = True) -> None:
    """Persist a MultimodalData object under ``self.root``.

    Parameters
    ----------
    data: MultimodalData
        The object to store.
    overwrite: ``bool``, optional, default: ``True``
        If truthy, every modality listed by ``data.list_data()`` is written with the same
        ``overwrite`` flag. Otherwise an incremental update is performed: keys recorded in
        ``data.data.deleted`` are removed from ``self.root``, and keys recorded in
        ``data.data.accessed`` are written, overwriting only those also present in
        ``data.data.modified``.

    Returns
    -------
    ``None``
        In both modes ``data._selected`` is stored in ``self.root.attrs['_selected']`` at the end.
    """
    if not overwrite:
        # Incremental update: drop what was deleted, then refresh what was touched.
        for removed_key in data.data.deleted:
            del self.root[removed_key]

        for touched_key in data.data.accessed:
            unidata = data.get_data(touched_key)
            was_modified = touched_key in data.data.modified
            self.write_unimodal_data(self.root, touched_key, unidata, overwrite=was_modified)
    else:
        # Full rewrite of every modality.
        for modality in data.list_data():
            unidata = data.get_data(modality)
            self.write_unimodal_data(self.root, modality, unidata, overwrite=overwrite)

    self.root.attrs['_selected'] = data._selected
def write_loom_file(data: MultimodalData, output_file: str) -> None:
    """Write a MultimodalData object to a loom file.

    The data must contain a single modality; that modality is selected on ``data``
    (via ``data.select_data``) before its content is exported.

    Parameters
    ----------
    data: MultimodalData
        Object to export. Its ``var``/``varm`` become loom row attributes, ``obs``/``obsm``
        become column attributes, every matrix becomes a layer (``"X"`` is stored under the
        empty layer name) and string-valued entries of ``uns`` become file attributes.
    output_file: ``str``
        Path of the loom file to create.

    Raises
    ------
    ValueError
        If ``data`` contains more than one modality, or if the selected modality has no matrix.
    AssertionError
        If the selected modality has matrices but none of them is keyed ``"X"``.
    """
    keys = data.list_data()
    if len(keys) > 1:
        raise ValueError(f"Data contain multiple modalities: {','.join(keys)}!")
    data.select_data(keys[0])

    matrices = data.list_keys()
    # The emptiness check must come before the "X" check; otherwise an empty
    # key list trips the assertion and this error message is never reported.
    if len(matrices) == 0:
        raise ValueError("Could not write empty matrix to a loom file!")
    assert "X" in matrices, 'Matrix "X" is required to write a loom file!'

    def _process_attrs(key_name: str, attrs: pd.DataFrame, attrs_multi: dict) -> Dict[str, object]:
        # Flatten a DataFrame plus its multi-dimensional companions into one attribute dict.
        res_dict = {key_name: attrs.index.values}
        for key in attrs.columns:
            res_dict[key] = np.array(attrs[key].values)
        for key, value in attrs_multi.items():
            if value.ndim > 1:  # value.ndim == 1 refers to np.recarray, which will not be written to a loom file.
                # Single-column 2-D arrays are stored as 1-D vectors.
                res_dict[key] = value if value.shape[1] > 1 else value[:, 0]
        return res_dict

    row_attrs = _process_attrs("Gene", data.var, data.varm)
    col_attrs = _process_attrs("CellID", data.obs, data.obsm)

    # Expose the feature identifier column under the name "Accession",
    # preferring "featureid" over "gene_ids" when both are present.
    accession_key = "featureid" if "featureid" in row_attrs else (
        "gene_ids" if "gene_ids" in row_attrs else None)
    if accession_key is not None:
        row_attrs["Accession"] = row_attrs.pop(accession_key)

    # Matrices are transposed before being handed to loompy; "X" goes to the "" layer.
    layers = {
        "" if matkey == "X" else matkey: data.get_matrix(matkey).T
        for matkey in matrices
    }

    # Only string-valued unstructured entries are kept as file attributes.
    file_attrs = {key: value for key, value in data.uns.items() if isinstance(value, str)}

    # Imported here so that loompy is only needed when a loom file is actually written.
    import loompy
    loompy.create(output_file, layers, row_attrs, col_attrs, file_attrs=file_attrs)

    logger.info(f"{output_file} is written.")
def add_data(self, data: MultimodalData) -> None:
    """Append every modality of ``data`` to the matching entry of ``self.aggr``.

    For each key reported by ``data.list_data()``, the object returned by
    ``data.get_data(key)`` is appended to ``self.aggr[key]``.
    """
    # NOTE(review): presumably self.aggr maps key -> list (e.g. a defaultdict(list)) — confirm.
    for modality in data.list_data():
        bucket = self.aggr[modality]
        bucket.append(data.get_data(modality))
def write_scp_file(data: MultimodalData, output_name: str, is_sparse: bool = True, precision: int = 2) -> None:
    """Generate outputs for single cell portal from a MultimodalData object

    Parameters
    ----------
    data: MultimodalData
        A MultimodalData object.

    output_name: ``str``
        Name prefix for output files.

    is_sparse: ``bool``, optional, default: ``True``
        If ``True``, enforce the count matrix to be sparse after written into files.

    precision: ``int``, optional, default: ``2``
        Round numbers to ``precision`` decimal places.

    Returns
    -------
    ``None``

    Only modalities whose ``"X"`` matrix can be retrieved and is non-empty are written.

    If data contains only one such modality, files are generated as follows:
        * ``output_name.scp.basis.coords.txt``, where ``basis`` is for each key in the modality's ``obsm`` field.
        * ``output_name.scp.metadata.txt``.
        * Gene expression files:
            * If in sparse format:
                * ``output_name.scp.features.tsv``, information on genes;
                * ``output_name.scp.barcodes.tsv``, information on cell barcodes;
                * ``output_name.scp.matrix.mtx``, count matrix.
            * If not in sparse:
                * ``output_name.scp.expr.txt``.

    Otherwise, under the directory os.path.dirname(output_name), we will create separate folders per key
    and each separate folder has its own files in the format above.

    Examples
    --------
    >>> io.write_scp_file(data, output_name = "scp_result")
    """
    output_name = os.path.abspath(output_name)

    def _write_one(unidata, prefix: str) -> None:
        # Emit metadata, coordinates and expression files for a single modality.
        _write_scp_metadata(unidata, prefix, precision=precision)
        _write_scp_coords(unidata, prefix, precision=precision)
        _write_scp_expression(unidata, prefix, is_sparse, precision=precision)

    valid_keys = []
    for key in data.list_data():
        try:
            mat = data.get_data(key).get_matrix("X")
            if mat.data.size > 0:
                valid_keys.append(key)
        except ValueError:
            # Deliberate best-effort: a modality that raises ValueError here
            # (presumably because it has no "X" matrix) is skipped.
            pass

    if len(valid_keys) == 1:
        _write_one(data.get_data(valid_keys[0]), output_name)
    else:
        path = os.path.dirname(output_name)
        fname = os.path.basename(output_name)
        for key in valid_keys:
            subpath = os.path.join(path, key)
            # os.path has no mkdir(); os.makedirs also tolerates an existing directory.
            os.makedirs(subpath, exist_ok=True)
            _write_one(data.get_data(key), os.path.join(subpath, fname))

    logger.info("write_scp_file is done.")