コード例 #1
0
def write_mtx_file(data: MultimodalData, output_directory: str, precision: int = 2):
    """Export every modality held by ``data`` as mtx output.

    ``output_directory`` is resolved to an absolute path and created when it
    does not exist yet. Each key returned by ``data.list_data()`` is then
    handed to ``_write_mtx`` together with the path
    ``<output_directory>/<key>`` and the requested ``precision``.
    """
    target_root = os.path.abspath(output_directory)
    os.makedirs(target_root, exist_ok=True)

    for modality in data.list_data():
        unimodal = data.get_data(modality)
        destination = os.path.join(target_root, modality)
        _write_mtx(unimodal, destination, precision)

    logger.info("Mtx files are written.")
コード例 #2
0
 def write_multimodal_data(self, data: MultimodalData, overwrite: bool = True) -> None:
     """Persist the modalities of ``data`` under ``self.root``.

     With ``overwrite`` set, every key from ``data.list_data()`` is written
     through ``self.write_unimodal_data``. Otherwise the write is
     incremental: keys listed in ``data.data.deleted`` are removed from
     ``self.root``, and each key in ``data.data.accessed`` is written with
     its own overwrite flag, which is true only when the key also appears in
     ``data.data.modified``. In both cases ``data._selected`` is stored in
     the ``_selected`` attribute of ``self.root``.
     """
     if not overwrite:
         # Incremental path: drop removed modalities first, then refresh the touched ones.
         for removed in data.data.deleted:
             del self.root[removed]
         for touched in data.data.accessed:
             self.write_unimodal_data(
                 self.root,
                 touched,
                 data.get_data(touched),
                 overwrite=touched in data.data.modified,
             )
     else:
         for name in data.list_data():
             self.write_unimodal_data(
                 self.root,
                 name,
                 data.get_data(name),
                 overwrite=overwrite,
             )
     self.root.attrs['_selected'] = data._selected
コード例 #3
0
def write_loom_file(data: MultimodalData, output_file: str) -> None:
    """Write a MultimodalData object holding a single modality to a loom file.

    Parameters
    ----------
    data: MultimodalData
        Object to export. It must not hold more than one modality. The first
        modality is selected through ``data.select_data`` (a side effect on
        ``data``) and must provide an ``"X"`` matrix.

    output_file: ``str``
        Path of the loom file to create.

    Returns
    -------
    ``None``

    Raises
    ------
    ValueError
        If ``data`` holds several modalities, if the selected modality has no
        matrices at all, or if it has no ``"X"`` matrix.
    """
    keys = data.list_data()
    if len(keys) > 1:
        raise ValueError(
            f"Data contain multiple modalities: {','.join(keys)}!")
    data.select_data(keys[0])
    matrices = data.list_keys()
    # The emptiness check must run before the "X" check, otherwise it can
    # never fire (an empty list never contains "X").
    if len(matrices) == 0:
        raise ValueError("Could not write empty matrix to a loom file!")
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if "X" not in matrices:
        raise ValueError("Could not find matrix 'X' to write to a loom file!")

    def _process_attrs(key_name: str, attrs: pd.DataFrame,
                       attrs_multi: dict) -> Dict[str, object]:
        """Flatten a data frame plus its multi-dimensional companions into one attribute dict."""
        res_dict = {key_name: attrs.index.values}
        for key in attrs.columns:
            res_dict[key] = np.array(attrs[key].values)
        for key, value in attrs_multi.items():
            if value.ndim > 1:  # value.ndim == 1 refers to np.recarray, which will not be written to a loom file.
                # Single-column 2-D arrays are stored as plain 1-D vectors.
                res_dict[key] = value if value.shape[1] > 1 else value[:, 0]
        return res_dict

    row_attrs = _process_attrs("Gene", data.var, data.varm)
    col_attrs = _process_attrs("CellID", data.obs, data.obsm)

    # Expose the first available feature-id column under the name "Accession".
    accession_key = next(
        (name for name in ("featureid", "gene_ids") if name in row_attrs), None)
    if accession_key is not None:
        row_attrs["Accession"] = row_attrs.pop(accession_key)

    # "X" is stored under the empty layer name; matrices are transposed before writing.
    layers = {
        "" if matkey == "X" else matkey: data.get_matrix(matkey).T
        for matkey in matrices
    }

    # Only string-valued entries of ``data.uns`` are carried over as file attributes.
    file_attrs = {
        key: value for key, value in data.uns.items() if isinstance(value, str)
    }

    # Imported here, after validation, so loompy is only needed when a file is actually written.
    import loompy
    loompy.create(output_file,
                  layers,
                  row_attrs,
                  col_attrs,
                  file_attrs=file_attrs)

    logger.info(f"{output_file} is written.")
コード例 #4
0
ファイル: aggr_data.py プロジェクト: slowkow/pegasusio
 def add_data(self, data: MultimodalData) -> None:
     """Append each modality of ``data`` to the entry of ``self.aggr`` stored under the same key.

     NOTE(review): ``self.aggr`` is presumably a mapping from key to list
     that creates missing entries on lookup; confirm against the class
     initializer.
     """
     for modality in data.list_data():
         bucket = self.aggr[modality]
         bucket.append(data.get_data(modality))
コード例 #5
0
def write_scp_file(data: MultimodalData, output_name: str, is_sparse: bool = True, precision: int = 2) -> None:
    """Generate outputs for single cell portal from a MultimodalData object

    Parameters
    ----------
    data: MultimodalData
        A MultimodalData object.

    output_name: ``str``
        Name prefix for output files.

    is_sparse: ``bool``, optional, default: ``True``
        If ``True``, enforce the count matrix to be sparse after written into files.

    precision: ``int``, optional, default: ``2``
        Round numbers to ``precision`` decimal places.

    Returns
    -------
    ``None``

    Only modalities whose ``"X"`` matrix can be fetched and stores at least one value are exported;
    a modality for which fetching ``"X"`` raises ``ValueError`` is skipped silently.

    If exactly one modality is exported, files are generated as follows:
        * ``output_name.scp.basis.coords.txt``, where ``basis`` is for each key in the ``obsm`` field.
        * ``output_name.scp.metadata.txt``.
        * Gene expression files:
            * If in sparse format:
                * ``output_name.scp.features.tsv``, information on genes;
                * ``output_name.scp.barcodes.tsv``, information on cell barcodes;
                * ``output_name.scp.matrix.mtx``, count matrix.
            * If not in sparse:
                * ``output_name.scp.expr.txt``.

    Otherwise, under the directory os.path.dirname(output_name), we will create separate folders per key and each separate folder has its own files in the format above.

    Examples
    --------
    >>> io.write_scp_file(data, output_name = "scp_result")
    """
    output_name = os.path.abspath(output_name)

    valid_keys = []
    for key in data.list_data():
        try:
            mat = data.get_data(key).get_matrix("X")
            # NOTE(review): assumes ``mat`` exposes a ``data`` array (e.g. a scipy sparse matrix) -- confirm.
            if mat.data.size > 0:
                valid_keys.append(key)
        except ValueError:
            # Deliberate best effort: a modality without a usable "X" matrix is simply not exported.
            pass

    def _export(unidata, prefix: str) -> None:
        """Write metadata, coordinates and expression files for one modality under ``prefix``."""
        _write_scp_metadata(unidata, prefix, precision = precision)
        _write_scp_coords(unidata, prefix, precision = precision)
        _write_scp_expression(unidata, prefix, is_sparse, precision = precision)

    if len(valid_keys) == 1:
        _export(data.get_data(valid_keys[0]), output_name)
    else:
        path = os.path.dirname(output_name)
        fname = os.path.basename(output_name)
        for key in valid_keys:
            subpath = os.path.join(path, key)
            # ``os.path`` has no ``mkdir``; ``os.makedirs`` creates the folder and tolerates an existing one.
            os.makedirs(subpath, exist_ok = True)
            _export(data.get_data(key), os.path.join(subpath, fname))

    logger.info("write_scp_file is done.")