def log_norm(
    data: MultimodalData,
    norm_count: float = 1e5,
    backup_matrix: str = "raw.X",
) -> None:
    """Normalization, and then apply natural logarithm to the data.

    Parameters
    ----------
    data: ``pegasusio.MultimodalData``
        Use current selected modality in data, which should contain one RNA expression matrix.

    norm_count: ``int``, optional, default: ``1e5``.
        Total counts of one cell after normalization.

    backup_matrix: ``str``, optional, default: ``raw.X``.
        The key name of the backup count matrix, usually the raw counts.

    Returns
    -------
    ``None``

    Update ``data.X`` with count matrix after log-normalization. In addition, back up the original count matrix as ``backup_matrix``.

    In case of rerunning normalization while ``backup_matrix`` already exists, use ``backup_matrix`` instead of ``data.X`` for normalization.

    Examples
    --------
    >>> pg.log_norm(data)
    """
    if isinstance(data, MultimodalData):
        data = data.current_data()

    assert data.get_modality() == "rna"

    if backup_matrix not in data.list_keys():
        data.add_matrix(backup_matrix, data.X)
        data.X = data.X.astype(np.float32)  # force copy
    else:
        # The case of rerunning log_norm. Use backup matrix as source.
        data.X = data.get_matrix(backup_matrix).astype(
            np.float32)  # force copy
        logger.warning(
            "Rerun log-normalization. Use the raw counts in backup instead.")

    data.obs["scale"] = normalize_by_count(data.X, data.var["robust"].values,
                                           norm_count, True)
    data.uns["norm_count"] = norm_count
Esempio n. 2
0
def log_norm(data: MultimodalData,
             norm_count: float = 1e5,
             backup_matrix: str = "raw.X") -> None:
    """Normalization, and then apply natural logarithm to the data.

    Parameters
    ----------
    data: ``pegasusio.MultimodalData``
        Use current selected modality in data, which should contain one RNA expression matrix.

    norm_count: ``int``, optional, default: ``1e5``.
        Total count of cells after normalization.

    backup_matrix: ``str``, optional, default: ``raw.X``.
        Where to back up the count matrix.

    Returns
    -------
    ``None``

    Update ``data.X`` with count matrix after log-normalization. In addition, back up the original count matrix as ``backup_matrix``.

    Examples
    --------
    >>> pg.log_norm(data)
    """
    if isinstance(data, MultimodalData):
        data = data.current_data()

    assert data.get_modality() == "rna"

    data.add_matrix(backup_matrix, data.X)
    data.X = data.X.astype(np.float32)  # force copy

    from pegasus.cylib.fast_utils import normalize_by_count
    data.obs["scale"] = normalize_by_count(data.X, data.var["robust"].values,
                                           norm_count, True)
    data.uns["norm_count"] = norm_count