Python AnnDataManager.get_from_registry 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: scvi.data.anndata

클래스/타입: AnnDataManager

메소드/함수: get_from_registry

hotexamples.com에서의 예제들: 5

Python AnnDataManager.get_from_registry - 5개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 scvi.data.anndata.AnnDataManager.get_from_registry에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

AnnDataManager(8)

register_fields(8)

get_from_registry(5)

get_state_registry(3)

예제 #1

파일 보기

def cite_seq_raw_counts_properties(
    adata_manager: AnnDataManager,
    idx1: Union[List[int], np.ndarray],
    idx2: Union[List[int], np.ndarray],
) -> Dict[str, np.ndarray]:
    """
    Computes and returns some statistics on the raw counts of two sub-populations.

    Parameters
    ----------
    adata_manager
        :class:`~scvi.data.anndata.AnnDataManager` object setup with :class:`~scvi.model.TOTALVI`.
    idx1
        subset of indices describing the first population.
    idx2
        subset of indices describing the second population.

    Returns
    -------
    type
        Dict of ``np.ndarray`` containing, by pair (one for each sub-population),
        mean expression per gene, proportion of non-zero expression per gene, mean of normalized expression.
    """
    gp = scrna_raw_counts_properties(adata_manager, idx1, idx2)
    protein_exp = adata_manager.get_from_registry(
        REGISTRY_KEYS.PROTEIN_EXP_KEY)

    nan = np.array([np.nan] * adata_manager.summary_stats.n_proteins)
    protein_exp = adata_manager.get_from_registry(
        REGISTRY_KEYS.PROTEIN_EXP_KEY)
    mean1_pro = np.asarray(protein_exp[idx1].mean(0))
    mean2_pro = np.asarray(protein_exp[idx2].mean(0))
    nonz1_pro = np.asarray((protein_exp[idx1] > 0).mean(0))
    nonz2_pro = np.asarray((protein_exp[idx2] > 0).mean(0))
    properties = dict(
        raw_mean1=np.concatenate([gp["raw_mean1"], mean1_pro]),
        raw_mean2=np.concatenate([gp["raw_mean2"], mean2_pro]),
        non_zeros_proportion1=np.concatenate(
            [gp["non_zeros_proportion1"], nonz1_pro]),
        non_zeros_proportion2=np.concatenate(
            [gp["non_zeros_proportion2"], nonz2_pro]),
        raw_normalized_mean1=np.concatenate([gp["raw_normalized_mean1"], nan]),
        raw_normalized_mean2=np.concatenate([gp["raw_normalized_mean2"], nan]),
    )

    return properties

예제 #2

파일 보기

def _init_library_size(adata_manager: AnnDataManager,
                       n_batch: dict) -> Tuple[np.ndarray, np.ndarray]:
    """
    Computes and returns library size.

    Parameters
    ----------
    adata_manager
        :class:`~scvi.data.anndata.AnnDataManager` object setup with :class:`~scvi.model.SCVI`.
    n_batch
        Number of batches.

    Returns
    -------
    type
        Tuple of two 1 x n_batch ``np.ndarray`` containing the means and variances
        of library size in each batch in adata.

        If a certain batch is not present in the adata, the mean defaults to 0,
        and the variance defaults to 1. These defaults are arbitrary placeholders which
        should not be used in any downstream computation.
    """
    data = adata_manager.get_from_registry(REGISTRY_KEYS.X_KEY)
    batch_indices = adata_manager.get_from_registry(REGISTRY_KEYS.BATCH_KEY)

    library_log_means = np.zeros(n_batch)
    library_log_vars = np.ones(n_batch)

    for i_batch in np.unique(batch_indices):
        idx_batch = np.squeeze(batch_indices == i_batch)
        batch_data = data[idx_batch.nonzero()
                          [0]]  # h5ad requires integer indexing arrays.
        sum_counts = batch_data.sum(axis=1)
        masked_log_sum = np.ma.log(sum_counts)
        if np.ma.is_masked(masked_log_sum):
            warnings.warn(
                "This dataset has some empty cells, this might fail inference."
                "Data should be filtered with `scanpy.pp.filter_cells()`")

        log_counts = masked_log_sum.filled(0)
        library_log_means[i_batch] = np.mean(log_counts).astype(np.float32)
        library_log_vars[i_batch] = np.var(log_counts).astype(np.float32)

    return library_log_means.reshape(1, -1), library_log_vars.reshape(1, -1)

예제 #3

파일 보기

    def create_doublets(
        cls,
        adata_manager: AnnDataManager,
        doublet_ratio: int,
        indices: Optional[Sequence[int]] = None,
        seed: int = 1,
    ) -> AnnData:
        """Simulate doublets.

        Parameters
        ----------
        adata
            AnnData object setup with setup_anndata.
        doublet_ratio
            Ratio of generated doublets to produce relative to number of
            cells in adata or length of indices, if not `None`.
        indices
            Indices of cells in adata to use. If `None`, all cells are used.
        seed
            Seed for reproducibility
        """
        adata = adata_manager.adata
        n_obs = adata.n_obs if indices is None else len(indices)
        num_doublets = doublet_ratio * n_obs

        # counts can be in many locations, this uses where it was registered in setup
        x = adata_manager.get_from_registry(REGISTRY_KEYS.X_KEY)
        if indices is not None:
            x = x[indices]

        random_state = np.random.RandomState(seed=seed)
        parent_inds = random_state.choice(n_obs, size=(num_doublets, 2))
        doublets = x[parent_inds[:, 0]] + x[parent_inds[:, 1]]

        doublets_ad = AnnData(doublets)
        doublets_ad.var_names = adata.var_names
        doublets_ad.obs_names = [
            "sim_doublet_{}".format(i) for i in range(num_doublets)
        ]

        # if adata setup with a layer, need to add layer to doublets adata
        layer = adata_manager.data_registry[REGISTRY_KEYS.X_KEY].attr_key
        if layer is not None:
            doublets_ad.layers[layer] = doublets

        return doublets_ad

예제 #4

파일 보기

def scatac_raw_counts_properties(
    adata_manager: AnnDataManager,
    idx1: Union[List[int], np.ndarray],
    idx2: Union[List[int], np.ndarray],
    var_idx: Optional[Union[List[int], np.ndarray]] = None,
) -> Dict[str, np.ndarray]:
    """
    Computes and returns some statistics on the raw counts of two sub-populations.

    Parameters
    ----------
    adata_manager
        :class:`~scvi.data.anndata.AnnDataManager` object setup with :class:`~scvi.model.SCVI`.
    idx1
        subset of indices describing the first population.
    idx2
        subset of indices describing the second population.
    var_idx
        subset of variables to extract properties from. if None, all variables are used.

    Returns
    -------
    type
        Dict of ``np.ndarray`` containing, by pair (one for each sub-population).
    """
    data = adata_manager.get_from_registry(REGISTRY_KEYS.X_KEY)
    data1 = data[idx1]
    data2 = data[idx2]
    if var_idx is not None:
        data1 = data1[:, var_idx]
        data2 = data2[:, var_idx]
    mean1 = np.asarray((data1 > 0).mean(axis=0)).ravel()
    mean2 = np.asarray((data2 > 0).mean(axis=0)).ravel()
    properties = dict(emp_mean1=mean1,
                      emp_mean2=mean2,
                      emp_effect=(mean1 - mean2))
    return properties

예제 #5

파일 보기

def scrna_raw_counts_properties(
    adata_manager: AnnDataManager,
    idx1: Union[List[int], np.ndarray],
    idx2: Union[List[int], np.ndarray],
    var_idx: Optional[Union[List[int], np.ndarray]] = None,
) -> Dict[str, np.ndarray]:
    """
    Computes and returns some statistics on the raw counts of two sub-populations.

    Parameters
    ----------
    adata_manager
        :class:`~scvi.data.anndata.AnnDataManager` object setup with :class:`~scvi.model.SCVI`.
    idx1
        subset of indices describing the first population.
    idx2
        subset of indices describing the second population.
    var_idx
        subset of variables to extract properties from. if None, all variables are used.

    Returns
    -------
    type
        Dict of ``np.ndarray`` containing, by pair (one for each sub-population),
        mean expression per gene, proportion of non-zero expression per gene, mean of normalized expression.
    """
    adata = adata_manager.adata
    data = adata_manager.get_from_registry(REGISTRY_KEYS.X_KEY)
    data1 = data[idx1]
    data2 = data[idx2]
    if var_idx is not None:
        data1 = data1[:, var_idx]
        data2 = data2[:, var_idx]

    mean1 = np.asarray((data1).mean(axis=0)).ravel()
    mean2 = np.asarray((data2).mean(axis=0)).ravel()
    nonz1 = np.asarray((data1 != 0).mean(axis=0)).ravel()
    nonz2 = np.asarray((data2 != 0).mean(axis=0)).ravel()

    key = "_scvi_raw_norm_scaling"
    if key not in adata.obs.keys():
        scaling_factor = 1 / np.asarray(data.sum(axis=1)).ravel().reshape(
            -1, 1)
        scaling_factor *= 1e4
        adata.obs[key] = scaling_factor.ravel()
    else:
        scaling_factor = adata.obs[key].to_numpy().ravel().reshape(-1, 1)

    if issubclass(type(data), sp_sparse.spmatrix):
        norm_data1 = data1.multiply(scaling_factor[idx1])
        norm_data2 = data2.multiply(scaling_factor[idx2])
    else:
        norm_data1 = data1 * scaling_factor[idx1]
        norm_data2 = data2 * scaling_factor[idx2]

    norm_mean1 = np.asarray(norm_data1.mean(axis=0)).ravel()
    norm_mean2 = np.asarray(norm_data2.mean(axis=0)).ravel()

    properties = dict(
        raw_mean1=mean1,
        raw_mean2=mean2,
        non_zeros_proportion1=nonz1,
        non_zeros_proportion2=nonz2,
        raw_normalized_mean1=norm_mean1,
        raw_normalized_mean2=norm_mean2,
    )
    return properties