def compute_query_projection(
    what: Union[str, ut.Matrix] = "__x__",
    *,
    adata: AnnData,
    qdata: AnnData,
    weights: ut.Matrix,
    atlas_total_umis: Optional[ut.Vector] = None,
    query_total_umis: Optional[ut.Vector] = None,
) -> None:
    """
    Compute the projected image of the query on the atlas.

    **Input**

    Annotated query ``qdata`` and atlas ``adata``, where the observations are cells and the variables are genes, where
    ``what`` is a per-variable-per-observation matrix or the name of a per-variable-per-observation annotation
    containing such a matrix. Both data sets must have identical variable (gene) names, in the same order.

    The ``weights`` of the projection where each row is a query metacell, each column is an atlas metacell, and the
    value is the weight of the atlas metacell for projecting the query metacell, such that the sum of weights in each
    row is one.

    **Returns**

    Nothing is returned. Sets the following annotations in ``qdata``:

    Per-Variable Per-Observation (Gene-Cell) Annotations
        ``projected``
            The number of UMIs of each gene in the projected image of the query to the metacell, if the total number
            of UMIs in the projection is equal to the total number of UMIs in the query metacell.

    **Computation Parameters**

    1. Compute the fraction of each gene in the atlas based on the total UMIs of each atlas metacell, unless
       ``atlas_total_umis`` is specified.

    2. Compute the projected image of each query metacell on the atlas using the weights.

    3. Convert this image to UMIs count based on the total UMIs of each query metacell, unless ``query_total_umis``
       is specified. Note that if overriding the total atlas or query UMIs, this means that the result need not sum
       to this total.
    """
    assert np.all(adata.var_names == qdata.var_names)

    atlas_umis = ut.get_vo_proper(adata, what, layout="row_major")
    query_umis = ut.get_vo_proper(qdata, what, layout="row_major")

    # Default the totals to the per-row sums; either way normalize them to plain numpy vectors.
    if atlas_total_umis is None:
        atlas_total_umis = ut.sum_per(atlas_umis, per="row")
    atlas_total_umis = ut.to_numpy_vector(atlas_total_umis)

    if query_total_umis is None:
        query_total_umis = ut.sum_per(query_umis, per="row")
    query_total_umis = ut.to_numpy_vector(query_total_umis)

    atlas_fractions = ut.to_numpy_matrix(ut.fraction_by(atlas_umis, by="row", sums=atlas_total_umis))

    # Rows of the product are query metacells: each is the weighted mix of the atlas gene fractions.
    projected_fractions = weights @ atlas_fractions  # type: ignore

    # Scale the projected fractions back to UMIs using the (possibly overridden) query totals.
    projected_umis = ut.scale_by(projected_fractions, scale=query_total_umis, by="row")
    ut.set_vo_data(qdata, "projected", projected_umis)
def _test_per(rows_matrix: ut.Matrix) -> None:
    """
    Check the per-row and per-column reductions of ``ut`` on a small matrix.

    The expected values below correspond to the 2x3 matrix ``[[0, 1, 2], [3, 4, 5]]``. Per-row operations are applied
    to ``rows_matrix`` as given, and per-column operations to a column-major copy of it.
    """
    columns_matrix = ut.to_layout(rows_matrix, layout="column_major")

    values = [[0, 1, 2], [3, 4, 5]]
    row_sums, column_sums = [3, 12], [3, 5, 7]
    row_sums_squared, column_sums_squared = [5, 50], [9, 17, 29]

    # Variance is computed as the mean of the squares minus the square of the mean.
    row_variances = [squared / 3 - (total / 3) ** 2 for total, squared in zip(row_sums, row_sums_squared)]
    column_variances = [squared / 2 - (total / 2) ** 2 for total, squared in zip(column_sums, column_sums_squared)]

    # Each entry: (reduction, expected per-row result, expected per-column result).
    reductions = [
        (ut.nnz_per, [2, 3], [1, 2, 2]),
        (ut.sum_per, row_sums, column_sums),
        (ut.max_per, [2, 5], [3, 4, 5]),
        (ut.min_per, [0, 3], [0, 1, 2]),
        (ut.sum_squared_per, row_sums_squared, column_sums_squared),
        (ut.fraction_per, [total / 15 for total in row_sums], [total / 15 for total in column_sums]),
        (ut.mean_per, [total / 3 for total in row_sums], [total / 2 for total in column_sums]),
        (ut.variance_per, row_variances, column_variances),
    ]
    for reduce_per, expected_per_row, expected_per_column in reductions:
        assert np.allclose(reduce_per(rows_matrix, per="row"), np.array(expected_per_row))
        assert np.allclose(reduce_per(columns_matrix, per="column"), np.array(expected_per_column))

    # The normalized variance (variance over mean) is only checked per column.
    expected_normalized_variances = [
        (squared / 2 - (total / 2) ** 2) / (total / 2) for total, squared in zip(column_sums, column_sums_squared)
    ]
    assert np.allclose(
        ut.normalized_variance_per(columns_matrix, per="column"),
        np.array(expected_normalized_variances),
    )

    # Fractions of each entry out of its row total.
    fractions_of_rows = ut.to_numpy_matrix(ut.fraction_by(rows_matrix, by="row"))
    expected_of_rows = [[value / total for value in row] for row, total in zip(values, row_sums)]
    assert np.allclose(fractions_of_rows, np.array(expected_of_rows))

    # Fractions of each entry out of its column total.
    fractions_of_columns = ut.to_numpy_matrix(ut.fraction_by(columns_matrix, by="column"))
    expected_of_columns = [[value / total for value, total in zip(row, column_sums)] for row in values]
    assert np.allclose(fractions_of_columns, np.array(expected_of_columns))
def find_systematic_genes(
    what: Union[str, ut.Matrix] = "__x__",
    *,
    adata: AnnData,
    qdata: AnnData,
    atlas_total_umis: Optional[ut.Vector] = None,
    query_total_umis: Optional[ut.Vector] = None,
    low_gene_quantile: float = pr.systematic_low_gene_quantile,
    high_gene_quantile: float = pr.systematic_high_gene_quantile,
    to_property_name: str = "systematic_gene",
) -> None:
    """
    Find genes that are systematically higher or lower in the query compared to the atlas.

    **Input**

    Annotated query ``qdata`` and atlas ``adata``, where the observations are cells and the variables are genes, where
    ``what`` is a per-variable-per-observation matrix or the name of a per-variable-per-observation annotation
    containing such a matrix. Both data sets must have identical variable (gene) names, in the same order.

    **Returns**

    Nothing is returned. Sets the following annotations in ``qdata``:

    Variable (Gene) Annotations
        ``systematic_gene`` (or ``to_property_name``)
            A boolean mask indicating whether the gene is systematically higher or lower in the query compared to
            the atlas.

    **Computation Parameters**

    1. Compute the fraction of each gene out of the total UMIs in both the atlas and the query. If
       ``atlas_total_umis`` and/or ``query_total_umis`` are given, use them as the basis instead of the sum of the
       UMIs.

    2. Compute for each gene its ``low_gene_quantile`` (default: {low_gene_quantile}) fraction and its
       ``high_gene_quantile`` (default: {high_gene_quantile}) fraction, in both the query and the atlas.

    3. Mark as systematic the genes for which the low quantile value in the query is (strictly) above the atlas high
       quantile value.

    4. Mark as systematic the genes for which the low quantile value in the atlas is at least the query high quantile
       value.
    """
    assert 0 <= low_gene_quantile <= 1
    assert 0 <= high_gene_quantile <= 1
    assert np.all(adata.var_names == qdata.var_names)

    query_umis = ut.get_vo_proper(qdata, what, layout="row_major")
    atlas_umis = ut.get_vo_proper(adata, what, layout="row_major")

    # A ``None`` total is passed through as-is to ``fraction_by``.
    atlas_fractions = ut.to_numpy_matrix(ut.fraction_by(atlas_umis, by="row", sums=atlas_total_umis))
    query_fractions = ut.to_numpy_matrix(ut.fraction_by(query_umis, by="row", sums=query_total_umis))

    # The quantiles are taken per gene (column), so switch to a column-major layout first.
    query_fractions = ut.to_layout(query_fractions, layout="column_major")
    atlas_fractions = ut.to_layout(atlas_fractions, layout="column_major")

    query_low_gene_values = ut.quantile_per(query_fractions, low_gene_quantile, per="column")
    atlas_low_gene_values = ut.quantile_per(atlas_fractions, low_gene_quantile, per="column")

    query_high_gene_values = ut.quantile_per(query_fractions, high_gene_quantile, per="column")
    atlas_high_gene_values = ut.quantile_per(atlas_fractions, high_gene_quantile, per="column")

    # NOTE(review): this comparison is strict (``>``) while the mirrored one below is inclusive (``>=``).
    # The behavior is kept as-is; confirm whether the asymmetry is intended.
    query_above_atlas = query_low_gene_values > atlas_high_gene_values
    atlas_above_query = atlas_low_gene_values >= query_high_gene_values

    systematic = query_above_atlas | atlas_above_query

    ut.set_v_data(qdata, to_property_name, systematic)
def project_query_onto_atlas(
    what: Union[str, ut.Matrix] = "__x__",
    *,
    adata: AnnData,
    qdata: AnnData,
    atlas_total_umis: Optional[ut.Vector] = None,
    query_total_umis: Optional[ut.Vector] = None,
    project_log_data: bool = pr.project_log_data,
    fold_normalization: float = pr.project_fold_normalization,
    min_significant_gene_value: float = pr.project_min_significant_gene_value,
    max_consistency_fold_factor: float = pr.project_max_consistency_fold_factor,
    candidates_count: int = pr.project_candidates_count,
    min_usage_weight: float = pr.project_min_usage_weight,
    reproducible: bool,
) -> ut.CompressedMatrix:
    """
    Project query metacells onto atlas metacells.

    **Input**

    Annotated query ``qdata`` and atlas ``adata``, where the observations are cells and the variables are genes, where
    ``what`` is a per-variable-per-observation matrix or the name of a per-variable-per-observation annotation
    containing such a matrix. Both data sets must have identical variable (gene) names, in the same order.

    Typically this data excludes any genes having a systematic difference between the query and the atlas, e.g. genes
    detected by :py:func:`metacells.tools.project.find_systematic_genes`.

    **Returns**

    A (CSR) matrix whose rows are query metacells and columns are atlas metacells, where each entry is the weight of
    the atlas metacell in the projection of the query metacells. The sum of weights in each row (that is, for a single
    query metacell) is 1. The weighted sum of the atlas metacells using these weights is the "projected" image of the
    query metacell onto the atlas.

    This function does not itself set any annotations in ``qdata``.

    **Computation Parameters**

    0. All fold computations (log2 of the ratio between gene expressions as a fraction of the total UMIs) use the
       ``fold_normalization`` (default: {fold_normalization}). Fractions are computed based on the total UMIs, unless
       ``atlas_total_umis`` and/or ``query_total_umis`` are specified.

    For each query metacell:

    1. Correlate the metacell with all the atlas metacells, and pick the highest-correlated one as the "anchor". If
       ``reproducible``, a slower (still parallel) but reproducible algorithm will be used.

    2. Consider as candidates only atlas metacells whose maximal gene fold factor compared to the anchor is at most
       ``max_consistency_fold_factor`` (default: {max_consistency_fold_factor}). Ignore the fold factors of genes
       whose sum of UMIs in the anchor and the candidate metacells is less than ``min_significant_gene_value``
       (default: {min_significant_gene_value}).

    3. Select the ``candidates_count`` (default: {candidates_count}) candidate metacells with the highest correlation
       with the query metacell.

    4. Compute the non-negative weights (with a sum of 1) of the selected candidates that give the best projection of
       the query metacells onto the atlas. Since the algorithm for computing these weights rarely produces an exact 0
       weight, reduce all weights less than the ``min_usage_weight`` (default: {min_usage_weight}) to zero. If
       ``project_log_data`` (default: {project_log_data}), compute the match on the log of the data instead of the
       actual data.

    The per-metacell steps are delegated to the ``_project_single_metacell`` helper.
    """
    assert fold_normalization > 0
    assert candidates_count > 0
    assert min_usage_weight >= 0
    assert max_consistency_fold_factor >= 0
    assert np.all(adata.var_names == qdata.var_names)

    atlas_umis = ut.get_vo_proper(adata, what, layout="row_major")
    query_umis = ut.get_vo_proper(qdata, what, layout="row_major")

    # Default the totals to the per-row sums; either way normalize them to plain numpy vectors.
    if atlas_total_umis is None:
        atlas_total_umis = ut.sum_per(atlas_umis, per="row")
    atlas_total_umis = ut.to_numpy_vector(atlas_total_umis)

    if query_total_umis is None:
        query_total_umis = ut.sum_per(query_umis, per="row")
    query_total_umis = ut.to_numpy_vector(query_total_umis)

    atlas_fractions = ut.to_numpy_matrix(ut.fraction_by(atlas_umis, by="row", sums=atlas_total_umis))
    query_fractions = ut.to_numpy_matrix(ut.fraction_by(query_umis, by="row", sums=query_total_umis))

    # Compute the log on a separate array instead of adding and then subtracting the normalization in place: the
    # add/subtract round trip does not restore the fractions exactly in floating point.
    atlas_log_fractions = atlas_fractions + fold_normalization
    np.log2(atlas_log_fractions, out=atlas_log_fractions)
    query_log_fractions = query_fractions + fold_normalization
    np.log2(query_log_fractions, out=query_log_fractions)

    if project_log_data:
        atlas_project_data = atlas_log_fractions
        query_project_data = query_log_fractions
    else:
        atlas_project_data = atlas_fractions
        query_project_data = query_fractions

    query_atlas_corr = ut.cross_corrcoef_rows(query_project_data, atlas_project_data, reproducible=reproducible)

    @ut.timed_call("project_single_metacell")
    def _project_single(query_metacell_index: int) -> Tuple[ut.NumpyVector, ut.NumpyVector]:
        return _project_single_metacell(
            atlas_umis=atlas_umis,
            query_atlas_corr=query_atlas_corr,
            atlas_project_data=atlas_project_data,
            query_project_data=query_project_data,
            atlas_log_fractions=atlas_log_fractions,
            candidates_count=candidates_count,
            min_significant_gene_value=min_significant_gene_value,
            min_usage_weight=min_usage_weight,
            max_consistency_fold_factor=max_consistency_fold_factor,
            query_metacell_index=query_metacell_index,
        )

    results = list(ut.parallel_map(_project_single, qdata.n_obs))

    # Assemble the CSR matrix: each result holds the column (atlas) indices and the matching weights of one row.
    indices = np.concatenate([result[0] for result in results], dtype="int32")
    data = np.concatenate([result[1] for result in results], dtype="float32")

    # The row pointers are the running total of the number of entries per row, starting at zero.
    atlas_used_sizes = [0]
    atlas_used_sizes.extend(len(result[0]) for result in results)
    indptr = np.cumsum(np.array(atlas_used_sizes))

    return sp.csr_matrix((data, indices, indptr), shape=(qdata.n_obs, adata.n_obs))
def compute_knn_by_features(
    adata: AnnData,
    what: Union[str, ut.Matrix] = "__x__",
    *,
    max_top_feature_genes: int = pr.max_top_feature_genes,
    similarity_value_normalization: float = pr.umap_similarity_value_normalization,
    similarity_log_data: bool = pr.umap_similarity_log_data,
    similarity_method: str = pr.umap_similarity_method,
    logistics_location: float = pr.logistics_location,
    logistics_slope: float = pr.logistics_slope,
    k: int,
    balanced_ranks_factor: float = pr.knn_balanced_ranks_factor,
    incoming_degree_factor: float = pr.knn_incoming_degree_factor,
    outgoing_degree_factor: float = pr.knn_outgoing_degree_factor,
    reproducible: bool = pr.reproducible,
) -> ut.PandasFrame:
    """
    Compute KNN graph between metacells based on feature genes.

    If ``reproducible`` (default: {reproducible}) is ``True``, a slower (still parallel) but reproducible algorithm
    will be used to compute pearson correlations.

    **Input**

    Annotated ``adata`` where each observation is a metacell and the variables are genes, where ``what`` is a
    per-variable-per-observation matrix or the name of a per-variable-per-observation annotation containing such a
    matrix.

    **Returns**

    Sets the following in ``adata``:

    Observations-Pair (Metacells) Annotations
        ``obs_outgoing_weights``
            A sparse square matrix where each non-zero entry is the weight of an edge between a pair of cells or
            genes, where the sum of the weights of the outgoing edges for each element is 1 (there is always at least
            one such edge).

    Also return a pandas data frame of the similarities between the observations (metacells).

    **Computation Parameters**

    1. Invoke :py:func:`metacells.tools.high.find_top_feature_genes` using ``max_top_feature_genes`` (default:
       {max_top_feature_genes}) to pick the feature genes to use to compute similarities between the metacells.

    2. Compute the fractions of each gene in each cell, and add the ``similarity_value_normalization`` (default:
       {similarity_value_normalization}) to it.

    3. If ``similarity_log_data`` (default: {similarity_log_data}), invoke the
       :py:func:`metacells.utilities.computation.log_data` function to compute the log (base 2) of the data.

    4. Invoke :py:func:`metacells.tools.similarity.compute_obs_obs_similarity` using ``similarity_method`` (default:
       {similarity_method}), ``logistics_location`` (default: {logistics_location}) and ``logistics_slope`` (default:
       {logistics_slope}) and convert this to distances.

    5. Invoke :py:func:`metacells.tools.knn_graph.compute_obs_obs_knn_graph` using the distances, ``k`` (no
       default!), ``balanced_ranks_factor`` (default: {balanced_ranks_factor}), ``incoming_degree_factor`` (default:
       {incoming_degree_factor}), ``outgoing_degree_factor`` (default: {outgoing_degree_factor}) to compute a
       "skeleton" graph to overlay on top of the UMAP graph.
    """
    tl.find_top_feature_genes(adata, max_genes=max_top_feature_genes)

    # The fractions are out of the total of all the genes, not just of the selected feature genes.
    all_data = ut.get_vo_proper(adata, what, layout="row_major")
    all_fractions = ut.fraction_by(all_data, by="row")

    top_feature_genes_mask = ut.get_v_numpy(adata, "top_feature_gene")

    # Restrict to the feature genes, then go back to a row-major dense matrix so it can be modified in place.
    top_feature_genes_fractions = all_fractions[:, top_feature_genes_mask]
    top_feature_genes_fractions = ut.to_layout(top_feature_genes_fractions, layout="row_major")
    top_feature_genes_fractions = ut.to_numpy_matrix(top_feature_genes_fractions)

    top_feature_genes_fractions += similarity_value_normalization

    if similarity_log_data:
        top_feature_genes_fractions = ut.log_data(top_feature_genes_fractions, base=2)

    # The similarity is computed on a slice containing only the feature genes, matching the fractions matrix.
    tdata = ut.slice(adata, vars=top_feature_genes_mask)
    similarities = tl.compute_obs_obs_similarity(
        tdata,
        top_feature_genes_fractions,
        method=similarity_method,
        reproducible=reproducible,
        logistics_location=logistics_location,
        logistics_slope=logistics_slope,
        inplace=False,
    )
    assert similarities is not None

    # The graph itself is computed for the full (unsliced) data.
    tl.compute_obs_obs_knn_graph(
        adata,
        similarities,
        k=k,
        balanced_ranks_factor=balanced_ranks_factor,
        incoming_degree_factor=incoming_degree_factor,
        outgoing_degree_factor=outgoing_degree_factor,
    )

    return similarities
def compute_outliers_matches(
    what: Union[str, ut.Matrix] = "__x__",
    *,
    adata: AnnData,
    gdata: AnnData,
    group: Union[str, ut.Vector] = "metacell",
    similar: str = "similar",
    value_normalization: float = pr.outliers_value_normalization,
    reproducible: bool,
) -> None:
    """
    Given an assignment of observations (cells) to groups (metacells), compute for each outlier the "most similar"
    group.

    **Input**

    Annotated ``adata``, where the observations are cells and the variables are genes, where ``what`` is a
    per-variable-per-observation matrix or the name of a per-variable-per-observation annotation containing such a
    matrix.

    In addition, ``gdata`` is assumed to have one observation for each group, and use the same genes as ``adata``.

    The outliers are the cells whose ``group`` value is negative.

    **Returns**

    Sets the following in ``adata``:

    Per-Observation (Cell) Annotations
        ``similar`` (default: {similar})
            For each outlier observation (cell), the index of the "most similar" group. This is ``-1`` for all the
            cells which are not outliers.

    **Computation Parameters**

    1. Compute the log2 of the fraction of each gene in each of the outlier cells and the group metacells using
       the ``value_normalization`` (default: {value_normalization}).

    2. Cross-correlate each of the outlier cells with each of the group metacells, in a ``reproducible`` manner.

    3. Pick for each outlier cell the group metacell it is most correlated with.
    """

    def _log_fractions_of(umis: ut.Matrix) -> ut.NumpyMatrix:
        # Dense log2 of the (normalized) fraction of each gene out of its row total.
        fractions = ut.to_numpy_matrix(ut.fraction_by(umis, by="row"))
        fractions += value_normalization
        return np.log2(fractions, out=fractions)

    is_outlier = ut.get_o_numpy(adata, group) < 0
    odata = ut.slice(adata, obs=is_outlier)

    outliers_umis = ut.get_vo_proper(odata, what, layout="row_major")
    groups_umis = ut.get_vo_proper(gdata, what, layout="row_major")

    correlations = ut.cross_corrcoef_rows(
        _log_fractions_of(outliers_umis),
        _log_fractions_of(groups_umis),
        reproducible=reproducible,
    )

    # One row per outlier cell, one column per group; pick the best-correlated group for each outlier.
    best_group_of_outliers = np.argmax(correlations, axis=1)
    assert len(best_group_of_outliers) == odata.n_obs

    best_group_of_cells = np.full(adata.n_obs, -1, dtype="int32")
    best_group_of_cells[is_outlier] = best_group_of_outliers
    ut.set_o_data(adata, similar, best_group_of_cells)
def compute_significant_projected_fold_factors(
    adata: AnnData,
    what: Union[str, ut.Matrix] = "__x__",
    *,
    total_umis: Optional[ut.Vector],
    projected: Union[str, ut.Matrix] = "projected",
    fold_normalization: float = pr.project_fold_normalization,
    min_significant_gene_value: float = pr.project_min_significant_gene_value,
    min_gene_fold_factor: float = pr.project_max_projection_fold_factor,
    min_entry_fold_factor: float = pr.min_entry_project_fold_factor,
    abs_folds: bool = pr.project_abs_folds,
) -> None:
    """
    Compute the significant projected fold factors of genes for each query metacell.

    This computes, for each metacell of the query, the fold factors between the actual query UMIs and the UMIs of the
    projection of the metacell onto the atlas (see :py:func:`metacells.tools.project.project_query_onto_atlas`). The
    result per-metacell-per-gene matrix is then made sparse by discarding too-low values (setting them to zero).
    Ideally, this matrix should be "very" sparse. If it contains "too many" non-zero values, more genes need to be
    ignored by the projection, or somehow corrected for batch effects prior to computing the projection.

    **Input**

    Annotated ``adata``, where the observations are query metacells and the variables are genes, where ``what`` is a
    per-variable-per-observation matrix or the name of a per-variable-per-observation annotation containing such a
    matrix.

    In addition, the ``projected`` UMIs of each query metacells onto the atlas, and the ``total_umis`` to use as the
    basis for computing the fractions of both the actual and the projected data (passed as-is to ``fraction_by``).

    **Returns**

    Nothing is returned. Sets the following in ``adata``:

    Per-Variable Per-Observation (Gene-Cell) Annotations
        ``projected_fold``
            For each gene and query metacell, the fold factor of this gene between the query and its projection
            (unless the value is too low to be of interest, in which case it will be zero).

    **Computation Parameters**

    1. For each group (metacell), for each gene, compute the gene's fold factor log2((actual fraction +
       ``fold_normalization``) / (expected fraction + ``fold_normalization``)), similarly to
       :py:func:`metacells.tools.project.project_query_onto_atlas` (the default ``fold_normalization`` is
       {fold_normalization}).

    2. Set the fold factor to zero for every case where the total UMIs of the gene in the query metacell and the
       projected image is not at least ``min_significant_gene_value`` (default: {min_significant_gene_value}).

    3. If the maximal fold factor for a gene (across all metacells) is below ``min_gene_fold_factor`` (default:
       {min_gene_fold_factor}), then set all the gene's fold factors to zero (too low to be of interest).

    4. Otherwise, for any metacell whose fold factor for the gene is less than ``min_entry_fold_factor`` (default:
       {min_entry_fold_factor}), set the fold factor to zero (too low to be of interest). If ``abs_folds`` (default:
       {abs_folds}), consider the absolute fold factors.
    """
    assert 0 <= min_entry_fold_factor <= min_gene_fold_factor
    assert fold_normalization >= 0

    metacells_data = ut.get_vo_proper(adata, what, layout="row_major")
    projected_data = ut.get_vo_proper(adata, projected, layout="row_major")

    # Make the fractions dense before the element-wise arithmetic below, the same way the sibling functions do.
    metacells_fractions = ut.to_numpy_matrix(ut.fraction_by(metacells_data, by="row", sums=total_umis))
    projected_fractions = ut.to_numpy_matrix(ut.fraction_by(projected_data, by="row", sums=total_umis))

    metacells_fractions += fold_normalization
    projected_fractions += fold_normalization

    dense_folds = metacells_fractions / projected_fractions
    dense_folds = np.log2(dense_folds, out=dense_folds)

    # Per-entry sum of the actual and projected UMIs; kept under its own name so it does not shadow the
    # per-metacell ``total_umis`` parameter.
    combined_umis = ut.to_numpy_matrix(metacells_data + projected_data)  # type: ignore
    insignificant_folds_mask = combined_umis < min_significant_gene_value
    ut.log_calc("insignificant entries", insignificant_folds_mask)
    dense_folds[insignificant_folds_mask] = 0.0

    significant_folds = significant_folds_matrix(dense_folds, min_gene_fold_factor, min_entry_fold_factor, abs_folds)
    ut.set_vo_data(adata, "projected_fold", significant_folds)
def find_metacells_significant_genes(
    adata: AnnData,
    what: Union[str, ut.Matrix] = "__x__",
    *,
    min_gene_range_fold: float = pr.min_significant_metacells_gene_range_fold_factor,
    normalization: float = pr.metacells_gene_range_normalization,
    min_gene_fraction: float = pr.min_significant_metacells_gene_fraction,
    inplace: bool = True,
) -> Optional[ut.PandasSeries]:
    """
    Find genes which have a significant signal in metacells data. This computation is too unreliable to be used on
    cells.

    Find genes which have a high maximal expression in at least one metacell, and a wide range of expression across
    the metacells. Such genes are good candidates for being used as marker genes and/or to compute distances between
    metacells.

    **Input**

    Annotated ``adata``, where the observations are metacells and the variables are genes, where ``what`` is a
    per-variable-per-observation matrix or the name of a per-variable-per-observation annotation containing such a
    matrix.

    **Returns**

    Variable (Gene) Annotations
        ``significant_gene``
            A boolean mask indicating whether each gene was found to be significant.

    If ``inplace`` (default: {inplace}), this is written to the data, and the function returns ``None``. Otherwise
    this is returned as a pandas series (indexed by the variable names).

    **Computation Parameters**

    1. Compute the minimal and maximal expression level (fraction out of the metacell total) of each gene.

    2. Select the genes whose fold factor (log2 of maximal over minimal value, using the ``normalization`` (default:
       {normalization})) is at least ``min_gene_range_fold`` (default: {min_gene_range_fold}).

    3. Select the genes whose maximal expression is at least ``min_gene_fraction`` (default: {min_gene_fraction}).
    """
    assert normalization >= 0

    umis = ut.get_vo_proper(adata, what, layout="row_major")
    fractions = ut.fraction_by(umis, by="row")

    # The extremes are taken per gene (column), so switch to a column-major layout first.
    fractions_by_gene = ut.to_layout(fractions, layout="column_major")
    lowest_of_genes = ut.min_per(fractions_by_gene, per="column")
    highest_of_genes = ut.max_per(fractions_by_gene, per="column")

    # This must be tested before the in-place normalization below overwrites the maximal fractions.
    is_expressed_gene = highest_of_genes >= min_gene_fraction
    ut.log_calc("high max fraction genes", is_expressed_gene)

    # Reuse the vector of the maximal fractions to hold the (log2) fold between the extremes.
    lowest_of_genes += normalization
    highest_of_genes += normalization
    highest_of_genes /= lowest_of_genes
    fold_of_genes = np.log2(highest_of_genes, out=highest_of_genes)

    is_wide_range_gene = fold_of_genes >= min_gene_range_fold
    ut.log_calc("high range genes", is_wide_range_gene)

    is_significant_gene = is_expressed_gene & is_wide_range_gene

    if not inplace:
        ut.log_return("significant_genes", is_significant_gene)
        return ut.to_pandas_series(is_significant_gene, index=adata.var_names)

    ut.set_v_data(adata, "significant_gene", is_significant_gene)
    return None