def interaction_matrix(
    adata: AnnData,
    cluster_key: str,
    connectivity_key: Optional[str] = None,
    normalized: bool = False,
    copy: bool = False,
) -> Optional[np.ndarray]:
    """
    Compute interaction matrix for clusters.

    Parameters
    ----------
    %(adata)s
    %(cluster_key)s
    %(conn_key)s
    normalized
        If `True`, each row is normalized to sum to 1.
    %(copy)s

    Returns
    -------
    If ``copy = True``, returns the interaction matrix.

    Otherwise, modifies the ``adata`` with the following key:

        - :attr:`anndata.AnnData.uns` ``['{cluster_key}_interactions']`` - the interaction matrix.
    """
    connectivity_key = Key.obsp.spatial_conn(connectivity_key)
    _assert_categorical_obs(adata, cluster_key)
    _assert_connectivity_key(adata, connectivity_key)

    graph = nx.from_scipy_sparse_matrix(adata.obsp[connectivity_key])
    cluster = {i: {cluster_key: x} for i, x in enumerate(adata.obs[cluster_key])}

    nx.set_node_attributes(graph, cluster)
    int_mat = np.asarray(
        nx.attr_matrix(
            graph,
            node_attr=cluster_key,
            normalized=normalized,
            rc_order=adata.obs[cluster_key].cat.categories,
        )
    )

    if copy:
        return int_mat

    _save_data(adata, attr="uns", key=Key.uns.interaction_matrix(cluster_key), data=int_mat)
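
# Usage sketch (illustrative, not part of the library): calling
# `interaction_matrix` through the public squidpy API on a toy dataset. The
# cluster column name and the synthetic data are assumptions for the example;
# the connectivity key is the library default, `'spatial_connectivities'`.
#
# import numpy as np
# import pandas as pd
# import anndata as ad
# import squidpy as sq
#
# adata = ad.AnnData(np.random.rand(50, 5))
# adata.obsm["spatial"] = np.random.rand(50, 2)
# adata.obs["leiden"] = pd.Categorical(np.random.choice(["a", "b"], size=50))
# sq.gr.spatial_neighbors(adata, coord_type="generic")
# sq.gr.interaction_matrix(adata, cluster_key="leiden", normalized=True)
# adata.uns["leiden_interactions"]  # (n_clusters, n_clusters) matrix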
def centrality_scores(
    adata: AnnData,
    cluster_key: str,
    score: Optional[Union[str, Iterable[str]]] = None,
    connectivity_key: Optional[str] = None,
    copy: bool = False,
    n_jobs: Optional[int] = None,
    backend: str = "loky",
    show_progress_bar: bool = False,
) -> Optional[pd.DataFrame]:
    """
    Compute centrality scores per cluster or cell type.

    Inspired by usage in Gene Regulatory Networks (GRNs) in :cite:`celloracle`.

    Parameters
    ----------
    %(adata)s
    %(cluster_key)s
    score
        Centrality measures as described in :class:`networkx.algorithms.centrality` :cite:`networkx`.
        If `None`, use all the options below. Valid options are:

            - `{c.CLOSENESS.s!r}` - measure of how close the group is to other nodes.
            - `{c.CLUSTERING.s!r}` - measure of the degree to which nodes cluster together.
            - `{c.DEGREE.s!r}` - fraction of non-group members connected to group members.
    %(conn_key)s
    %(copy)s
    %(parallelize)s

    Returns
    -------
    If ``copy = True``, returns a :class:`pandas.DataFrame`.

    Otherwise, modifies the ``adata`` with the following key:

        - :attr:`anndata.AnnData.uns` ``['{{cluster_key}}_centrality_scores']`` - the centrality scores,
          as mentioned above.
    """
    connectivity_key = Key.obsp.spatial_conn(connectivity_key)
    _assert_categorical_obs(adata, cluster_key)
    _assert_connectivity_key(adata, connectivity_key)

    if isinstance(score, (str, Centrality)):
        centrality = [score]
    elif score is None:
        centrality = [c.s for c in Centrality]
    else:
        # an iterable of centrality names; without this branch,
        # `centrality` would be undefined below
        centrality = list(score)

    centralities = [Centrality(c) for c in centrality]

    graph = nx.from_scipy_sparse_matrix(adata.obsp[connectivity_key])

    cat = adata.obs[cluster_key].cat.categories.values
    clusters = adata.obs[cluster_key].values

    fun_dict = {}
    for c in centralities:
        if c == Centrality.CLOSENESS:
            fun_dict[c.s] = partial(nx.algorithms.centrality.group_closeness_centrality, graph)
        elif c == Centrality.DEGREE:
            fun_dict[c.s] = partial(nx.algorithms.centrality.group_degree_centrality, graph)
        elif c == Centrality.CLUSTERING:
            fun_dict[c.s] = partial(nx.algorithms.cluster.average_clustering, graph)
        else:
            raise NotImplementedError(f"Centrality `{c}` is not yet implemented.")

    n_jobs = _get_n_cores(n_jobs)
    start = logg.info(f"Calculating centralities `{centralities}` using `{n_jobs}` core(s)")

    res_list = []
    for k, v in fun_dict.items():
        df = parallelize(
            _centrality_scores_helper,
            collection=cat,
            extractor=pd.concat,
            n_jobs=n_jobs,
            backend=backend,
            show_progress_bar=show_progress_bar,
        )(clusters=clusters, fun=v, method=k)
        res_list.append(df)

    df = pd.concat(res_list, axis=1)

    if copy:
        return df

    _save_data(adata, attr="uns", key=Key.uns.centrality_scores(cluster_key), data=df, time=start)
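
# Usage sketch (illustrative): computing a single centrality measure through
# the public API. Assumes `adata` already carries a spatial graph under the
# default connectivity key and a categorical `'leiden'` column, as in the
# example above; both are assumptions for this sketch.
#
# import squidpy as sq
#
# sq.gr.centrality_scores(adata, cluster_key="leiden", score="degree_centrality")
# adata.uns["leiden_centrality_scores"]  # DataFrame indexed by cluster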
def spatial_neighbors(
    adata: AnnData,
    spatial_key: str = Key.obsm.spatial,
    coord_type: Optional[Union[str, CoordType]] = None,
    n_rings: int = 1,
    n_neigh: int = 6,
    delaunay: bool = False,
    radius: Optional[float] = None,
    transform: Optional[Union[str, Transform]] = None,
    key_added: Optional[str] = None,
) -> None:
    """
    Create a graph from spatial coordinates.

    Parameters
    ----------
    %(adata)s
    %(spatial_key)s
    coord_type
        Type of coordinate system. Valid options are:

            - `{c.VISIUM!r}` - Visium coordinates.
            - `{c.GENERIC!r}` - generic coordinates.

        If `None`, use `{c.VISIUM!r}` if ``'spatial'`` is present in :attr:`anndata.AnnData.uns`,
        otherwise use `{c.GENERIC!r}`.
    n_rings
        Number of rings of neighbors for Visium data.
    n_neigh
        Number of neighbors to consider for non-Visium data.
    delaunay
        Whether to compute the graph from Delaunay triangulation.
    radius
        Radius of neighbors for non-Visium data.
    transform
        Type of adjacency matrix transform. Valid options are:

            - `{t.SPECTRAL.s!r}` - spectral transformation of the adjacency matrix.
            - `{t.COSINE.s!r}` - cosine transformation of the adjacency matrix.
            - `{t.NONE.v}` - no transformation of the adjacency matrix.
    key_added
        Key which controls where the results are saved.

    Returns
    -------
    Modifies the ``adata`` with the following keys:

        - :attr:`anndata.AnnData.obsp` ``['{{key_added}}_connectivities']`` - spatial connectivity matrix.
        - :attr:`anndata.AnnData.obsp` ``['{{key_added}}_distances']`` - spatial distances matrix.
        - :attr:`anndata.AnnData.uns` ``['{{key_added}}']`` - spatial neighbors dictionary.
    """
    _assert_positive(n_rings, name="n_rings")
    _assert_positive(n_neigh, name="n_neigh")
    _assert_spatial_basis(adata, spatial_key)

    transform = Transform.NONE if transform is None else Transform(transform)
    if coord_type is None:
        coord_type = CoordType.VISIUM if Key.uns.spatial in adata.uns else CoordType.GENERIC
    else:
        coord_type = CoordType(coord_type)

    start = logg.info(f"Creating graph using `{coord_type}` coordinates and `{transform}` transform")

    coords = adata.obsm[spatial_key]
    if coord_type == CoordType.VISIUM:
        if n_rings > 1:
            Adj: csr_matrix = _build_connectivity(
                coords, 6, neigh_correct=True, set_diag=True, delaunay=delaunay, return_distance=False
            )
            Res = Adj
            Walk = Adj
            # the i-th ring contains spots reachable in exactly i hops;
            # store the ring index in the distance matrix
            for i in range(n_rings - 1):
                Walk = Walk @ Adj
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", SparseEfficiencyWarning)
                    Walk[Res.nonzero()] = 0.0
                Walk.eliminate_zeros()
                Walk.data[:] = i + 2.0
                Res = Res + Walk
            Adj = Res
            Adj.setdiag(0.0)
            Adj.eliminate_zeros()

            Dst = Adj.copy()
            Adj.data[:] = 1.0
        else:
            Adj = _build_connectivity(coords, 6, neigh_correct=True, delaunay=delaunay)
            Dst = None
    elif coord_type == CoordType.GENERIC:
        Adj, Dst = _build_connectivity(coords, n_neigh, radius, delaunay=delaunay, return_distance=True)
    else:
        raise NotImplementedError(coord_type)

    # check transform
    if transform == Transform.SPECTRAL:
        Adj = _transform_a_spectral(Adj)
    elif transform == Transform.COSINE:
        Adj = _transform_a_cosine(Adj)
    elif transform == Transform.NONE:
        pass
    else:
        raise NotImplementedError(f"Transform `{transform}` is not yet implemented.")

    neighs_key = Key.uns.spatial_neighs(key_added)
    conns_key = Key.obsp.spatial_conn(key_added)
    dists_key = Key.obsp.spatial_dist(key_added)

    neighbors_dict = {
        "connectivities_key": conns_key,
        "params": {"n_neighbors": n_neigh, "coord_type": coord_type.v, "radius": radius, "transform": transform.v},
        "distances_key": dists_key,
    }

    _save_data(adata, attr="obsp", key=conns_key, data=Adj)
    if Dst is not None:
        _save_data(adata, attr="obsp", key=dists_key, data=Dst, prefix=False)

    _save_data(adata, attr="uns", key=neighs_key, data=neighbors_dict, prefix=False, time=start)
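
# Usage sketch (illustrative): building a KNN graph from generic coordinates.
# The synthetic data below is an assumption for the example; the key names are
# the library defaults from the docstring.
#
# import numpy as np
# import anndata as ad
# import squidpy as sq
#
# adata = ad.AnnData(np.random.rand(100, 10))
# adata.obsm["spatial"] = np.random.rand(100, 2)
# sq.gr.spatial_neighbors(adata, coord_type="generic", n_neigh=6)
# adata.obsp["spatial_connectivities"]  # binary adjacency (CSR)
# adata.obsp["spatial_distances"]       # neighbor distances (CSR)
# adata.uns["spatial_neighbors"]        # parameters used to build the graph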
def nhood_enrichment(
    adata: AnnData,
    cluster_key: str,
    connectivity_key: Optional[str] = None,
    n_perms: int = 1000,
    numba_parallel: bool = False,
    seed: Optional[int] = None,
    copy: bool = False,
    n_jobs: Optional[int] = None,
    backend: str = "loky",
    show_progress_bar: bool = True,
) -> Optional[Tuple[np.ndarray, np.ndarray]]:
    """
    Compute neighborhood enrichment by permutation test.

    Parameters
    ----------
    %(adata)s
    %(cluster_key)s
    %(conn_key)s
    %(n_perms)s
    %(numba_parallel)s
    %(seed)s
    %(copy)s
    %(parallelize)s

    Returns
    -------
    If ``copy = True``, returns a :class:`tuple` with the z-score and the enrichment count.

    Otherwise, modifies the ``adata`` with the following keys:

        - :attr:`anndata.AnnData.uns` ``['{cluster_key}_nhood_enrichment']['zscore']`` - the enrichment z-score.
        - :attr:`anndata.AnnData.uns` ``['{cluster_key}_nhood_enrichment']['count']`` - the enrichment count.
    """
    connectivity_key = Key.obsp.spatial_conn(connectivity_key)
    _assert_categorical_obs(adata, cluster_key)
    _assert_connectivity_key(adata, connectivity_key)
    _assert_positive(n_perms, name="n_perms")

    adj = adata.obsp[connectivity_key]
    original_clust = adata.obs[cluster_key]
    # map categories to integer codes
    clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)}
    int_clust = np.array([clust_map[c] for c in original_clust], dtype=ndt)

    indices, indptr = (adj.indices.astype(ndt), adj.indptr.astype(ndt))
    n_cls = len(clust_map)

    _test = _create_function(n_cls, parallel=numba_parallel)
    count = _test(indices, indptr, int_clust)

    n_jobs = _get_n_cores(n_jobs)
    start = logg.info(f"Calculating neighborhood enrichment using `{n_jobs}` core(s)")

    perms = parallelize(
        _nhood_enrichment_helper,
        collection=np.arange(n_perms),
        extractor=np.vstack,
        n_jobs=n_jobs,
        backend=backend,
        show_progress_bar=show_progress_bar,
    )(callback=_test, indices=indices, indptr=indptr, int_clust=int_clust, n_cls=n_cls, seed=seed)
    zscore = (count - perms.mean(axis=0)) / perms.std(axis=0)

    if copy:
        return zscore, count

    _save_data(
        adata,
        attr="uns",
        key=Key.uns.nhood_enrichment(cluster_key),
        data={"zscore": zscore, "count": count},
        time=start,
    )
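
# Usage sketch (illustrative): the z-score compares the observed number of
# edges between each cluster pair against a permutation null, i.e.
# z = (count - mean(perm_counts)) / std(perm_counts). Assumes a spatial graph
# and a `'leiden'` column as in the examples above.
#
# import squidpy as sq
#
# sq.gr.nhood_enrichment(adata, cluster_key="leiden", n_perms=100, seed=0)
# adata.uns["leiden_nhood_enrichment"]["zscore"]  # (n_clusters, n_clusters)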
def test(
    self,
    cluster_key: str,
    clusters: Optional[Cluster_t] = None,
    n_perms: int = 1000,
    threshold: float = 0.01,
    seed: Optional[int] = None,
    corr_method: Optional[str] = None,
    corr_axis: Union[str, CorrAxis] = CorrAxis.INTERACTIONS.v,
    alpha: float = 0.05,
    copy: bool = False,
    key_added: Optional[str] = None,
    numba_parallel: Optional[bool] = None,
    **kwargs: Any,
) -> Optional[Mapping[str, pd.DataFrame]]:
    """
    Perform the permutation test as described in :cite:`cellphonedb`.

    Parameters
    ----------
    %(cluster_key)s
    clusters
        Clusters from :attr:`anndata.AnnData.obs` ``['{{cluster_key}}']``. Can be specified either as a
        sequence of :class:`tuple` or just a sequence of cluster names, in which case all combinations
        are considered.
    %(n_perms)s
    threshold
        Do not perform permutation test if any of the interacting components is expressed
        in less than ``threshold`` percent of cells within a given cluster.
    %(seed)s
    %(corr_method)s
    corr_axis
        Axis over which to perform the FDR correction. Only used when ``corr_method != None``. Valid options are:

            - `{fa.INTERACTIONS.s!r}` - correct interactions by performing FDR correction across the clusters.
            - `{fa.CLUSTERS.s!r}` - correct clusters by performing FDR correction across the interactions.
    alpha
        Significance level for FDR correction. Only used when ``corr_method != None``.
    %(copy)s
    key_added
        Key in :attr:`anndata.AnnData.uns` where the result is stored if ``copy = False``.
        If `None`, ``'{{cluster_key}}_ligrec'`` will be used.
    %(numba_parallel)s
    %(parallelize)s

    Returns
    -------
    %(ligrec_test_returns)s
    """
    _assert_positive(n_perms, name="n_perms")
    _assert_categorical_obs(self._adata, key=cluster_key)

    if corr_method is not None:
        corr_axis = CorrAxis(corr_axis)
        if TYPE_CHECKING:
            assert isinstance(corr_axis, CorrAxis)
    if len(self._adata.obs[cluster_key].cat.categories) <= 1:
        raise ValueError(
            f"Expected at least `2` clusters, found `{len(self._adata.obs[cluster_key].cat.categories)}`."
        )
    if TYPE_CHECKING:
        assert isinstance(self.interactions, pd.DataFrame)
        assert isinstance(self._filtered_data, pd.DataFrame)

    interactions = self.interactions[[SOURCE, TARGET]]
    self._filtered_data["clusters"] = self._adata.obs[cluster_key].astype("string").astype("category").values

    if clusters is None:
        clusters = list(map(str, self._adata.obs[cluster_key].cat.categories))
    if all(isinstance(c, str) for c in clusters):
        clusters = list(product(clusters, repeat=2))  # type: ignore[no-redef,assignment]
    clusters = sorted(
        _check_tuple_needles(
            clusters,  # type: ignore[arg-type]
            self._filtered_data["clusters"].cat.categories,
            msg="Invalid cluster `{0!r}`.",
            reraise=True,
        )
    )
    clusters_flat = list({c for cs in clusters for c in cs})

    data = self._filtered_data.loc[np.isin(self._filtered_data["clusters"], clusters_flat), :]
    data["clusters"] = data["clusters"].cat.remove_unused_categories()
    cat = data["clusters"].cat

    cluster_mapper = dict(zip(cat.categories, range(len(cat.categories))))
    gene_mapper = dict(zip(data.columns[:-1], range(len(data.columns) - 1)))  # -1 for 'clusters'

    data.columns = [gene_mapper[c] if c != "clusters" else c for c in data.columns]
    clusters_ = np.array([[cluster_mapper[c1], cluster_mapper[c2]] for c1, c2 in clusters], dtype=np.uint32)

    cat.rename_categories(cluster_mapper, inplace=True)
    # much faster than applymap (tested on 1M interactions)
    interactions_ = np.vectorize(lambda g: gene_mapper[g])(interactions.values)

    n_jobs = _get_n_cores(kwargs.pop("n_jobs", None))
    start = logg.info(
        f"Running `{n_perms}` permutations on `{len(interactions)}` interactions "
        f"and `{len(clusters)}` cluster combinations using `{n_jobs}` core(s)"
    )
    res = _analysis(
        data,
        interactions_,
        clusters_,
        threshold=threshold,
        n_perms=n_perms,
        seed=seed,
        n_jobs=n_jobs,
        numba_parallel=numba_parallel,
        **kwargs,
    )
    res = {
        "means": _create_sparse_df(
            res.means,
            index=pd.MultiIndex.from_frame(interactions, names=[SOURCE, TARGET]),
            columns=pd.MultiIndex.from_tuples(clusters, names=["cluster_1", "cluster_2"]),
            fill_value=0,
        ),
        "pvalues": _create_sparse_df(
            res.pvalues,
            index=pd.MultiIndex.from_frame(interactions, names=[SOURCE, TARGET]),
            columns=pd.MultiIndex.from_tuples(clusters, names=["cluster_1", "cluster_2"]),
            fill_value=np.nan,
        ),
        "metadata": self.interactions[self.interactions.columns.difference([SOURCE, TARGET])],
    }
    res["metadata"].index = res["means"].index.copy()
    if TYPE_CHECKING:
        assert isinstance(res, dict)

    if corr_method is not None:
        logg.info(
            f"Performing FDR correction across the `{corr_axis.v}` "
            f"using method `{corr_method}` at level `{alpha}`"
        )
        res["pvalues"] = _fdr_correct(res["pvalues"], corr_method, corr_axis, alpha=alpha)

    if copy:
        logg.info("Finish", time=start)
        return res

    _save_data(self._adata, attr="uns", key=Key.uns.ligrec(cluster_key, key_added), data=res, time=start)
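
# Usage sketch (illustrative): the high-level `squidpy.gr.ligrec` wrapper runs
# this permutation test end-to-end. The interaction pair below is an arbitrary
# example; if `interactions` is omitted, they are fetched from omnipath.
#
# import squidpy as sq
#
# res = sq.gr.ligrec(
#     adata,
#     cluster_key="leiden",
#     interactions=[("TGFB1", "TGFBR1")],
#     n_perms=100,
#     seed=0,
#     copy=True,
# )
# res["pvalues"].head()  # MultiIndex (source, target) x (cluster_1, cluster_2)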
def interaction_matrix(
    adata: AnnData,
    cluster_key: str,
    connectivity_key: Optional[str] = None,
    normalized: bool = False,
    copy: bool = False,
    weights: bool = False,
) -> Optional[np.ndarray]:
    """
    Compute interaction matrix for clusters.

    Parameters
    ----------
    %(adata)s
    %(cluster_key)s
    %(conn_key)s
    normalized
        If `True`, each row is normalized to sum to 1.
    %(copy)s
    weights
        Whether to use edge weights or binarize.

    Returns
    -------
    If ``copy = True``, returns the interaction matrix.

    Otherwise, modifies the ``adata`` with the following key:

        - :attr:`anndata.AnnData.uns` ``['{cluster_key}_interactions']`` - the interaction matrix.
    """
    connectivity_key = Key.obsp.spatial_conn(connectivity_key)
    _assert_categorical_obs(adata, cluster_key)
    _assert_connectivity_key(adata, connectivity_key)

    cats = adata.obs[cluster_key]
    mask = ~pd.isnull(cats).values
    cats = cats.loc[mask]
    if not len(cats):
        raise RuntimeError(f"After removing NaNs in `adata.obs[{cluster_key!r}]`, none remain.")

    g = adata.obsp[connectivity_key]
    g = g[mask, :][:, mask]
    n_cats = len(cats.cat.categories)

    if weights:
        g_data = g.data
    else:
        g_data = np.broadcast_to(1, shape=len(g.data))
    if pd.api.types.is_bool_dtype(g.dtype) or pd.api.types.is_integer_dtype(g.dtype):
        dtype = np.intp
    else:
        dtype = np.float_
    output = np.zeros((n_cats, n_cats), dtype=dtype)

    _interaction_matrix(g_data, g.indices, g.indptr, cats.cat.codes.to_numpy(), output)

    if normalized:
        output = output / output.sum(axis=1).reshape((-1, 1))

    if copy:
        return output

    _save_data(adata, attr="uns", key=Key.uns.interaction_matrix(cluster_key), data=output)
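
# Usage sketch (illustrative): contrasting the binary and weighted counts of
# this variant of `interaction_matrix`. Assumes the same toy `adata` as above.
#
# import squidpy as sq
#
# binary = sq.gr.interaction_matrix(adata, cluster_key="leiden", copy=True)
# weighted = sq.gr.interaction_matrix(adata, cluster_key="leiden", weights=True, copy=True)
# # `binary` counts edges between clusters; `weighted` sums their edge weights.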
def ripley_k(
    adata: AnnData,
    cluster_key: str,
    spatial_key: str = Key.obsm.spatial,
    mode: str = "ripley",
    support: int = 100,
    copy: bool = False,
) -> Optional[pd.DataFrame]:
    r"""
    Calculate `Ripley's K <https://en.wikipedia.org/wiki/Spatial_descriptive_statistics#Ripley's_K_and_L_functions>`_
    statistics for each cluster in the tissue coordinates.

    Parameters
    ----------
    %(adata)s
    %(cluster_key)s
    %(spatial_key)s
    mode
        Keyword which indicates the method for edge effects correction.
        See :class:`astropy.stats.RipleysKEstimator` for valid options.
    support
        Number of points at which Ripley's K is evaluated, for radii between :math:`min = 0`
        and :math:`max = \sqrt{{area \over 2}}`.
    %(copy)s

    Returns
    -------
    If ``copy = True``, returns a :class:`pandas.DataFrame` with the following keys:

        - `'ripley_k'` - the Ripley's K statistic.
        - `'distance'` - set of distances where the estimator was evaluated.

    Otherwise, modifies the ``adata`` with the following key:

        - :attr:`anndata.AnnData.uns` ``['{{cluster_key}}_ripley_k']`` - the above mentioned dataframe.
    """  # noqa: D205, D400
    try:
        from astropy.stats import RipleysKEstimator
    except ImportError:
        raise ImportError("Please install `astropy` as `pip install astropy`.") from None

    _assert_spatial_basis(adata, key=spatial_key)
    coord = adata.obsm[spatial_key]

    # set coordinates
    y_min = int(coord[:, 1].min())
    y_max = int(coord[:, 1].max())
    x_min = int(coord[:, 0].min())
    x_max = int(coord[:, 0].max())
    area = int((x_max - x_min) * (y_max - y_min))
    r = np.linspace(0, (area / 2) ** 0.5, support)

    # set estimator
    Kest = RipleysKEstimator(area=area, x_max=x_max, y_max=y_max, x_min=x_min, y_min=y_min)
    df_lst = []

    # TODO: how long does this take (i.e. does it make sense to measure the elapsed time?)
    logg.info("Calculating Ripley's K")
    for c in adata.obs[cluster_key].unique():
        idx = adata.obs[cluster_key].values == c
        coord_sub = coord[idx, :]
        est = Kest(data=coord_sub, radii=r, mode=mode)
        df_est = pd.DataFrame(np.stack([est, r], axis=1))
        df_est.columns = ["ripley_k", "distance"]
        df_est[cluster_key] = c
        df_lst.append(df_est)

    df = pd.concat(df_lst, axis=0)
    # filter by the smallest per-cluster maximum
    minmax_dist = df.groupby(cluster_key)["ripley_k"].max().min()
    df = df[df.ripley_k < minmax_dist].copy()

    if copy:
        return df

    _save_data(adata, attr="uns", key=Key.uns.ripley_k(cluster_key), data=df)
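
# Usage sketch (illustrative): under complete spatial randomness K(r) grows
# roughly as pi * r**2; values above that indicate clustering at scale r.
# Assumes the same toy `adata` as above.
#
# import squidpy as sq
#
# df = sq.gr.ripley_k(adata, cluster_key="leiden", support=50, copy=True)
# df.head()  # columns: ripley_k, distance, leiden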
def moran(
    adata: AnnData,
    connectivity_key: str = Key.obsp.spatial_conn(),
    genes: Optional[Union[str, Sequence[str]]] = None,
    transformation: Literal["r", "B", "D", "U", "V"] = "r",
    n_perms: int = 1000,
    corr_method: Optional[str] = "fdr_bh",
    layer: Optional[str] = None,
    seed: Optional[int] = None,
    copy: bool = False,
    n_jobs: Optional[int] = None,
    backend: str = "loky",
    show_progress_bar: bool = True,
) -> Optional[pd.DataFrame]:
    """
    Calculate Moran’s I Global Autocorrelation Statistic.

    Parameters
    ----------
    %(adata)s
    %(conn_key)s
    genes
        List of gene names, as stored in :attr:`anndata.AnnData.var_names`, used to compute Moran's I
        statistics :cite:`pysal`.

        If `None`, it's computed for :attr:`anndata.AnnData.var` ``['highly_variable']``, if present.
        Otherwise, it's computed for all genes.
    transformation
        Transformation to be used, as reported in :class:`esda.Moran`. Default is `"r"`, row-standardized.
    %(n_perms)s
    %(corr_method)s
    layer
        Layer in :attr:`anndata.AnnData.layers` to use. If `None`, use :attr:`anndata.AnnData.X`.
    %(seed)s
    %(copy)s
    %(parallelize)s

    Returns
    -------
    If ``copy = True``, returns a :class:`pandas.DataFrame` with the following keys:

        - `'I'` - Moran's I statistic.
        - `'pval_sim'` - p-value based on permutations.
        - `'VI_sim'` - variance of `'I'` from permutations.
        - `'pval_sim_{{corr_method}}'` - the corrected p-values if ``corr_method != None``.

    Otherwise, modifies the ``adata`` with the following key:

        - :attr:`anndata.AnnData.uns` ``['moranI']`` - the above mentioned dataframe.
    """
    if esda is None or libpysal is None:
        raise ImportError("Please install `esda` and `libpysal` as `pip install esda libpysal`.")

    _assert_positive(n_perms, name="n_perms")
    _assert_connectivity_key(adata, connectivity_key)

    if genes is None:
        if "highly_variable" in adata.var.columns:
            genes = adata[:, adata.var.highly_variable.values].var_names.values
        else:
            genes = adata.var_names.values
    genes = _assert_non_empty_sequence(genes, name="genes")

    n_jobs = _get_n_cores(n_jobs)
    start = logg.info(f"Calculating for `{len(genes)}` genes using `{n_jobs}` core(s)")

    w = _set_weight_class(adata, key=connectivity_key)  # init weights
    df = parallelize(
        _moran_helper,
        collection=genes,
        extractor=pd.concat,
        use_ixs=True,
        n_jobs=n_jobs,
        backend=backend,
        show_progress_bar=show_progress_bar,
    )(adata=adata, weights=w, transformation=transformation, permutations=n_perms, layer=layer, seed=seed)

    if corr_method is not None:
        _, pvals_adj, _, _ = multipletests(df["pval_sim"].values, alpha=0.05, method=corr_method)
        df[f"pval_sim_{corr_method}"] = pvals_adj

    df.sort_values(by="I", ascending=False, inplace=True)

    if copy:
        logg.info("Finish", time=start)
        return df

    _save_data(adata, attr="uns", key="moranI", data=df, time=start)
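
# Usage sketch (illustrative): ranking genes by spatial autocorrelation.
# The gene names below are assumptions for the example.
#
# import squidpy as sq
#
# df = sq.gr.moran(adata, genes=["gene_a", "gene_b"], n_perms=100, seed=0, copy=True)
# df.sort_values("I", ascending=False).head()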
        interval=interval,
    )

    if len(idx_splits) == 1:
        out = list(out_lst)[0]
    else:
        out = sum(list(out_lst)) / len(idx_splits)

    if copy:
        logg.info("Finish", time=start)
        return out, interval

    _save_data(
        adata,
        attr="uns",
        key=Key.uns.co_occurrence(cluster_key),
        data={"occ": out, "interval": interval},
        time=start,
    )


def _find_min_max(spatial: np.ndarray) -> Tuple[float, float]:
    coord_sum = np.sum(spatial, axis=1)
    min_idx, min_idx2 = np.argpartition(coord_sum, 2)[0:2]
    max_idx = np.argmax(coord_sum)
    # half the distance between the two most extreme points
    thres_max = (
        pairwise_distances(
            spatial[min_idx, :].reshape(1, -1),
            spatial[max_idx, :].reshape(1, -1),
        )[0][0]
        / 2.0
    ).astype(fp)
    thres_min = pairwise_distances(
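
# Usage sketch (illustrative): the fragment above is the tail of
# `co_occurrence`; based on the `_save_data` call, the result is retrievable
# as below. The cluster column name is an assumption for the example.
#
# import squidpy as sq
#
# sq.gr.co_occurrence(adata, cluster_key="leiden")
# occ = adata.uns["leiden_co_occurrence"]["occ"]            # co-occurrence scores
# interval = adata.uns["leiden_co_occurrence"]["interval"]  # distance thresholds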
def spatial_autocorr(
    adata: AnnData,
    connectivity_key: str = Key.obsp.spatial_conn(),
    genes: Optional[Union[str, Sequence[str]]] = None,
    mode: Literal["moran", "geary"] = SpatialAutocorr.MORAN.s,  # type: ignore[assignment]
    transformation: bool = True,
    n_perms: Optional[int] = None,
    two_tailed: bool = False,
    corr_method: Optional[str] = "fdr_bh",
    layer: Optional[str] = None,
    seed: Optional[int] = None,
    use_raw: bool = False,
    copy: bool = False,
    n_jobs: Optional[int] = None,
    backend: str = "loky",
    show_progress_bar: bool = True,
) -> Optional[pd.DataFrame]:
    """
    Calculate Global Autocorrelation Statistic (Moran’s I or Geary's C).

    See :cite:`pysal` for reference.

    Parameters
    ----------
    %(adata)s
    %(conn_key)s
    genes
        List of gene names, as stored in :attr:`anndata.AnnData.var_names`, used to compute global
        spatial autocorrelation statistic.

        If `None`, it's computed for :attr:`anndata.AnnData.var` ``['highly_variable']``, if present.
        Otherwise, it's computed for all genes.
    mode
        Mode of score calculation:

            - `{sp.MORAN.s!r}` - `Moran's I autocorrelation <https://en.wikipedia.org/wiki/Moran%27s_I>`_.
            - `{sp.GEARY.s!r}` - `Geary's C autocorrelation <https://en.wikipedia.org/wiki/Geary%27s_C>`_.
    transformation
        If `True`, weights in :attr:`anndata.AnnData.obsp` ``['{key}']`` are row-normalized,
        advised for analytic p-value calculation.
    %(n_perms)s
        If `None`, only p-values under normality assumption are computed.
    two_tailed
        If `True`, p-values are two-tailed, otherwise they are one-tailed.
    %(corr_method)s
    layer
        Layer in :attr:`anndata.AnnData.layers` to use. If `None`, use :attr:`anndata.AnnData.X`.
    %(seed)s
    use_raw
        Whether to use :attr:`anndata.AnnData.raw` when computing the statistic.
    %(copy)s
    %(parallelize)s

    Returns
    -------
    If ``copy = True``, returns a :class:`pandas.DataFrame` with the following keys:

        - `'I' or 'C'` - Moran's I or Geary's C statistic.
        - `'pval_norm'` - p-value under normality assumption.
        - `'var_norm'` - variance of `'score'` under normality assumption.
        - `'{{p_val}}_{{corr_method}}'` - the corrected p-values if ``corr_method != None``.

    If ``n_perms != None``, additionally returns the following columns:

        - `'pval_z_sim'` - p-value based on standard normal approximation from permutations.
        - `'pval_sim'` - p-value based on permutations.
        - `'var_sim'` - variance of `'score'` from permutations.

    Otherwise, modifies the ``adata`` with the following key:

        - :attr:`anndata.AnnData.uns` ``['moranI']`` - the above mentioned dataframe, if ``mode = {sp.MORAN.s!r}``.
        - :attr:`anndata.AnnData.uns` ``['gearyC']`` - the above mentioned dataframe, if ``mode = {sp.GEARY.s!r}``.
    """
    _assert_connectivity_key(adata, connectivity_key)

    if genes is None:
        if "highly_variable" in adata.var.columns:
            genes = adata[:, adata.var.highly_variable.values].var_names.values
        else:
            genes = adata.var_names.values
    genes = _assert_non_empty_sequence(genes, name="genes")

    mode = SpatialAutocorr(mode)  # type: ignore[assignment]
    if TYPE_CHECKING:
        assert isinstance(mode, SpatialAutocorr)
    params = {"mode": mode.s, "transformation": transformation, "two_tailed": two_tailed}

    if mode == SpatialAutocorr.MORAN:
        params["func"] = _morans_i
        params["stat"] = "I"
        params["expected"] = -1.0 / (adata.shape[0] - 1)  # expected score
        params["ascending"] = False
    elif mode == SpatialAutocorr.GEARY:
        params["func"] = _gearys_c
        params["stat"] = "C"
        params["expected"] = 1.0
        params["ascending"] = True
    else:
        raise NotImplementedError(f"Mode `{mode}` is not yet implemented.")

    n_jobs = _get_n_cores(n_jobs)

    vals = _get_obs_rep(adata[:, genes], use_raw=use_raw, layer=layer).T
    g = adata.obsp[connectivity_key].copy()
    # row-normalize
    if transformation:
        normalize(g, norm="l1", axis=1, copy=False)

    score = params["func"](g, vals)

    start = logg.info(f"Calculating {mode}'s statistic for `{n_perms}` permutations using `{n_jobs}` core(s)")
    if n_perms is not None:
        _assert_positive(n_perms, name="n_perms")
        perms = np.arange(n_perms)

        score_perms = parallelize(
            _score_helper,
            collection=perms,
            extractor=np.concatenate,
            use_ixs=True,
            n_jobs=n_jobs,
            backend=backend,
            show_progress_bar=show_progress_bar,
        )(mode=mode, g=g, vals=vals, seed=seed)
    else:
        score_perms = None

    with np.errstate(divide="ignore"):
        pval_results = _p_value_calc(score, score_perms, g, params)

    results = {params["stat"]: score}
    results.update(pval_results)

    df = pd.DataFrame(results, index=genes)

    if corr_method is not None:
        for pv in filter(lambda x: "pval" in x, df.columns):
            _, pvals_adj, _, _ = multipletests(df[pv].values, alpha=0.05, method=corr_method)
            df[f"{pv}_{corr_method}"] = pvals_adj

    df.sort_values(by=params["stat"], ascending=params["ascending"], inplace=True)

    if copy:
        logg.info("Finish", time=start)
        return df

    _save_data(adata, attr="uns", key=params["mode"] + params["stat"], data=df, time=start)
def calculate_image_features(
    adata: AnnData,
    img: ImageContainer,
    layer: Optional[str] = None,
    features: Union[str, Sequence[str]] = ImageFeature.SUMMARY.s,
    features_kwargs: Mapping[str, Mapping[str, Any]] = MappingProxyType({}),
    key_added: str = "img_features",
    copy: bool = False,
    n_jobs: Optional[int] = None,
    backend: str = "loky",
    show_progress_bar: bool = True,
    **kwargs: Any,
) -> Optional[pd.DataFrame]:
    """
    Calculate image features for all observations in ``adata``.

    Parameters
    ----------
    %(adata)s
    %(img_container)s
    %(img_layer)s
    features
        Features to be calculated. Valid options are:

            - `{f.TEXTURE.s!r}` - summary stats based on repeating patterns
              :meth:`squidpy.im.ImageContainer.features_texture`.
            - `{f.SUMMARY.s!r}` - summary stats of each image channel
              :meth:`squidpy.im.ImageContainer.features_summary`.
            - `{f.COLOR_HIST.s!r}` - counts in bins of image channel's histogram
              :meth:`squidpy.im.ImageContainer.features_histogram`.
            - `{f.SEGMENTATION.s!r}` - stats of a cell segmentation mask
              :meth:`squidpy.im.ImageContainer.features_segmentation`.
            - `{f.CUSTOM.s!r}` - extract features using a custom function
              :meth:`squidpy.im.ImageContainer.features_custom`.
    features_kwargs
        Keyword arguments for the different features that should be generated, such as
        ``{{ {f.TEXTURE.s!r}: {{ ... }}, ... }}``.
    key_added
        Key in :attr:`anndata.AnnData.obsm` where to store the calculated features.
    %(copy)s
    %(parallelize)s
    kwargs
        Keyword arguments for :meth:`squidpy.im.ImageContainer.generate_spot_crops`.

    Returns
    -------
    If ``copy = True``, returns a :class:`pandas.DataFrame` where columns correspond to the calculated features.

    Otherwise, modifies the ``adata`` object with the following key:

        - :attr:`anndata.AnnData.obsm` ``['{{key_added}}']`` - the above mentioned dataframe.

    Raises
    ------
    ValueError
        If a feature is not known.
    """
    layer = img._get_layer(layer)
    if isinstance(features, (str, ImageFeature)):
        features = [features]
    features = sorted({ImageFeature(f).s for f in features})

    n_jobs = _get_n_cores(n_jobs)
    start = logg.info(f"Calculating features `{list(features)}` using `{n_jobs}` core(s)")

    res = parallelize(
        _calculate_image_features_helper,
        collection=adata.obs_names,
        extractor=pd.concat,
        n_jobs=n_jobs,
        backend=backend,
        show_progress_bar=show_progress_bar,
    )(adata, img, layer=layer, features=features, features_kwargs=features_kwargs, **kwargs)

    if copy:
        logg.info("Finish", time=start)
        return res

    _save_data(adata, attr="obsm", key=key_added, data=res, time=start)
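
# Usage sketch (illustrative): extracting summary features from a Visium
# dataset. The `sq.datasets` loaders below are assumptions for the example;
# any AnnData/ImageContainer pair with matching coordinates works.
#
# import squidpy as sq
#
# adata = sq.datasets.visium_hne_adata()
# img = sq.datasets.visium_hne_image_crop()
# sq.im.calculate_image_features(adata, img, features="summary", n_jobs=1)
# adata.obsm["img_features"].head()  # one row per spot, one column per feature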