def cell_morphology(data: AnnData,
                    area_key: Optional[str] = None,
                    eccentricity_key: Optional[str] = None,
                    **kwargs):
    """Cell morphology variation between different groups

    This function only works for data with cell shape information.
    The area is calculated using the shoelace formula.
    The eccentricity assumes the cell is close to an ellipse;
    the semi-minor and semi-major axes are taken from the sides of the bounding box.

    Args:
        data: {adata}
        area_key: The key to store cell area, Default: 'area'
        eccentricity_key: The key to store cell eccentricity, Default: 'eccentricity'
        **kwargs: {analysis_kwargs}

    """
    ab = AnalysisBase(data, display_name="Cell morphology", **kwargs)
    shapes = read_shapes(data.obs, ab.shape_key)
    areas = multipolygons_area(shapes)
    eccentricity = [bbox_eccentricity(bbox) for bbox in multipoints_bbox(shapes)]
    area_key = ab.area_key if area_key is None else area_key
    eccentricity_key = ab.eccentricity_key if eccentricity_key is None else eccentricity_key
    col2adata_obs(areas, data, area_key)
    col2adata_obs(eccentricity, data, eccentricity_key)
    # results are written to obs directly, so stop the timer manually
    ab.stop_timer()
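# Usage sketch (illustrative, not part of the source): cell_morphology writes two
# per-cell columns back into data.obs. It assumes `adata` carries cell shape
# information under the configured shape key.
def _example_cell_morphology(adata: AnnData):
    cell_morphology(adata, area_key="area", eccentricity_key="eccentricity")
    # summarize the newly written columns
    return adata.obs[["area", "eccentricity"]].describe()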
def test_exp_obs_naming(data2d):
    ab = AnalysisBase(data=data2d, roi_key="ROI")
    assert ab.exp_obs == ["ROI"]
    assert ab.roi_key == "ROI"

    ab = AnalysisBase(data=data2d, exp_obs=["ROI"])
    assert ab.exp_obs == ["ROI"]
    assert ab.roi_key == "ROI"
def neighbors_map(
        data: AnnData,
        roi: str,
        cell_type_key: Optional[str] = None,
        centroid_key: Optional[str] = None,
        roi_key: Optional[str] = None,
        **plot_options,
):
    """Visualize the neighbors network built in a ROI

    Args:
        data: {adata_plotting}
        roi: {roi}
        cell_type_key: {cell_type_key}
        centroid_key: {centroid_key}
        roi_key: {roi_key}
        **plot_options: Pass to `milkviz.point_map`

    Returns:
        The plot axes

    """
    ab = AnalysisBase(data,
                      cell_type_key=cell_type_key,
                      centroid_key=centroid_key,
                      roi_key=roi_key)
    iter_data = data.obs.copy()
    points = ab.get_points()
    if len(points[0]) == 3:
        raise NotImplementedError("Does not support 3D neighbors map")
    iter_data['__spatial_centroid'] = points
    roi_info = iter_data[iter_data[ab.roi_key] == roi]
    if len(roi_info) == 0:
        raise ValueError(f"ROI does not exist, roi = {roi}")
    cell_types = roi_info[ab.cell_type_key] if ab.has_cell_type else None
    internal_kwargs = dict(legend_title="Cell type", **plot_options)

    cells = np.array(roi_info['__spatial_centroid'].to_list())
    x, y = cells[:, 0], cells[:, 1]
    neighbors = read_neighbors(roi_info, "cell_neighbors")
    labels = roi_info["cell_id"].astype(int)
    nmin = labels.min()
    links = []
    for l, neigh in zip(labels, neighbors):
        for n in neigh:
            if n > l:
                links.append((n - nmin, l - nmin))

    return point_map(x, y, types=cell_types, links=links, **internal_kwargs)
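# Usage sketch (illustrative): neighbors_map draws one ROI at a time; the ROI
# name and key below are placeholders. A neighbors search must have populated
# "cell_neighbors" and "cell_id" in data.obs beforehand.
def _example_neighbors_map(adata: AnnData):
    return neighbors_map(adata, roi="ROI_1", roi_key="ROI")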
def somde(
        data: AnnData,
        k: int = 20,
        alpha: float = 0.5,
        epoch: int = 100,
        pval: float = 0.05,
        qval: float = 0.05,
        export_key: str = "sv_gene",
        **kwargs,
):
    """A wrapper around SOMDE to identify spatially variable genes

    Args:
        data: {adata}
        k: Number of SOM nodes
        alpha: Parameter used to generate pseudo gene expression
        epoch: Number of epochs
        pval: Threshold for p-value
        qval: Threshold for q-value
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    ab = AnalysisBase(data, display_name="SOMDE", export_key=export_key, **kwargs)
    track_ix = []
    results_data = []
    for roi_name, roi_data, markers, exp, points in ab.roi_exp_iter_with_points(
            desc="Spatial variable genes: SOMDE"):
        sv_genes = smode_sv(pd.DataFrame(exp, index=markers, dtype=np.float32).fillna(0.0),
                            np.array(points, dtype=np.float32),
                            k=k,
                            alpha=alpha,
                            epoch=epoch,
                            pval=pval,
                            qval=qval)
        results_data.append(sv_genes)
        track_ix.append(roi_name)
    # a dict that stores all the markers
    markers_dict = {m: 0 for m in ab.markers}
    # unpack and merge to ensure every counter has the same markers
    ab.result = pd.DataFrame(
        data=[{**markers_dict, **Counter(i)} for i in results_data],
        index=pd.MultiIndex.from_tuples(track_ix, names=ab.exp_obs),
    )[ab.markers]
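# Usage sketch (illustrative): a quicker SOMDE run with fewer epochs; the
# parameter values here are arbitrary. The result is a ROI x marker table of
# spatially variable gene counts, assembled through ab.result above.
def _example_somde(adata: AnnData):
    somde(adata, k=10, epoch=50, pval=0.05, qval=0.05)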
def cell_density(data: AnnData,
                 ratio: float = 1.0,
                 export_key: str = "cell_density",
                 **kwargs):
    """Calculate the cell density in each ROI

    The size of each ROI is auto-computed as the area of the
    convex hull of all the cells in the ROI.

    Args:
        data: {adata}
        ratio: The ratio between the unit used in your dataset and the real length unit,
            default is 1.0; ratio = dataset unit / real length unit.
            For example, if the resolution of your dataset is 1μm but you want to use
            1mm as the unit, you should set the ratio to 0.001: 1 pixel represents 0.001mm.
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    ab = AnalysisBase(data, display_name="Cell density", export_key=export_key, **kwargs)
    ab.check_cell_type()
    result = ab.type_counter()
    area = []
    for roi_name, roi_data, points in ab.roi_iter_with_points():
        area.append(polygons_area(points))
    area = np.asarray(area) * (ratio * ratio)
    result = result.div(area, axis=0)
    result.columns.name = 'cell type'
    ab.result = result
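# Usage sketch (illustrative): unit handling for cell_density. If coordinates
# are pixels at 1µm/pixel and densities should be reported per mm²,
# ratio = 0.001, so ROI areas are scaled by ratio² = 1e-6.
def _example_cell_density(adata: AnnData):
    cell_density(adata, ratio=0.001, export_key="cell_density_mm2")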
def cell_co_occurrence(data: AnnData,
                       export_key: str = "cell_co_occurrence",
                       **kwargs):
    """The likelihood of two types of cells occurring simultaneously in a ROI

    Args:
        data: {adata}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    ab = AnalysisBase(data,
                      display_name="Cell co-occurrence",
                      export_key=export_key,
                      **kwargs)
    ab.check_cell_type()
    df = ab.type_counter()
    df = df.T
    # normalize using the mean; a count greater than the mean counts as occurrence
    df = ((df - df.mean()) / (df.max() - df.min()) > 0).astype(int)
    df = df.T
    # generate combinations of cell types
    cell_comb = list(combinations_with_replacement(df.columns, 2))
    index = []
    values = []
    for c in cell_comb:
        c1 = c[0]
        c2 = c[1]
        # if both types of cells are 1 the result is 1; if either is 0 the result is 0
        co_occur = (df[c1] * df[c2]).to_numpy()
        index.append((c1, c2))
        values.append(co_occur)
        if c1 != c2:
            index.append((c2, c1))
            values.append(co_occur)
    ab.result = pd.DataFrame(
        data=np.array(values).T,
        index=df.index,
        columns=pd.MultiIndex.from_tuples(index, names=['type1', 'type2']),
    )
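# Usage sketch (illustrative): the result is a ROI x (type1, type2) 0/1 table;
# both orderings of each pair are stored, so the column MultiIndex is symmetric.
def _example_cell_co_occurrence(adata: AnnData):
    cell_co_occurrence(adata)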
def cell_components(
        data: AnnData,
        export_key: str = "cell_components",
        **kwargs,
):
    """Count the proportion of each type of cell in each group

    Args:
        data: {adata}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    ab = AnalysisBase(data,
                      display_name="Cell Components",
                      export_key=export_key,
                      **kwargs)
    ab.check_cell_type()
    result = ab.type_counter()
    result.columns.name = 'cell type'
    ab.result = result
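# Usage sketch (illustrative): the lightest analysis here; it only needs cell
# types and the experiment grouping, no neighbor graph or spatial coordinates.
def _example_cell_components(adata: AnnData):
    cell_components(adata)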
def NCD_marker(
        data: AnnData,
        selected_markers: Optional[Array] = None,
        importance_cutoff: Number = 0.5,
        layer_key: Optional[str] = None,
        tree_kwargs: Optional[Dict] = None,
        test_method: str = "mannwhitneyu",
        pval: Number = 0.01,
        export_key: str = "ncd_marker",
        **kwargs,
):
    """Identify neighbor-cell-dependent markers

    This method tells you the dependency between a marker and its neighbor cell types.
    The dependency is calculated by building a gradient boosting tree
    (here LightGBM) to determine the feature importance. A statistical test
    and fold change are calculated for important markers and their neighbor cells;
    the fold change compares the marker expression with the cell type present / absent
    in the neighborhood.

    Args:
        data: {adata}
        importance_cutoff: Threshold to determine the feature markers
        selected_markers: {selected_markers}
        layer_key: {layer_key}
        tree_kwargs: {tree_kwargs}
        test_method: Which test method to use, anything from :code:`scipy.stats`
        pval: {pval}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    try:
        from lightgbm import LGBMRegressor
    except ImportError:
        raise ImportError(
            "lightgbm is not installed, please try `pip install lightgbm`.")
    ab = AnalysisBase(data,
                      display_name="NCD Markers",
                      export_key=export_key,
                      **kwargs)
    ab.check_neighbors()
    ab.check_cell_type()

    tree_kwargs_ = {"n_jobs": -1, "random_state": 0, "importance_type": "gain"}
    if tree_kwargs is not None:
        for k, v in tree_kwargs.items():
            tree_kwargs_[k] = v

    markers = ab.selected_markers(selected_markers)
    markers_mask = ab.markers_col.isin(markers)

    neighbors = read_neighbors(data.obs, ab.neighbors_key)
    labels = data.obs[ab.cell_id_key]
    cell_types = data.obs[ab.cell_type_key]
    col, comps = neighbor_components(neighbors, labels.tolist(),
                                     cell_types.tolist())
    neigh_comp = pd.DataFrame(
        data=comps,
        columns=col,
        index=pd.MultiIndex.from_frame(
            data.obs[[ab.cell_type_key, ab.cell_id_key]],
            names=["type", "id"],
        ),
    )
    results_data = []
    # for markers in different cell types
    with np.errstate(divide="ignore"):
        for t, x in neigh_comp.groupby(level=["type"]):
            exp_ix = x.index.to_frame()["id"]
            exp = read_exp(data[exp_ix, markers_mask], layer_key)
            for i, y in enumerate(exp):
                # copy y to prevent memory peaks, as suggested by lightgbm
                reg = LGBMRegressor(**tree_kwargs_).fit(x, y.copy())
                weights = np.asarray(reg.feature_importances_)
                weights = weights / weights.sum()
                max_ix = np.argmax(weights)
                max_weight = weights[max_ix]
                max_type = col[max_ix]
                if max_weight > importance_cutoff:
                    nx = x.copy()
                    # add the expression column so rows can be split afterwards
                    nx["exp"] = y
                    # cells with max_type in the neighborhood
                    at_neighbor = (nx.iloc[:, max_ix] != 0)
                    at_neighbor_exp = nx[at_neighbor]["exp"].to_numpy()
                    non_at_neighbor_exp = nx[~at_neighbor]["exp"].to_numpy()
                    at_sum = at_neighbor_exp.sum()
                    non_at_sum = non_at_neighbor_exp.sum()
                    if (at_sum > 0) and (non_at_sum > 0):
                        test_result = getattr(scipy.stats, test_method)(
                            at_neighbor_exp, non_at_neighbor_exp)
                        pvalue = test_result.pvalue
                        if pvalue < pval:
                            at_mean = at_neighbor_exp.mean()
                            non_at_mean = non_at_neighbor_exp.mean()
                            log2_fc = np.log2(at_mean / non_at_mean)
                            results_data.append([
                                t, markers[i], max_type,
                                max_weight, log2_fc, pvalue,
                            ])
    ab.result = pd.DataFrame(
        data=results_data,
        columns=["cell_type", "marker", "neighbor_type",
                 "dependency", "log2_FC", "pval"],
    )
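# Usage sketch (illustrative): NCD_marker requires both a neighbor graph and
# cell types (see check_neighbors/check_cell_type above). tree_kwargs is
# forwarded to lightgbm.LGBMRegressor; overriding random_state is shown here.
def _example_ncd_marker(adata: AnnData):
    NCD_marker(adata,
               importance_cutoff=0.5,
               test_method="mannwhitneyu",
               pval=0.01,
               tree_kwargs={"random_state": 42})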
def cell_dispersion(
        data: AnnData,
        method: str = "id",
        min_cells: int = 10,
        pval: float = 0.01,
        r: Optional[Number] = None,
        resample: int = 1000,
        quad: Optional[Tuple[int, int]] = None,
        rect_size: Optional[Number] = None,
        export_key: str = "cell_dispersion",
        **kwargs,
):
    """Cell distribution pattern

    There are three types of distribution patterns (0 if no cells)

    - Random (1)
    - Regular (2)
    - Cluster (3)

    Three methods are provided

    - `Index of Dispersion <../about/implementation.html#index-of-dispersion>`_
    - `Morisita’s index of dispersion <../about/implementation.html#morisitas-index-of-dispersion>`_
    - `Clark and Evans aggregation index <../about/implementation.html#clark-and-evans-aggregation-index>`_

    Notice that the Clark and Evans index usually fails to detect local aggregation.

    +--------------------------------------+--------+---------+---------+
    |                                      | Random | Regular | Cluster |
    +======================================+========+=========+=========+
    | Index of dispersion: ID              | ID = 1 | ID < 1  | ID > 1  |
    +--------------------------------------+--------+---------+---------+
    | Morisita’s index of dispersion: I    | I = 1  | I < 1   | I > 1   |
    +--------------------------------------+--------+---------+---------+
    | Clark and Evans aggregation index: R | R = 1  | R > 1   | R < 1   |
    +--------------------------------------+--------+---------+---------+

    Args:
        data: {adata}
        method: "id", "morisita", or "clark_evans" (Default: "id")
        min_cells: The minimum number of the specific type of cells in a ROI to perform analysis
        pval: {pval}
        r: :code:`method="id"`, determines the diameter of the sample window,
            should be in [0, 1]; the default 0.1 takes 1/10 of the shortest side
            of the ROI as the diameter.
        resample: :code:`method="id"`, the number of random permutations to perform
        quad: :code:`method="morisita"`, {quad}; quad is a quadrat statistic,
            it cuts a ROI into small rectangles, quad=(10, 10) means the ROI
            is divided into a 10*10 grid.
        rect_size: :code:`method="morisita"`, {rect_size}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    method = options_guard(method, ["id", "morisita", "clark_evans"])
    display_method = {
        "id": "Index of dispersion",
        "morisita": "Morisita index",
        "clark_evans": "Clark evans index",
    }
    ab = AnalysisBase(data,
                      display_name="Cell dispersion",
                      export_key=export_key,
                      method=display_method[method],
                      **kwargs)
    ab.check_cell_type()

    results_data = []
    for roi_name, roi_data, points in ab.roi_iter_with_points(
            desc="Cell dispersion"):
        bbox = points_bbox(points)
        new_df = pd.DataFrame(
            dict(points=points, cell_types=roi_data[ab.cell_type_key]))
        points_collections = []
        cell_types = []
        for c, g in new_df.groupby("cell_types"):
            points_collections.append(g["points"])
            cell_types.append(c)
        result = spatial_distribution_pattern(
            points_collections,
            bbox,
            method=method,
            r=r,
            resample=resample,
            quad=quad,
            rect_side=rect_size,
            pval=pval,
            min_cells=min_cells,
            dims=ab.dimension,
        )
        for c, pattern in zip(cell_types, result):
            results_data.append([*roi_name, c, *pattern])

    results_data = pd.DataFrame(
        data=results_data,
        columns=ab.exp_obs + ["cell_type", "index_value", "pval", "pattern"],
    ).reset_index().set_index(["index"] + ab.exp_obs)
    ab.params = dict(exp_obs=ab.exp_obs)
    ab.result = results_data
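# Usage sketch (illustrative): Morisita's index on a 10x10 quadrat grid. Per
# the table in the docstring, the "pattern" column decodes as 0 = no cells,
# 1 = random, 2 = regular, 3 = clustered.
def _example_cell_dispersion(adata: AnnData):
    cell_dispersion(adata, method="morisita", quad=(10, 10), min_cells=10)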
def hotspot(
        data: AnnData,
        selected_types: Optional[Array] = None,
        search_level: int = 3,
        quad: Optional[Tuple[int, int]] = None,
        rect_side: Optional[Tuple[float, float]] = None,
        pval: float = 0.01,
        export_key: str = "hotspot",
        **kwargs,
):
    """`Getis-Ord hotspot detection <../about/implementation.html#hotspot-detection>`_

    Used to identify cells that cluster together.

    Args:
        data: {adata}
        selected_types: {selected_types}
        search_level: How deep the search level goes
        quad: {quad}
        rect_side: {rect_size}
        pval: {pval}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    ab = AnalysisBase(data,
                      display_name="Hotspot",
                      export_key="hotspot_all",
                      **kwargs)
    ab.check_cell_type()
    if selected_types is not None:
        ab.export_key = f"hotspot_{'_'.join(selected_types)}"
    else:
        selected_types = ab.cell_types
    hotcells = []
    for roi_name, roi_data, points in ab.roi_iter_with_points(
            desc="Hotspot analysis"):
        bbox = points_bbox(points)
        roi_iter = roi_data.copy()
        roi_iter['__cells'] = points
        for t, g in roi_iter.groupby(ab.cell_type_key):
            cells = g['__cells']
            if t in selected_types:
                hots = getis_ord(
                    cells,
                    bbox,
                    search_level=search_level,
                    quad=quad,
                    rect_side=rect_side,
                    pval=pval,
                )
                hotcells.append(pd.Series(hots, index=g.index))

    result = pd.concat(hotcells)
    data.obs[ab.export_key] = result
    # the cell map would leave blanks if filled with None values
    data.obs[ab.export_key] = data.obs[ab.export_key].fillna("other")
    arr = data.obs[ab.export_key].astype("category")
    arr = arr.cat.rename_categories({
        True: "hot",
        False: "cold",
        "other": "other"
    })
    data.obs[ab.export_key] = arr
    # call this to invoke the print
    col2adata_obs(data.obs[ab.export_key], data, ab.export_key)
    ab.stop_timer()
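# Usage sketch (illustrative): restricting Getis-Ord detection to one cell type
# ("Tumor" is a placeholder). Results land in data.obs as a categorical column
# of "hot"/"cold"/"other"; with selected_types the key becomes "hotspot_<types>".
def _example_hotspot(adata: AnnData):
    hotspot(adata, selected_types=["Tumor"], search_level=3, pval=0.01)
    return adata.obs["hotspot_Tumor"].value_counts()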
def cell_interaction(
        data: AnnData,
        method: str = "pval",
        resample: int = 1000,
        pval: float = 0.01,
        export_key: str = "cell_interaction",
        **kwargs,
):
    """`Profiling cell-cell interaction <about/implementation.html#profiling-of-cell-cell-interaction>`_ using permutation test

    Neighborhood analysis tells you the relationship between different types of cells

    - Association (1)
    - Avoidance (-1)
    - No relationship (0)

    Args:
        data: {adata}
        method: "pval" or "zscore" (Default: "pval")
        resample: Number of times to perform resampling
        pval: {pval}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    .. seealso:: :class:`spatialtis.spatial_enrichment`

    """
    method = options_guard(method, ["pval", "zscore"])
    display_method = {"pval": "pseudo p-value", "zscore": "z-score"}
    ab = AnalysisBase(data,
                      method=display_method[method],
                      display_name="Cell interaction",
                      export_key=export_key,
                      **kwargs)

    cc = CellCombs(ab.cell_types)
    results_data = []
    roi_tracker = []
    repeat_time = 0
    for roi_name, roi_data in ab.roi_iter(desc="Cell interaction"):
        neighbors = read_neighbors(roi_data, ab.neighbors_key)
        labels = roi_data[ab.cell_id_key]
        cell_types = roi_data[ab.cell_type_key]
        result = cc.bootstrap(
            cell_types,
            neighbors,
            labels,
            times=resample,
            pval=pval,
            method=method,
        )
        results_data += result
        repeat_time = len(result)
        roi_tracker += [roi_name for _ in range(repeat_time)]
    df = pd.DataFrame(data=results_data,
                      columns=["type1", "type2", "value", "relationship"])
    ix = pd.DataFrame(data=roi_tracker, columns=ab.exp_obs).reset_index()
    df = df.set_index(pd.MultiIndex.from_frame(ix))
    ab.result = df
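# Usage sketch (illustrative): with method="zscore" the value column holds
# z-scores instead of pseudo p-values; the relationship column still encodes
# association (1), avoidance (-1), and no relationship (0).
def _example_cell_interaction(adata: AnnData):
    cell_interaction(adata, method="zscore", resample=500, pval=0.01)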
def spatial_autocorr(
        data: AnnData,
        method: str = "moran_i",
        pval: float = 0.05,
        two_tailed: bool = True,
        layer_key: Optional[str] = None,
        export_key: str = "spatial_autocorr",
        **kwargs,
):
    """Spatial auto-correlation for every marker

    This measures the correlation of marker expression with spatial locations.
    Moran's I is more suited for global spatial autocorrelation,
    Geary's C for local spatial autocorrelation.

    Args:
        data: {adata}
        method: "moran_i" or "geary_c" (Default: "moran_i")
        pval: {pval}
        two_tailed: Whether to use a two-tailed p-value
        layer_key: {layer_key}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    .. seealso:: :class:`spatialtis.somde`

    """
    method = options_guard(method, ['moran_i', 'geary_c'])
    ab = AnalysisBase(data,
                      method=method,
                      display_name="Spatial auto-correlation",
                      export_key=export_key,
                      **kwargs)
    track_ix = []
    results_data = []
    for roi_name, roi_data, markers, exp in ab.roi_exp_iter(
            layer_key=layer_key, desc=ab.display_name):
        neighbors = read_neighbors(roi_data, ab.neighbors_key)
        labels = roi_data[ab.cell_id_key]
        results = autocorr(
            exp.astype(np.float64),
            neighbors,
            labels=labels,
            two_tailed=two_tailed,
            pval=pval,
            method=method,
        )
        markers = markers.to_numpy()
        results = np.hstack([markers.reshape(-1, 1), results])
        track_ix += [roi_name for _ in range(len(markers))]
        results_data.append(results)
    ab.result = pd.concat(
        [
            pd.DataFrame(data=track_ix, columns=ab.exp_obs),
            pd.DataFrame(
                data=np.concatenate(results_data),
                columns=["marker", "pattern", "index_value", "pval"],
            ),
        ],
        axis=1,
    )
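# Usage sketch (illustrative): the result has one row per marker per ROI with
# columns ["marker", "pattern", "index_value", "pval"], matching the concat above.
def _example_spatial_autocorr(adata: AnnData):
    spatial_autocorr(adata, method="moran_i", two_tailed=True, pval=0.05)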
def spatial_enrichment(
        data: AnnData,
        threshold: Optional[float] = None,
        layer_key: Optional[str] = None,
        selected_markers: Optional[Array] = None,
        resample: int = 500,
        pval: float = 0.01,
        export_key: str = "spatial_enrichment",
        **kwargs,
):
    """`Profiling markers spatial enrichment <about/implementation.html#profiling-of-markers-co-expression>`_ using permutation test

    Similar to neighborhood analysis, which tells you the relationship between
    different types of cells, this analysis tells you the spatial relationship
    between markers.

    Args:
        data: {adata}
        threshold: The expression level to determine whether a marker is positive
        layer_key: {layer_key}
        selected_markers: {selected_markers}
        resample: Number of times to perform resampling
        pval: {pval}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    .. seealso:: :class:`spatialtis.cell_interaction`

    """
    ab = AnalysisBase(data,
                      display_name="Spatial enrichment",
                      export_key=export_key,
                      **kwargs)
    ab.check_neighbors()

    if (threshold is not None) and (layer_key is None):
        layer_key = f"gt_{threshold}"
        data.layers[layer_key] = (data.X.copy() >= threshold).astype(bool)
    elif (threshold is not None) and (layer_key is not None):
        warnings.warn("You specified both threshold and layer_key, "
                      "using the user-defined layer_key")
    else:
        layer_key = "mean_cut"
        data.layers[layer_key] = (data.X.copy() >= data.X.mean(axis=0)).astype(bool)

    markers = ab.selected_markers(selected_markers)
    results_data = []
    for roi_name, roi_data, mks, exp in ab.roi_exp_iter(
            selected_markers=markers,
            layer_key=layer_key,
            dtype=bool,
            desc="Spatial enrichment",
    ):
        neighbors = read_neighbors(roi_data, ab.neighbors_key)
        labels = roi_data[ab.cell_id_key]
        result = comb_bootstrap(
            exp,
            mks,
            neighbors,
            labels,
            pval=pval,
            times=resample,
        )
        for pairs in result:
            results_data.append([*roi_name, *pairs])
    df = pd.DataFrame(data=results_data,
                      columns=ab.exp_obs + ["marker1", "marker2", "value"])
    df = df.pivot_table(values="value",
                        index=ab.exp_obs,
                        columns=["marker1", "marker2"])
    ab.result = df
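# Usage sketch (illustrative): threshold/layer_key interplay. Passing only a
# threshold creates a boolean layer named "gt_<threshold>"; passing neither
# binarizes each marker at its mean into a "mean_cut" layer.
def _example_spatial_enrichment(adata: AnnData):
    spatial_enrichment(adata, threshold=1.0, resample=500, pval=0.01)
    # the binarized layer created above remains reusable
    return adata.layers["gt_1.0"]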
def NMD_marker(
        data: AnnData,
        pval: float = 0.01,
        selected_markers: Optional[Array] = None,
        importance_cutoff: Number = 0.5,
        layer_key: Optional[str] = None,
        tree_kwargs: Optional[Dict] = None,
        export_key: str = "nmd_marker",
        **kwargs,
):
    """Identify neighbor-marker-dependent markers

    The neighborhood is treated as a single cell.

    Args:
        data: {adata}
        pval: {pval}
        selected_markers: {selected_markers}
        importance_cutoff: Threshold to determine the feature markers
        layer_key: {layer_key}
        tree_kwargs: {tree_kwargs}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    try:
        from lightgbm import LGBMRegressor
    except ImportError:
        raise ImportError(
            "lightgbm is not installed, please try `pip install lightgbm`.")
    ab = AnalysisBase(data,
                      display_name="NMD marker",
                      export_key=export_key,
                      **kwargs)
    ab.check_neighbors()

    tree_kwargs_ = {"n_jobs": -1, "random_state": 0, "importance_type": "gain"}
    if tree_kwargs is not None:
        for k, v in tree_kwargs.items():
            tree_kwargs_[k] = v

    markers = ab.selected_markers(selected_markers)
    markers_mask = ab.markers_col.isin(markers)

    neighbors = read_neighbors(data.obs, ab.neighbors_key)
    cent_exp = read_exp(data[:, markers_mask], layer_key)
    # treat the neighborhood as a single cell by summing the expression
    neigh_exp = np.asarray(
        [read_exp(data[n, markers_mask], layer_key).sum(1) for n in neighbors])
    results_data = []
    for i, y in enumerate(
            pbar_iter(cent_exp, desc="Neighbor-dependent markers")):
        reg = LGBMRegressor(**tree_kwargs_).fit(neigh_exp, y)
        weights = np.asarray(reg.feature_importances_)
        ws = weights.sum()
        if ws != 0:
            weights = weights / ws
            max_ix = np.argmax(weights)
            max_weight = weights[max_ix]
            if max_weight > importance_cutoff:
                r, pvalue = spearmanr(y, neigh_exp[:, max_ix])
                if pvalue < pval:
                    results_data.append(
                        [markers[i], markers[max_ix], max_weight, r, pvalue])
    ab.result = pd.DataFrame(
        data=results_data,
        columns=["marker", "neighbor_marker", "dependency", "corr", "pval"],
    )
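# Usage sketch (illustrative): like NCD_marker this fits one LightGBM model per
# marker, but the features are the summed neighborhood expression rather than
# the neighborhood cell-type composition, so no cell types are required.
def _example_nmd_marker(adata: AnnData):
    NMD_marker(adata, importance_cutoff=0.5, pval=0.01)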
def spatial_coexp(
        data: AnnData,
        method: str = "spearman",
        use_cell_type: bool = False,
        selected_markers: Optional[List[str]] = None,
        layer_key: Optional[str] = None,
        corr_thresh: Optional[Number] = 0.5,
        export_key: str = "spatial_coexp",
        **kwargs,
):
    """Identify spatially co-expressed markers using correlation

    The correlation is calculated within pairs of neighbor cells

    Args:
        data: {adata}
        method: "spearman" or "pearson" (Default: "spearman")
        use_cell_type: Whether to use cell type information
        selected_markers: {selected_markers}
        corr_thresh: The minimum correlation value to store the result
        layer_key: {layer_key}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    method = options_guard(method, ['spearman', 'pearson'])
    display_method = {
        "spearman": "spearman correlation",
        "pearson": "pearson correlation"
    }
    ab = AnalysisBase(data,
                      method=display_method[method],
                      display_name="Spatial co-expression",
                      export_key=export_key,
                      **kwargs)
    ab.check_neighbors()
    if use_cell_type:
        ab.check_cell_type()
    ab.params = {"use_cell_type": use_cell_type}

    neighbors = read_neighbors(data.obs, ab.neighbors_key)
    labels = data.obs[ab.cell_id_key]
    pairs = neighbors_pairs(labels, neighbors, duplicates=True)
    used_markers = ab.markers
    if selected_markers is not None:
        # sort the user input by its index in the anndata
        # to maintain the order when we read the expression
        order = {v: i for i, v in enumerate(ab.markers_col)}
        used_markers = sorted(selected_markers, key=lambda x: order[x])
    markers_combs = [(x, y)
                     for x, y in combinations_with_replacement(used_markers, 2)]

    if use_cell_type:
        pairs_pool = {}
        pairs_order = {}
        type_pairs = data.obs[ab.cell_type_key][pairs.ravel()] \
            .to_numpy().reshape(pairs.shape)
        for ix in range(type_pairs.shape[1]):
            i = type_pairs[:, ix]
            c = frozenset(i)
            if pairs_pool.get(c, 0) == 0:
                pairs_pool[c] = 1
                pairs_order[c] = i
            else:
                if (pairs_order[c] != i).any():
                    pairs[:, ix] = pairs[:, ix][::-1]
                    type_pairs[:, ix] = type_pairs[:, ix][::-1]
        types = pd.DataFrame(np.vstack([pairs, type_pairs]).T,
                             columns=["p1", "p2", "c1", "c2"])

        data_collector = []
        for (t1, t2), df in pbar_iter(types.groupby(["c1", "c2"]),
                                      desc="Spatial co-expression"):
            exp1 = read_exp(data[df["p1"].to_numpy(dtype=int), :],
                            dtype=float, layer_key=layer_key)
            exp2 = read_exp(data[df["p2"].to_numpy(dtype=int), :],
                            dtype=float, layer_key=layer_key)
            r = fast_corr(exp1, exp2, method=method)
            d = pd.DataFrame(markers_combs, columns=['marker1', 'marker2'])
            d['cell1'] = t1
            d['cell2'] = t2
            d['corr'] = r
            d = d[(d['corr'] > corr_thresh) | (d['corr'] < -corr_thresh)]
            data_collector.append(d)
        d = pd.concat(data_collector)
        ab.result = d.sort_values('corr', ascending=False) \
            .reset_index(drop=True)
    else:
        exp1 = read_exp(data[pairs[0], :], dtype=float, layer_key=layer_key)
        exp2 = read_exp(data[pairs[1], :], dtype=float, layer_key=layer_key)
        r = fast_corr(exp1, exp2, method=method)
        d = pd.DataFrame(markers_combs, columns=['marker1', 'marker2'])
        d['corr'] = r
        if corr_thresh is not None:
            d = d[(d['corr'] > corr_thresh) | (d['corr'] < -corr_thresh)]
        ab.result = d.sort_values('corr', ascending=False).reset_index(drop=True)
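# Usage sketch (illustrative): with use_cell_type=True the correlations are
# resolved per (cell1, cell2) pair; note that corr_thresh=None is only honored
# in the cell-type-free branch.
def _example_spatial_coexp(adata: AnnData):
    spatial_coexp(adata, method="spearman", use_cell_type=True, corr_thresh=0.5)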
def test_points_reader(data2d, data2d_keys, data3d, data3d_keys, data_wkt):
    ab = AnalysisBase(data=data2d, roi_key='ROI')
    assert np.array(ab.get_points()).shape[1] == 2

    ab = AnalysisBase(data=data2d_keys, roi_key='ROI', centroid_key=('x', 'y'))
    assert np.array(ab.get_points()).shape[1] == 2

    ab = AnalysisBase(data=data_wkt, roi_key='ROI')
    assert np.array(ab.get_points()).shape[1] == 2

    ab = AnalysisBase(data=data3d, roi_key='ROI')
    assert np.array(ab.get_points()).shape[1] == 3

    ab = AnalysisBase(data=data3d_keys, roi_key='ROI', centroid_key=('x', 'y', 'z'))
    assert np.array(ab.get_points()).shape[1] == 3
def spatial_heterogeneity(
        data: AnnData,
        method: str = "leibovici",
        d: Optional[int] = None,
        cut: int = 3,
        export_key: str = "heterogeneity",
        **kwargs,
):
    """Evaluate tissue heterogeneity based on entropy

    Entropy describes the amount of information.

    - `Shannon entropy <../about/implementation.html#shannon-entropy>`_ (No spatial info included):\
        To compare the difference within a group (e.g. different samples from the same tumor),\
        Kullback–Leibler divergences for each sample within the group are computed;\
        a smaller value indicates less difference within the group.
    - `Leibovici entropy <../about/implementation.html#leibovici-entropy>`_:\
        You can specify the distance threshold to determine co-occurrence events.
    - `Altieri entropy <../about/implementation.html#altieri-entropy>`_:\
        You can specify the distance interval to determine co-occurrence events.

    Args:
        data: {adata}
        method: "shannon", "leibovici" or "altieri" (Default: "leibovici")
        d: :code:`method="leibovici"`, the distance threshold to determine co-occurrence events
        cut: :code:`method="altieri"`, distance interval
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    method = options_guard(method, ["shannon", "altieri", "leibovici"])
    ab = AnalysisBase(data,
                      method=f"{method.capitalize()} entropy",
                      display_name="Spatial heterogeneity",
                      export_key=export_key,
                      **kwargs)
    ab.check_cell_type()
    if method == "shannon":
        df = ab.type_counter()
        if len(df.columns) == 1:
            warnings.warn("No heterogeneity, you only have one type of cell.",
                          UserWarning)
        else:
            ent = [entropy(row) for _, row in df.iterrows()]
            ab.result = pd.DataFrame({"heterogeneity": ent}, index=df.index)
    else:
        points_collections = []
        types_collections = []
        track_ix = []
        for roi_name, roi_data, points in ab.roi_iter_with_points(
                desc="Spatial heterogeneity"):
            points_collections.append(points)
            types_collections.append(roi_data[ab.cell_type_key])
            track_ix.append(roi_name)

        ent = spatial_entropy(points_collections,
                              types_collections,
                              method=method,
                              d=d,
                              cut=cut,
                              dims=ab.dimension)
        ab.result = pd.DataFrame(
            {"heterogeneity": ent},
            index=pd.MultiIndex.from_tuples(track_ix, names=ab.exp_obs),
        )
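# Usage sketch (illustrative): Leibovici entropy needs a distance cutoff d to
# define co-occurrence events; the value below is an arbitrary placeholder in
# the dataset's coordinate units.
def _example_spatial_heterogeneity(adata: AnnData):
    spatial_heterogeneity(adata, method="leibovici", d=10)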
def expression_map(
        data: AnnData,
        roi: str,
        marker: str,
        use_shape: bool = False,
        selected_types: Optional[List] = None,
        cell_type_key: Optional[str] = None,
        marker_key: Optional[str] = None,
        shape_key: Optional[str] = None,
        centroid_key: Optional[str] = None,
        roi_key: Optional[str] = None,
        **plot_options,
):
    """Visualize marker expression in a ROI

    Args:
        data: {adata_plotting}
        roi: {roi}
        marker: The marker to visualize
        use_shape: Plot cells as polygons when shape data is available
        selected_types: {selected_types}
        cell_type_key: {cell_type_key}
        marker_key: {marker_key}
        shape_key: {shape_key}
        centroid_key: {centroid_key}
        roi_key: {roi_key}
        **plot_options: Pass to `milkviz.point_map`, `milkviz.point_map3d`
            or `milkviz.polygon_map`

    Returns:
        The plot axes

    """
    ab = AnalysisBase(data,
                      shape_key=shape_key,
                      centroid_key=centroid_key,
                      roi_key=roi_key,
                      marker_key=marker_key,
                      verbose=False)
    internal_kwargs = dict(legend_title="expression")

    iter_data = data.obs.copy()
    points = ab.get_points()
    if len(points[0]) == 3:
        ab.dimension = 3
    iter_data['__spatial_centroid'] = points
    roi_info = iter_data[iter_data[ab.roi_key] == roi]
    roi_selector = data.obs[ab.roi_key] == roi
    if len(roi_info) == 0:
        raise ValueError(f"ROI does not exist, roi = {roi}")

    if ab.marker_key is None:
        marker_v = data[roi_selector, data.var.index == marker].X.copy()
    else:
        marker_v = data[roi_selector, data.var[ab.marker_key] == marker].X.copy()
    if issparse(marker_v):
        marker_v = marker_v.A
    marker_v = marker_v.flatten()

    cell_mask = None
    if selected_types is not None:
        cell_type_key = Config.cell_type_key if cell_type_key is None else cell_type_key
        cell_types = roi_info[cell_type_key]
        utypes = np.unique(selected_types)
        cell_mask = cell_types.isin(utypes)
    internal_kwargs = {**internal_kwargs, **plot_options}

    if use_shape:
        polygons = read_shapes(roi_info, ab.shape_key)
        if cell_mask is not None:
            polygons = np.asarray(polygons)[cell_mask]
            marker_v = marker_v[cell_mask]
        ax = polygon_map(polygons, values=marker_v, **internal_kwargs)
    else:
        cells = np.array(roi_info['__spatial_centroid'].to_list())
        if cell_mask is not None:
            cells = cells[cell_mask]
            marker_v = marker_v[cell_mask]
        if ab.dimension == 2:
            x, y = cells[:, 0], cells[:, 1]
            ax = point_map(x, y, values=marker_v, **internal_kwargs)
        else:
            x, y, z = cells[:, 0], cells[:, 1], cells[:, 2]
            ax = point_map3d(x, y, z, values=marker_v, **internal_kwargs)
    plt.title(f"{marker}")
    return ax
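# Usage sketch (illustrative): the ROI and marker names are placeholders; with
# use_shape=True the expression would be painted on cell polygons instead of
# centroid points.
def _example_expression_map(adata: AnnData):
    return expression_map(adata, roi="ROI_1", marker="CD8", roi_key="ROI")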
def cell_community(
        data: AnnData,
        resolution: float = 0.05,
        partition_type: Optional[Any] = None,
        partition_kwargs: Optional[Dict] = None,
        export_key: str = "community_id",
        **kwargs,
):
    """Spatial communities detection

    Here we use the Leiden graph clustering algorithm.

    Args:
        data: {adata}
        resolution: The resolution parameter passed to leidenalg
        partition_type: The leidenalg partition type
        partition_kwargs: Pass to leidenalg.find_partition
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    ab = AnalysisBase(data,
                      display_name="Cell community",
                      export_key=export_key,
                      **kwargs)

    leidenalg = try_import("leidenalg")
    ig = try_import("igraph", install_name="python-igraph")
    ab.check_neighbors()

    if partition_type is None:
        partition_type = leidenalg.CPMVertexPartition
    if partition_kwargs is None:
        partition_kwargs = {"resolution_parameter": resolution}
    else:
        partition_kwargs = {"resolution_parameter": resolution, **partition_kwargs}

    graphs = []
    track_ix = []
    sub_comm = []
    for roi_name, roi_data, points in ab.roi_iter_with_points():
        labels = roi_data[ab.cell_id_key]
        neighbors = read_neighbors(roi_data, ab.neighbors_key)
        vertices = []
        edge_mapper = {}
        for i, (x, y) in zip(labels, points):
            vertices.append({"name": i, "x": x, "y": y})
            edge_mapper[i] = (x, y)

        graph_edges = []
        for k, vs in zip(labels, neighbors):
            if len(vs) > 0:
                for v in vs:
                    if k < v:
                        distance = euclidean(edge_mapper[k], edge_mapper[v])
                        graph_edges.append({
                            "source": k,
                            "target": v,
                            "weight": distance
                        })
        graph = ig.Graph.DictList(vertices, graph_edges)
        part = leidenalg.find_partition(graph, partition_type, **partition_kwargs)
        sub_comm += part.membership
        graphs.append(graph)
        track_ix.append(roi_name)

    sub_comm = pd.Series(sub_comm, index=data.obs.index)
    col2adata_obs(sub_comm, data, ab.export_key)
    ab.stop_timer()
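# Usage sketch (illustrative): swapping the partition type. leidenalg's
# RBConfigurationVertexPartition also accepts resolution_parameter, which the
# function injects from `resolution` when partition_kwargs omits it.
def _example_cell_community(adata: AnnData):
    leidenalg = try_import("leidenalg")
    cell_community(adata,
                   resolution=0.1,
                   partition_type=leidenalg.RBConfigurationVertexPartition)
    return adata.obs["community_id"].nunique()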
def cell_map(
        data: AnnData,
        roi: str,
        use_shape: bool = False,
        selected_types: Optional[List] = None,
        masked_type_name: str = "Other",
        masked_type_color: str = "#d3d3d3",
        cell_type_key: Optional[str] = None,
        shape_key: Optional[str] = None,
        centroid_key: Optional[str] = None,
        roi_key: Optional[str] = None,
        **plot_options,
):
    """Visualize cells in a ROI

    Args:
        data: {adata_plotting}
        roi: {roi}
        use_shape: Plot cells as polygons when shape data is available
        selected_types: {selected_types}
        masked_type_name: The name for the cell types not in selected_types
        masked_type_color: The color for the cell types not in selected_types
        cell_type_key: {cell_type_key}
        shape_key: {shape_key}
        centroid_key: {centroid_key}
        roi_key: {roi_key}
        **plot_options: Pass to `milkviz.point_map`, `milkviz.point_map3d`
            or `milkviz.polygon_map`

    """
    ab = AnalysisBase(data,
                      cell_type_key=cell_type_key,
                      shape_key=shape_key,
                      centroid_key=centroid_key,
                      roi_key=roi_key,
                      verbose=False)
    masked_type_color = to_hex(masked_type_color, keep_alpha=True)
    if ab.has_cell_type:
        all_cell_types = ab.cell_types
        color_mapper = dict(zip(all_cell_types, cycle(COLOR_POOL)))
        color_mapper[masked_type_name] = masked_type_color
    iter_data = data.obs.copy()
    points = ab.get_points()
    if len(points[0]) == 3:
        ab.dimension = 3
    iter_data['__spatial_centroid'] = points
    roi_info = iter_data[iter_data[ab.roi_key] == roi]
    if len(roi_info) == 0:
        raise ValueError(f"ROI does not exist, roi = {roi}")
    cell_types = roi_info[ab.cell_type_key] if ab.has_cell_type else None
    internal_kwargs = dict(legend_title="Cell type")

    if selected_types is not None:
        utypes = np.unique(selected_types)
        cell_mask = cell_types.isin(utypes)
        cell_types = cell_types.to_numpy()
        cell_types[~cell_mask] = masked_type_name
        internal_kwargs["colors"] = [color_mapper.get(c) for c in cell_types]
    internal_kwargs = {**internal_kwargs, **plot_options}

    if use_shape:
        polygons = read_shapes(roi_info, ab.shape_key)
        return polygon_map(polygons, types=cell_types, **internal_kwargs)
    else:
        cells = np.array(roi_info['__spatial_centroid'].to_list())
        if ab.dimension == 2:
            x, y = cells[:, 0], cells[:, 1]
            return point_map(x, y, types=cell_types, **internal_kwargs)
        else:
            x, y, z = cells[:, 0], cells[:, 1], cells[:, 2]
            return point_map3d(x, y, z, types=cell_types, **internal_kwargs)
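# Usage sketch (illustrative): highlighting a subset of cell types; everything
# else is renamed to masked_type_name and drawn in masked_type_color. "Tumor"
# and the ROI name are placeholders.
def _example_cell_map(adata: AnnData):
    return cell_map(adata, roi="ROI_1", roi_key="ROI",
                    selected_types=["Tumor"],
                    masked_type_name="Other",
                    masked_type_color="#d3d3d3")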