コード例 #1
0
def cell_morphology(data: AnnData,
                    area_key: Optional[str] = None,
                    eccentricity_key: Optional[str] = None,
                    **kwargs):
    """Compute the area and eccentricity of every cell and store them in obs

    This function only works for data that carries cell shape information.
    The area is calculated with the shoelace formula.
    The eccentricity assumes that a cell is close to an ellipse; the
    semi-minor and semi-major axes are taken from the sides of the cell's
    bounding box.

    Args:
        data: {adata}
        area_key: The key to store cell area; when omitted, the area key
            configured on the analysis is used
        eccentricity_key: The key to store cell eccentricity; when omitted,
            the eccentricity key configured on the analysis is used
        **kwargs: {analysis_kwargs}

    """
    ab = AnalysisBase(data, display_name="Cell morphology", **kwargs)
    cell_shapes = read_shapes(data.obs, ab.shape_key)

    cell_areas = multipolygons_area(cell_shapes)
    # one eccentricity per cell, derived from that cell's bounding box
    cell_eccentricity = list(
        map(bbox_eccentricity, multipoints_bbox(cell_shapes)))

    if area_key is None:
        area_key = ab.area_key
    if eccentricity_key is None:
        eccentricity_key = ab.eccentricity_key

    col2adata_obs(cell_areas, data, area_key)
    col2adata_obs(cell_eccentricity, data, eccentricity_key)
    # results are written to obs directly, so the timer is stopped manually
    ab.stop_timer()
コード例 #2
0
def test_exp_obs_naming(data2d):
    """`roi_key` and `exp_obs` must agree whichever one is supplied."""
    # the same naming is expected whether the ROI level is given through
    # `roi_key` or through a one-element `exp_obs`
    for init_kwargs in ({"roi_key": "ROI"}, {"exp_obs": ["ROI"]}):
        ab = AnalysisBase(data=data2d, **init_kwargs)
        assert ab.exp_obs == ["ROI"]
        assert ab.roi_key == "ROI"
コード例 #3
0
ファイル: map.py プロジェクト: Mr-Milk/SpatialTis
def neighbors_map(
    data: AnnData,
    roi: str,
    cell_type_key: Optional[str] = None,
    centroid_key: Optional[str] = None,
    roi_key: Optional[str] = None,
    **plot_options,
):
    """Visualize neighbors network built in a ROI

    The cells of the requested ROI are drawn at their centroid and every
    pair of neighboring cells is connected by a single link.

    Args:
        data: {adata_plotting}
        roi: {roi}
        cell_type_key: {cell_type_key}
        centroid_key: {centroid_key}
        roi_key: {roi_key}
        **plot_options: Forwarded to the point map; `legend_title` defaults
            to "Cell type" and can be overridden here

    Returns:
        The object returned by the underlying point map

    Raises:
        NotImplementedError: If the centroids are three-dimensional
        ValueError: If no cell belongs to `roi`

    """
    ab = AnalysisBase(data,
                      cell_type_key=cell_type_key,
                      centroid_key=centroid_key,
                      roi_key=roi_key)

    iter_data = data.obs.copy()
    points = ab.get_points()
    # probe the dimension only when there is a point to look at, so empty
    # data reaches the "ROI not exist" error instead of an IndexError
    if len(points) > 0 and len(points[0]) == 3:
        raise NotImplementedError("Does not support 3D neighbor map")
    iter_data['__spatial_centroid'] = points
    roi_info = iter_data[iter_data[ab.roi_key] == roi]

    if len(roi_info) == 0:
        raise ValueError(f"ROI not exist, roi = {roi}")
    cell_types = roi_info[ab.cell_type_key] if ab.has_cell_type else None

    # plot_options is unpacked last so a caller-supplied legend_title wins
    # instead of raising a duplicate-keyword TypeError
    internal_kwargs = {"legend_title": "Cell type", **plot_options}

    cells = np.array(roi_info['__spatial_centroid'].to_list())
    x, y = cells[:, 0], cells[:, 1]
    # use the configured keys rather than hard-coded column names
    neighbors = read_neighbors(roi_info, ab.neighbors_key)
    labels = roi_info[ab.cell_id_key].astype(int)
    nmin = labels.min()
    # keep every undirected edge once (neighbor id > cell id) and shift the
    # ids so they start at 0 inside this ROI
    # NOTE(review): this assumes the ids inside a ROI are consecutive, so
    # that `id - nmin` is the position of the cell in x / y -- confirm
    links = [(n - nmin, label - nmin)
             for label, neigh in zip(labels, neighbors)
             for n in neigh if n > label]

    return point_map(x, y, types=cell_types, links=links, **internal_kwargs)
コード例 #4
0
ファイル: sv_gene.py プロジェクト: Mr-Milk/SpatialTis
def somde(
    data: AnnData,
    k: int = 20,
    alpha: float = 0.5,
    epoch: int = 100,
    pval: float = 0.05,
    qval: float = 0.05,
    export_key: str = "sv_gene",
    **kwargs,
):
    """Find spatial variable genes in every ROI, this is a wrapper around somde

    The result has one row per ROI and one column per marker; each value is
    the number of times the marker appears in the genes reported for that
    ROI (0 when it is not reported).

    Args:
        data: {adata}
        k: Number of SOM nodes
        alpha: Parameters for generate pseudo gene expression
        epoch: Number of epoch
        pval: Threshold for pval
        qval: Threshold for qval
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    ab = AnalysisBase(data,
                      display_name="SOMDE",
                      export_key=export_key,
                      **kwargs)
    roi_names = []
    sv_gene_lists = []
    roi_iterator = ab.roi_exp_iter_with_points(
        desc="Spatial variable genes: SOMDE")
    for roi_name, _, markers, exp, points in roi_iterator:
        # missing expression values are treated as zero
        exp_df = pd.DataFrame(exp, index=markers,
                              dtype=np.float32).fillna(0.0)
        coords = np.array(points, dtype=np.float32)
        sv_gene_lists.append(
            smode_sv(exp_df,
                     coords,
                     k=k,
                     alpha=alpha,
                     epoch=epoch,
                     pval=pval,
                     qval=qval))
        roi_names.append(roi_name)

    # start every row from an all-zero template so that each ROI has the
    # same set of markers, then overlay the actual counts
    zero_counts = dict.fromkeys(ab.markers, 0)
    rows = []
    for genes in sv_gene_lists:
        row = dict(zero_counts)
        row.update(Counter(genes))
        rows.append(row)

    roi_index = pd.MultiIndex.from_tuples(roi_names, names=ab.exp_obs)
    ab.result = pd.DataFrame(data=rows, index=roi_index)[ab.markers]
コード例 #5
0
def cell_density(data: AnnData,
                 ratio: float = 1.0,
                 export_key: str = "cell_density",
                 **kwargs):
    """Calculating cell density in each ROI

    The per-ROI cell type counts are divided by the size of the ROI. The
    size is auto-computed from the cells of the ROI with `polygons_area`
    (documented as the area of the convex hull of all cells in the ROI).

    Args:
        data: {adata}
        ratio: The ratio between the unit used in your dataset and real length unit, default is 1.0;
               ratio = Dataset unit / real length unit;
               For example, if the resolution of your dataset is 1μm, but you want to use 1mm as unit,
               then you should set the ratio as 0.001, 1 pixels represent 0.001mm length.
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    ab = AnalysisBase(data,
                      display_name="Cell density",
                      export_key=export_key,
                      **kwargs)
    ab.check_cell_type()
    counts = ab.type_counter()

    roi_areas = np.asarray([
        polygons_area(points) for _, _, points in ab.roi_iter_with_points()
    ])
    # ratio converts a length, an area scales with its square
    scaled_areas = roi_areas * (ratio * ratio)

    density = counts.div(scaled_areas, axis=0)
    density.columns.name = 'cell type'
    ab.result = density
コード例 #6
0
def cell_co_occurrence(data: AnnData,
                       export_key: str = "cell_co_occurrence",
                       **kwargs):
    """The likelihood of two type of cells occur simultaneously in a ROI

    A cell type is flagged as occurring (1) where its count is above the
    mean taken over the transposed count table, otherwise 0. For every pair
    of cell types the two flags are multiplied, so a pair is 1 only where
    both types occur. Both orders of a pair are stored.

    Args:
        data: {adata}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """

    ab = AnalysisBase(data,
                      display_name="Cell co-occurrence",
                      export_key=export_key,
                      **kwargs)
    ab.check_cell_type()
    counts = ab.type_counter().T
    # normalize it using mean, greater than mean suggest it's occurrence
    normalized = (counts - counts.mean()) / (counts.max() - counts.min())
    occurrence = (normalized > 0).astype(int).T

    pair_index = []
    pair_values = []
    for c1, c2 in combinations_with_replacement(occurrence.columns, 2):
        # if two type of cells are all 1, the result is 1, if one is 0, the result is 0
        co_occur = (occurrence[c1] * occurrence[c2]).to_numpy()
        # a pair of distinct types is recorded in both orders
        orders = [(c1, c2)] if c1 == c2 else [(c1, c2), (c2, c1)]
        pair_index.extend(orders)
        pair_values.extend([co_occur] * len(orders))

    ab.result = pd.DataFrame(
        data=np.array(pair_values).T,
        index=occurrence.index,
        columns=pd.MultiIndex.from_tuples(pair_index,
                                          names=['type1', 'type2']),
    )
コード例 #7
0
def cell_components(
    data: AnnData,
    export_key: str = "cell_components",
    **kwargs,
):
    """Count the cells of each type in each group

    The result is the table returned by the analysis' type counter, with its
    column axis named 'cell type'.

    Args:
        data: {adata}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    ab = AnalysisBase(
        data,
        display_name="Cell Components",
        export_key=export_key,
        **kwargs,
    )
    ab.check_cell_type()

    type_counts = ab.type_counter()
    type_counts.columns.name = 'cell type'
    ab.result = type_counts
コード例 #8
0
def NCD_marker(
    data: AnnData,
    selected_markers: Optional[Array] = None,
    importance_cutoff: Number = 0.5,
    layer_key: Optional[str] = None,
    tree_kwargs: Optional[Dict] = None,
    test_method: str = "mannwhitneyu",
    pval: Number = 0.01,
    export_key: str = "ncd_marker",
    **kwargs,
):
    """Identify neighbor cells dependent marker

    This method tells you the dependency between markers and its neighbor cell type.
    The dependency is calculated by building a gradient boosting tree (in here lightgbm) to determine
    the feature importance. A statistic test and fold change will be calculated for importance markers and its
    neighbor cells, the fold change is between marker with cell type at / not at the neighborhood.

    The result is a table with the columns cell_type, marker, neighbor_type,
    dependency (normalized importance of the strongest neighbor type),
    log2_FC and pval.

    Args:
        data: {adata}
        importance_cutoff: Threshold to determine the feature markers
        selected_markers: {selected_markers}
        layer_key: {layer_key}
        tree_kwargs: {tree_kwargs}
        test_method: which test method to use, anything from :code:`scipy.stats`
        pval: {pval}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    Raises:
        ImportError: If lightgbm is not installed
        AttributeError: If `test_method` is not a name in :code:`scipy.stats`

    """

    try:
        from lightgbm import LGBMRegressor
    except ImportError as err:
        raise ImportError(
            "lightgbm is not installed, please try `pip install lightgbm`."
        ) from err
    ab = AnalysisBase(data,
                      display_name="NCD Markers",
                      export_key=export_key,
                      **kwargs)
    ab.check_neighbors()
    ab.check_cell_type()

    # user supplied options override the defaults
    tree_kwargs_ = {"n_jobs": -1, "random_state": 0, "importance_type": "gain"}
    if tree_kwargs is not None:
        tree_kwargs_.update(tree_kwargs)

    # resolve the test once; an unknown name fails before any model is fitted
    stat_test = getattr(scipy.stats, test_method)

    # NOTE(review): `markers[i]` below assumes the markers come back in the
    # same order as the rows of the expression matrix -- confirm
    markers = ab.selected_markers(selected_markers)
    markers_mask = ab.markers_col.isin(markers)

    neighbors = read_neighbors(data.obs, ab.neighbors_key)
    labels = data.obs[ab.cell_id_key]
    cell_types = data.obs[ab.cell_type_key]
    col, comps = neighbor_components(neighbors, labels.tolist(),
                                     cell_types.tolist())
    neigh_comp = pd.DataFrame(
        data=comps,
        columns=col,
        index=pd.MultiIndex.from_frame(
            data.obs[[ab.cell_type_key, ab.cell_id_key]],
            names=["type", "id"],
        ),
    )
    results_data = []
    # For markers in different cell types
    with np.errstate(divide="ignore"):
        # a scalar level keeps the group key a plain cell type; a one-element
        # list makes recent pandas versions yield 1-tuples instead
        for t, x in neigh_comp.groupby(level="type"):
            exp_ix = x.index.to_frame()["id"]
            exp = read_exp(data[exp_ix, markers_mask], layer_key)
            for i, y in enumerate(exp):
                # copy it to prevent memory peak according to lightgbm
                reg = LGBMRegressor(**tree_kwargs_).fit(x, y.copy())
                weights = np.asarray(reg.feature_importances_)
                total = weights.sum()
                if total == 0:
                    # no feature was used by the model, nothing to normalize
                    continue
                weights = weights / total
                max_ix = np.argmax(weights)
                max_weight = weights[max_ix]
                if not (max_weight > importance_cutoff):
                    continue
                max_type = col[max_ix]

                # split the expression by whether max_type is at neighbors
                at_neighbor = (x.iloc[:, max_ix] != 0).to_numpy()
                values = np.asarray(y)
                at_neighbor_exp = values[at_neighbor]
                non_at_neighbor_exp = values[~at_neighbor]
                if at_neighbor_exp.sum() > 0 and non_at_neighbor_exp.sum() > 0:
                    pvalue = stat_test(at_neighbor_exp,
                                       non_at_neighbor_exp).pvalue
                    if pvalue < pval:
                        log2_fc = np.log2(at_neighbor_exp.mean() /
                                          non_at_neighbor_exp.mean())
                        results_data.append([
                            t,
                            markers[i],
                            max_type,
                            max_weight,
                            log2_fc,
                            pvalue,
                        ])
    ab.result = pd.DataFrame(
        data=results_data,
        columns=[
            "cell_type",
            "marker",
            "neighbor_type",
            "dependency",
            "log2_FC",
            "pval",
        ],
    )
コード例 #9
0
def cell_dispersion(
    data: AnnData,
    method: str = "id",
    min_cells: int = 10,
    pval: float = 0.01,
    r: Optional[Number] = None,
    resample: int = 1000,
    quad: Optional[Tuple[int, int]] = None,
    rect_size: Optional[Number] = None,
    export_key: str = "cell_dispersion",
    **kwargs,
):
    """Cell distribution pattern

    For every ROI, the distribution pattern of each cell type is determined.
    There are three type of distribution pattern (0 if no cells)

    - Random (1)
    - Regular (2)
    - Cluster (3)

    Three methods are provided

    - `Index of Dispersion <../about/implementation.html#index-of-dispersion>`_
    - `Morisita’s index of dispersion <../about/implementation.html#morisitas-index-of-dispersion>`_
    - `Clark and Evans aggregation index <../about/implementation.html#clark-and-evans-aggregation-index>`_

    Notice that the Clark and Evans index usually fails to detect local aggregation.

    +--------------------------------------+--------+---------+---------+
    |                                      | Random | Regular | Cluster |
    +======================================+========+=========+=========+
    | Index of dispersion: ID              | ID = 1 | ID < 1  | ID > 1  |
    +--------------------------------------+--------+---------+---------+
    | Morisita’s index of dispersion: I    | I = 1  |  I < 1  |  I > 1  |
    +--------------------------------------+--------+---------+---------+
    | Clark and Evans aggregation index: R | R = 1  |  R > 1  |  R < 1  |
    +--------------------------------------+--------+---------+---------+

    Args:
        data: {adata}
        method: "id", "morisita", and "clark_evans" (Default: "id")
        min_cells: The minimum number of the specific type of cells in a ROI to perform analysis
        pval: {pval}
        r: :code:`method="id"`, determine diameter of sample window, should be in [0, 1], default is 0.1
            this take 1/10 of the shortest side of the ROI as the diameter.
        resample: :code:`method="id"`, the number of random permutations to perform
        quad: :code:`method="morisita"`, {quad}
        rect_size: :code:`method="morisita"`, {rect_size}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    "quad" is quadratic statistic, it cuts a ROI into few rectangles, quad=(10,10) means the ROI will have 10*10 grid.

    """
    method = options_guard(method, ["id", "morisita", "clark_evans"])
    display_method = {
        "id": "Index of dispersion",
        "morisita": "Morisita index",
        "clark_evans": "Clark evans index",
    }
    ab = AnalysisBase(data,
                      display_name="Cell dispersion",
                      export_key=export_key,
                      method=display_method[method],
                      **kwargs)
    ab.check_cell_type()

    records = []
    roi_iterator = ab.roi_iter_with_points(desc="Cell dispersion")
    for roi_name, roi_data, points in roi_iterator:
        bbox = points_bbox(points)
        roi_cells = pd.DataFrame({
            "points": points,
            "cell_types": roi_data[ab.cell_type_key],
        })
        # one points collection per cell type, names kept in the same order
        grouped = list(roi_cells.groupby("cell_types"))
        type_names = [name for name, _ in grouped]
        type_points = [group["points"] for _, group in grouped]

        patterns = spatial_distribution_pattern(
            type_points,
            bbox,
            method=method,
            r=r,
            resample=resample,
            quad=quad,
            rect_side=rect_size,
            pval=pval,
            min_cells=min_cells,
            dims=ab.dimension,
        )
        records.extend([*roi_name, name, *pattern]
                       for name, pattern in zip(type_names, patterns))

    value_columns = ["cell_type", "index_value", "pval", "pattern"]
    table = pd.DataFrame(data=records, columns=ab.exp_obs + value_columns)
    # the running row number becomes the first index level, followed by the
    # experiment levels
    table = table.reset_index().set_index(["index"] + ab.exp_obs)
    ab.params = dict(exp_obs=ab.exp_obs)
    ab.result = table
コード例 #10
0
def hotspot(
    data: AnnData,
    selected_types: Optional[Array] = None,
    search_level: int = 3,
    quad: Optional[Tuple[int, int]] = None,
    rect_side: Optional[Tuple[float, float]] = None,
    pval: float = 0.01,
    export_key: str = "hotspot",
    **kwargs,
):
    """`Getis-ord hotspot detection <../about/implementation.html#hotspot-detection>`_

    Used to identify cells that cluster together.

    Every cell of a selected type is labelled "hot" or "cold"; all remaining
    cells are labelled "other". The labels are written to obs as a
    categorical column.

    Args:
        data: {adata}
        selected_types: {selected_types}
        search_level: How deep the search level to reach
        quad: {quad}
        rect_side: {rect_size}
        pval: {pval}
        export_key: {export_key}
            NOTE(review): this argument is currently not read; the column is
            named "hotspot_all", or "hotspot_" followed by the selected types
            joined with "_" when `selected_types` is given.
        **kwargs: {analysis_kwargs}

    """

    ab = AnalysisBase(data,
                      display_name="Hotspot",
                      export_key="hotspot_all",
                      **kwargs)
    ab.check_cell_type()
    if selected_types is not None:
        ab.export_key = f"hotspot_{'_'.join(selected_types)}"
    else:
        selected_types = ab.cell_types
    hotcells = []
    for roi_name, roi_data, points in ab.roi_iter_with_points(
            desc="Hotspot analysis"):
        bbox = points_bbox(points)
        roi_iter = roi_data.copy()
        roi_iter['__cells'] = points
        for t, g in roi_iter.groupby(ab.cell_type_key):
            if t not in selected_types:
                continue
            hots = getis_ord(
                g['__cells'],
                bbox,
                search_level=search_level,
                quad=quad,
                rect_side=rect_side,
                pval=pval,
            )
            hotcells.append(pd.Series(hots, index=g.index))

    if hotcells:
        result = pd.concat(hotcells)
    else:
        # no cell of the selected types was found: pd.concat would raise on
        # an empty list, an empty series marks every cell as "other" below
        result = pd.Series(dtype=object)
    data.obs[ab.export_key] = result
    # Cell map will leave blank if fill with None value.
    # fillna is not done in place: an in-place call on `data.obs[key]` acts
    # on a temporary and is a no-op under pandas Copy-on-Write
    arr = data.obs[ab.export_key].fillna("other").astype("category")
    arr = arr.cat.rename_categories({
        True: "hot",
        False: "cold",
        "other": "other"
    })
    data.obs[ab.export_key] = arr
    # Call this to invoke the print
    col2adata_obs(data.obs[ab.export_key], data, ab.export_key)
    ab.stop_timer()
コード例 #11
0
def cell_interaction(
    data: AnnData,
    method: str = "pval",
    resample: int = 1000,
    pval: float = 0.01,
    export_key: str = "cell_interaction",
    **kwargs,
):
    """`Profiling cell-cell interaction <about/implementation.html#profiling-of-cell-cell-interaction>`_
    using permutation test

    Neighborhood analysis tells you the relationship between different type of cells

    - Association (1)
    - Avoidance (-1)
    - No relationship (0)

    The result is a table with the columns type1, type2, value and
    relationship; it is indexed by a running row number followed by the
    experiment levels of the ROI each row belongs to.

    Args:
        data: {adata}
        method: "pval" and "zscore" (Default: "pval")
        resample: Number of times to perform resample
        pval: {pval}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    .. seealso:: :class:`spatialtis.spatial_enrichment`

    """
    method = options_guard(method, ["pval", "zscore"])
    display_method = {"pval": "pseudo p-value", "zscore": "z-score"}
    ab = AnalysisBase(data,
                      method=display_method[method],
                      display_name="Cell interaction",
                      export_key=export_key,
                      **kwargs)
    # run the neighbor / cell type precondition checks before either of
    # them is read below
    ab.check_neighbors()
    ab.check_cell_type()

    cc = CellCombs(ab.cell_types)

    results_data = []
    roi_tracker = []
    for roi_name, roi_data in ab.roi_iter(desc="Cell interaction"):
        neighbors = read_neighbors(roi_data, ab.neighbors_key)
        labels = roi_data[ab.cell_id_key]
        cell_types = roi_data[ab.cell_type_key]
        result = cc.bootstrap(
            cell_types,
            neighbors,
            labels,
            times=resample,
            pval=pval,
            method=method,
        )
        results_data += result
        # one ROI name per result row, to rebuild the index afterwards
        roi_tracker += [roi_name] * len(result)

    df = pd.DataFrame(data=results_data,
                      columns=["type1", "type2", "value", "relationship"])
    ix = pd.DataFrame(data=roi_tracker, columns=ab.exp_obs).reset_index()
    df = df.set_index(pd.MultiIndex.from_frame(ix))

    ab.result = df
コード例 #12
0
def spatial_autocorr(
    data: AnnData,
    method: str = "moran_i",
    pval: float = 0.05,
    two_tailed: bool = True,
    layer_key: Optional[str] = None,
    export_key: str = "spatial_autocorr",
    **kwargs,
):
    """Spatial auto-correlation for every markers

    This is used to measure the correlation of marker expression with spatial locations.

    Moran's I is more for global spatial autocorrelation,
    Geary's C is more for local spatial autocorrelation

    The result has one row per ROI and marker: the experiment levels of the
    ROI followed by the columns marker, pattern, index_value and pval.

    Args:
        data: {adata}
        method: "moran_i" or "geary_c" (Default: "moran_i")
        pval: {pval}
        two_tailed: Whether to use two tailed for p-value
        layer_key: {layer_key}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    .. seealso:: :class:`spatialtis.somde`

    """
    method = options_guard(method, ['moran_i', 'geary_c'])
    ab = AnalysisBase(data,
                      method=method,
                      display_name="Spatial auto-correlation",
                      export_key=export_key,
                      **kwargs)
    # run the neighbor precondition check before neighbors are read below
    ab.check_neighbors()

    track_ix = []
    results_data = []
    for roi_name, roi_data, markers, exp in ab.roi_exp_iter(
            layer_key=layer_key, desc=ab.display_name):
        neighbors = read_neighbors(roi_data, ab.neighbors_key)
        labels = roi_data[ab.cell_id_key]
        results = autocorr(
            exp.astype(np.float64),
            neighbors,
            labels=labels,
            two_tailed=two_tailed,
            pval=pval,
            method=method,
        )
        markers = markers.to_numpy()
        # prepend the marker names as the first column
        # NOTE(review): stacking names with numbers gives a non-numeric
        # array, so the value columns may need casting downstream
        results = np.hstack([markers.reshape(-1, 1), results])
        # one ROI name per marker row
        track_ix += [roi_name] * len(markers)
        results_data.append(results)

    ab.result = pd.concat(
        [
            pd.DataFrame(data=track_ix, columns=ab.exp_obs),
            pd.DataFrame(
                data=np.concatenate(results_data),
                columns=["marker", "pattern", "index_value", "pval"],
            ),
        ],
        axis=1,
    )
コード例 #13
0
ファイル: enrichment.py プロジェクト: Mr-Milk/SpatialTis
def spatial_enrichment(
    data: AnnData,
    threshold: Optional[float] = None,
    layer_key: Optional[str] = None,
    selected_markers: Optional[Array] = None,
    resample: int = 500,
    pval: float = 0.01,
    export_key: str = "spatial_enrichment",
    **kwargs,
):
    """`Profiling markers spatial enrichment <about/implementation.html#profiling-of-markers-co-expression>`_
    using permutation test

    Similar to neighborhood analysis which tells you the relationship between different type of cells.
    This analysis tells you the spatial relationship between markers.

    Marker positivity is read from a boolean layer, chosen as follows:

    - `layer_key` is given: that layer is used (a `threshold` passed at the
      same time is ignored with a warning)
    - only `threshold` is given: a layer named "gt_" + threshold is created
      from X >= threshold
    - neither is given: a layer named "mean_cut" is created from
      X >= column-wise mean of X

    Args:
        data: {adata}
        threshold: The expression level to determine whether a marker is positive
        layer_key: {layer_key}
        selected_markers: {selected_markers}
        resample: Number of times to perform resample
        pval: {pval}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    .. seealso:: :class:`spatialtis.cell_interaction`

    """

    ab = AnalysisBase(data,
                      display_name="Spatial enrichment",
                      export_key=export_key,
                      **kwargs)
    ab.check_neighbors()

    if layer_key is not None:
        # a user defined layer always wins and is never overwritten
        if threshold is not None:
            warnings.warn("You specific both threshold and layers_key, "
                          "using user defined layers_key")
    elif threshold is not None:
        layer_key = f"gt_{threshold}"
        data.layers[layer_key] = (data.X >= threshold).astype(bool)
    else:
        layer_key = "mean_cut"
        data.layers[layer_key] = (data.X >= data.X.mean(axis=0)).astype(bool)

    markers = ab.selected_markers(selected_markers)

    results_data = []
    for roi_name, roi_data, mks, exp in ab.roi_exp_iter(
            selected_markers=markers,
            layer_key=layer_key,
            # builtin bool: the `np.bool` alias is not available in every
            # NumPy release
            dtype=bool,
            desc="Spatial enrichment",
    ):
        neighbors = read_neighbors(roi_data, ab.neighbors_key)
        labels = roi_data[ab.cell_id_key]
        result = comb_bootstrap(
            exp,
            mks,
            neighbors,
            labels,
            pval=pval,
            times=resample,
        )
        results_data.extend([*roi_name, *pairs] for pairs in result)

    df = pd.DataFrame(data=results_data,
                      columns=ab.exp_obs + ["marker1", "marker2", "value"])
    # one row per ROI, one column per (marker1, marker2) pair
    df = df.pivot_table(values="value",
                        index=ab.exp_obs,
                        columns=["marker1", "marker2"])
    ab.result = df
コード例 #14
0
ファイル: nmd_markers.py プロジェクト: Mr-Milk/SpatialTis
def NMD_marker(
    data: AnnData,
    pval: float = 0.01,
    selected_markers: Optional[Array] = None,
    importance_cutoff: Number = 0.5,
    layer_key: Optional[str] = None,
    tree_kwargs: Optional[Dict] = None,
    export_key: str = "nmd_marker",
    **kwargs,
):
    """Identify neighbor markers dependent marker

    The neighborhood is treated as a single cell: the expression of all
    neighbors of a cell is summed. For every marker a gradient boosting tree
    (lightgbm) is fitted from the neighborhood expression; when the most
    important neighbor marker passes `importance_cutoff`, the pair is tested
    with a Spearman correlation and kept if its p-value is below `pval`.

    The result is a table with the columns marker, neighbor_marker,
    dependency, corr and pval.

    Args:
        data: {adata}
        pval: {pval}
        selected_markers: {selected_markers}
        importance_cutoff: Threshold on the normalized feature importance
            to accept the strongest neighbor marker
        layer_key: {layer_key}
        tree_kwargs: {tree_kwargs}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    Raises:
        ImportError: If lightgbm is not installed

    """
    try:
        from lightgbm import LGBMRegressor
    except ImportError as err:
        raise ImportError(
            "lightgbm is not installed, please try `pip install lightgbm`."
        ) from err
    ab = AnalysisBase(data,
                      display_name="NMD marker",
                      export_key=export_key,
                      **kwargs)
    ab.check_neighbors()

    # user supplied options override the defaults
    tree_kwargs_ = {"n_jobs": -1, "random_state": 0, "importance_type": "gain"}
    if tree_kwargs is not None:
        tree_kwargs_.update(tree_kwargs)

    # NOTE(review): `markers[i]` / `markers[max_ix]` below assume the markers
    # come back in the same order as the rows of the expression -- confirm
    markers = ab.selected_markers(selected_markers)
    markers_mask = ab.markers_col.isin(markers)
    neighbors = read_neighbors(data.obs, ab.neighbors_key)
    cent_exp = read_exp(data[:, markers_mask], layer_key)
    # treat the neighbors as single cell
    # sum the expression
    neigh_exp = np.asarray(
        [read_exp(data[n, markers_mask], layer_key).sum(1) for n in neighbors])
    results_data = []
    for i, y in enumerate(
            pbar_iter(
                cent_exp,
                desc="Neighbor-dependent markers",
            )):
        reg = LGBMRegressor(**tree_kwargs_).fit(neigh_exp, y)
        weights = np.asarray(reg.feature_importances_)
        ws = weights.sum()
        if ws == 0:
            # no feature was used by the model, nothing to normalize
            continue
        weights = weights / ws
        max_ix = np.argmax(weights)
        max_weight = weights[max_ix]
        if max_weight > importance_cutoff:
            r, pvalue = spearmanr(y, neigh_exp[:, max_ix])
            if pvalue < pval:
                results_data.append(
                    [markers[i], markers[max_ix], max_weight, r, pvalue])

    ab.result = pd.DataFrame(
        data=results_data,
        columns=["marker", "neighbor_marker", "dependency", "corr", "pval"],
    )
コード例 #15
0
ファイル: co_expression.py プロジェクト: Mr-Milk/SpatialTis
def spatial_coexp(
    data: AnnData,
    method: str = "spearman",
    use_cell_type: bool = False,
    selected_markers: Optional[List[str]] = None,
    layer_key: Optional[str] = None,
    corr_thresh: Optional[Number] = 0.5,
    export_key: str = "spatial_coexp",
    **kwargs,
):
    """Identifying spatial co-expression markers using correlation

    The correlation is calculated within pairs of neighbor cells

    The result is a table with the columns marker1, marker2 and corr
    (plus cell1 and cell2 when `use_cell_type` is set), sorted by
    descending correlation.

    Args:
        data: {adata}
        method: "spearman" or "pearson" (Default: "spearman")
        use_cell_type: Whether to use cell type information
        selected_markers: {selected_markers}
        corr_thresh: The minimum absolute correlation value to store the
            result, pass None to keep every marker pair
        layer_key: {layer_key}
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    Raises:
        KeyError: If a selected marker is not one of the markers of `data`

    """
    method = options_guard(method, ['spearman', 'pearson'])
    display_method = {
        "spearman": "spearman correlation",
        "pearson": "pearson correlation"
    }
    ab = AnalysisBase(data,
                      method=display_method[method],
                      display_name="Spatial co-expression",
                      export_key=export_key,
                      **kwargs)
    ab.check_neighbors()

    if use_cell_type:
        ab.check_cell_type()

    ab.params = {"use_cell_type": use_cell_type}

    neighbors = read_neighbors(data.obs, ab.neighbors_key)
    labels = data.obs[ab.cell_id_key]
    pairs = neighbors_pairs(labels, neighbors, duplicates=True)
    used_markers = ab.markers
    # select every marker unless the user restricted the analysis
    marker_ix = slice(None)
    if selected_markers is not None:
        # sort the user input according to index in anndata to maintain order when we read exp
        order = {v: i for i, v in enumerate(ab.markers_col)}
        # the sort key is the marker itself (not its first character)
        used_markers = sorted(set(selected_markers), key=order.__getitem__)
        # read only the selected markers, so the correlations line up with
        # markers_combs
        marker_ix = ab.markers_col.isin(used_markers)
    markers_combs = list(combinations_with_replacement(used_markers, 2))

    def _apply_thresh(table):
        # keep pairs whose correlation is stronger than the threshold in
        # either direction; no threshold keeps everything
        if corr_thresh is None:
            return table
        return table[(table['corr'] > corr_thresh) |
                     (table['corr'] < -corr_thresh)]

    if use_cell_type:
        # the pairs are used as positions when indexing `data` below, so
        # look the cell types up by position as well
        type_pairs = data.obs[ab.cell_type_key].iloc[pairs.ravel()] \
            .to_numpy().reshape(
            pairs.shape
        )
        # remember the first orientation seen for every unordered pair of
        # cell types and flip later pairs so they all share it
        first_seen = {}
        for ix in range(type_pairs.shape[1]):
            i = type_pairs[:, ix]
            c = frozenset(i)
            if c not in first_seen:
                first_seen[c] = i.copy()
            elif (first_seen[c] != i).any():
                pairs[:, ix] = pairs[:, ix][::-1]
                type_pairs[:, ix] = type_pairs[:, ix][::-1]
        types = pd.DataFrame(np.vstack([pairs, type_pairs]).T,
                             columns=["p1", "p2", "c1", "c2"])

        data_collector = []
        # NOTE(review): DESCRIPTION is not defined in the visible part of
        # the file -- confirm it exists at module level
        for (t1, t2), df in pbar_iter(types.groupby(["c1", "c2"]),
                                      desc=DESCRIPTION):
            # builtin float: the `np.float` alias was removed from NumPy
            exp1 = read_exp(data[df["p1"].to_numpy(dtype=int), marker_ix],
                            dtype=float,
                            layer_key=layer_key)
            exp2 = read_exp(data[df["p2"].to_numpy(dtype=int), marker_ix],
                            dtype=float,
                            layer_key=layer_key)

            r = fast_corr(exp1, exp2, method=method)
            d = pd.DataFrame(markers_combs, columns=['marker1', 'marker2'])
            d['cell1'] = t1
            d['cell2'] = t2
            d['corr'] = r
            data_collector.append(_apply_thresh(d))
        d = pd.concat(data_collector)
        ab.result = d.sort_values('corr', ascending=False) \
            .reset_index(drop=True)

    else:
        exp1 = read_exp(data[pairs[0], marker_ix],
                        dtype=float,
                        layer_key=layer_key)
        exp2 = read_exp(data[pairs[1], marker_ix],
                        dtype=float,
                        layer_key=layer_key)
        r = fast_corr(exp1, exp2, method=method)
        d = pd.DataFrame(markers_combs, columns=['marker1', 'marker2'])
        d['corr'] = r
        d = _apply_thresh(d)
        ab.result = d.sort_values('corr',
                                  ascending=False).reset_index(drop=True)
コード例 #16
0
def test_points_reader(data2d, data2d_keys, data3d, data3d_keys, data_wkt):
    """Check the number of coordinates per point returned by ``get_points``.

    Every fixture is wrapped in an ``AnalysisBase`` with ``roi_key='ROI'``;
    the ``*_keys`` fixtures additionally pass an explicit ``centroid_key``
    tuple. The 2D fixtures and ``data_wkt`` must give two coordinates per
    point, the 3D fixtures three.
    """

    def n_coords(adata, **options):
        # Second axis length of the points array == coordinates per point.
        reader = AnalysisBase(data=adata, roi_key='ROI', **options)
        return np.array(reader.get_points()).shape[1]

    assert n_coords(data2d) == 2
    assert n_coords(data2d_keys, centroid_key=('x', 'y')) == 2
    assert n_coords(data_wkt) == 2
    assert n_coords(data3d) == 3
    assert n_coords(data3d_keys, centroid_key=('x', 'y', 'z')) == 3
コード例 #17
0
def spatial_heterogeneity(
    data: AnnData,
    method: str = "leibovici",
    d: Optional[int] = None,
    cut: int = 3,
    export_key: str = "heterogeneity",
    **kwargs,
):
    """Evaluate tissue heterogeneity based on entropy

    Entropy describes the amount of information. Three methods are available:

    - `Shannon entropy <../about/implementation.html#shannon-entropy>`_:
      No spatial information is used; the entropy is computed for each row
      of the table returned by the cell type counter.
    - `Leibovici entropy <../about/implementation.html#leibovici-entropy>`_:
      Use `d` to specify the distance threshold that determines
      co-occurrence events.
    - `Altieri entropy <../about/implementation.html#altieri-entropy>`_:
      Use `cut` to specify the distance interval that determines
      co-occurrence events.

    The result is a table with a single "heterogeneity" column. With
    :code:`method="shannon"`, a warning is issued and no result is stored
    when only one cell type is present.

    Args:
        data: {adata}
        method: "shannon", "leibovici" and "altieri" (Default: "leibovici")
        d: :code:`method="leibovici"`, The distance threshold to determine co-occurrence events
        cut: :code:`method="altieri"`, Distance interval
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """
    method = options_guard(method, ["shannon", "altieri", "leibovici"])

    ab = AnalysisBase(data,
                      method=f"{method.capitalize()} entropy",
                      display_name="Spatial heterogeneity",
                      export_key=export_key,
                      **kwargs)
    ab.check_cell_type()

    if method == "shannon":
        counts = ab.type_counter()
        if len(counts.columns) == 1:
            warnings.warn("No heterogeneity, you only have one type of cell.",
                          UserWarning)
            return
        values = []
        for _, row in counts.iterrows():
            values.append(entropy(row))
        ab.result = pd.DataFrame({"heterogeneity": values},
                                 index=counts.index)
        return

    # Spatial methods: gather points and cell types ROI by ROI, then compute
    # every entropy in a single call.
    roi_names, roi_points, roi_types = [], [], []
    for name, roi_data, pts in ab.roi_iter_with_points(
            desc="Spatial heterogeneity"):
        roi_points.append(pts)
        roi_types.append(roi_data[ab.cell_type_key])
        roi_names.append(name)

    values = spatial_entropy(roi_points,
                             roi_types,
                             method=method,
                             d=d,
                             cut=cut,
                             dims=ab.dimension)
    result_index = pd.MultiIndex.from_tuples(roi_names, names=ab.exp_obs)
    ab.result = pd.DataFrame({"heterogeneity": values}, index=result_index)
コード例 #18
0
ファイル: map.py プロジェクト: Mr-Milk/SpatialTis
def expression_map(
    data: AnnData,
    roi: str,
    marker: str,
    use_shape: bool = False,
    selected_types: Optional[List] = None,
    cell_type_key: Optional[str] = None,
    marker_key: Optional[str] = None,
    shape_key: Optional[str] = None,
    centroid_key: Optional[str] = None,
    roi_key: Optional[str] = None,
    **plot_options,
):
    """Visualize marker expression in ROI

    Args:
        data: {adata_plotting}
        roi: {roi}
        marker: The marker to plot; matched against `data.var.index`, or
            against the marker key column of `data.var` when a marker key is set
        use_shape: Plot cells as polygons read from the shape column instead
            of as centroid points
        selected_types: Only plot cells whose cell type is in this list
        cell_type_key: The key of cell type in `data.obs`, only used when
            `selected_types` is given; falls back to `Config.cell_type_key`
        marker_key: The key of marker names in `data.var`
        shape_key: The key of cell shape in `data.obs`
        centroid_key: The key of cell centroid in `data.obs`
        roi_key: The key of ROI in `data.obs`
        **plot_options: Pass to the plotting function (`polygon_map`,
            `point_map` or `point_map3d`); overrides the internal defaults

    Returns:
        Whatever the plotting function returns

    Raises:
        ValueError: If no cell belongs to `roi`, or if `marker` matches no
            variable in `data`

    """
    # cell_type_key is not forwarded to AnalysisBase; it is resolved below
    # and only when `selected_types` is given.
    ab = AnalysisBase(data,
                      shape_key=shape_key,
                      centroid_key=centroid_key,
                      roi_key=roi_key,
                      marker_key=marker_key,
                      verbose=False)

    iter_data = data.obs.copy()
    points = ab.get_points()
    if len(points[0]) == 3:
        ab.dimension = 3
    iter_data['__spatial_centroid'] = points

    # One boolean mask selects the ROI rows in both obs and the expression
    # matrix, so the two stay aligned.
    roi_selector = data.obs[ab.roi_key] == roi
    roi_info = iter_data[roi_selector]
    if len(roi_info) == 0:
        raise ValueError(f"ROI not exist, roi = {roi}")

    if ab.marker_key is None:
        marker_selector = data.var.index == marker
    else:
        marker_selector = data.var[ab.marker_key] == marker
    # Fail early instead of handing an empty value vector to the plot.
    if not marker_selector.any():
        raise ValueError(f"Marker not exist, marker = {marker}")

    marker_v = data[roi_selector, marker_selector].X.copy()
    if issparse(marker_v):
        # `.toarray()` is available on every scipy sparse container,
        # unlike the `.A` shortcut.
        marker_v = marker_v.toarray()
    marker_v = marker_v.flatten()

    cell_mask = None
    if selected_types is not None:
        cell_type_key = Config.cell_type_key if cell_type_key is None else cell_type_key
        cell_types = roi_info[cell_type_key]
        utypes = np.unique(selected_types)
        # Plain boolean array: positional masking, independent of obs index
        cell_mask = cell_types.isin(utypes).to_numpy()

    internal_kwargs = {**dict(legend_title="expression"), **plot_options}
    if use_shape:
        # Use the key resolved by AnalysisBase so the configured default
        # applies when `shape_key` is None.
        polygons = read_shapes(roi_info, ab.shape_key)
        if cell_mask is not None:
            polygons = np.asarray(polygons)[cell_mask]
            marker_v = marker_v[cell_mask]
        ax = polygon_map(polygons, values=marker_v, **internal_kwargs)
    else:
        cells = np.array(roi_info['__spatial_centroid'].to_list())
        if cell_mask is not None:
            cells = cells[cell_mask]
            marker_v = marker_v[cell_mask]
        if ab.dimension == 2:
            x, y = cells[:, 0], cells[:, 1]
            ax = point_map(x, y, values=marker_v, **internal_kwargs)
        else:
            x, y, z = cells[:, 0], cells[:, 1], cells[:, 2]
            ax = point_map3d(x, y, z, values=marker_v, **internal_kwargs)
    plt.title(f"{marker}")
    return ax
コード例 #19
0
ファイル: network.py プロジェクト: Mr-Milk/SpatialTis
def cell_community(
    data: AnnData,
    resolution: float = 0.05,
    partition_type: Optional[Any] = None,
    partition_kwargs: Optional[Dict] = None,
    export_key: str = "community_id",
    **kwargs,
):
    """Spatial communities detection

    Here we use Leiden graph cluster algorithm. A graph is built for each
    ROI from the cell neighbors, partitioned independently, and the
    membership of every cell is written to `data.obs`.

    Args:
        data: {adata}
        resolution: Value used for `resolution_parameter` of the partition;
            an explicit `resolution_parameter` in `partition_kwargs` takes
            precedence
        partition_type: The leidenalg partition type
            (Default: `leidenalg.CPMVertexPartition`)
        partition_kwargs: Pass to leidenalg.find_partition
        export_key: {export_key}
        **kwargs: {analysis_kwargs}

    """

    ab = AnalysisBase(data,
                      display_name="Cell community",
                      export_key=export_key,
                      **kwargs)

    leidenalg = try_import("leidenalg")
    ig = try_import("igraph", install_name="python-igraph")

    ab.check_neighbors()

    if partition_type is None:
        partition_type = leidenalg.CPMVertexPartition
    # `resolution` is only a default: user supplied partition_kwargs win,
    # but the argument is no longer ignored when partition_kwargs is given.
    partition_kwargs = {
        "resolution_parameter": resolution,
        **(partition_kwargs or {}),
    }

    sub_comm = []
    for _, roi_data, points in ab.roi_iter_with_points():
        labels = roi_data[ab.cell_id_key]
        neighbors = read_neighbors(roi_data, ab.neighbors_key)

        vertices = []
        coords = {}
        for cell_id, point in zip(labels, points):
            # Attributes are named x, y (and z for 3D points)
            vertices.append({"name": cell_id, **dict(zip("xyz", point))})
            coords[cell_id] = tuple(point)

        graph_edges = []
        for k, vs in zip(labels, neighbors):
            for v in vs:
                # Each undirected edge is added once, from the smaller id
                if k < v:
                    graph_edges.append({
                        "source": k,
                        "target": v,
                        "weight": euclidean(coords[k], coords[v])
                    })

        graph = ig.Graph.DictList(vertices, graph_edges)
        part = leidenalg.find_partition(graph, partition_type,
                                        **partition_kwargs)
        sub_comm.extend(part.membership)

    # NOTE(review): memberships are concatenated in ROI iteration order and
    # then labelled with data.obs.index; this presumably relies on the ROI
    # iterator following the row order of data.obs - confirm.
    sub_comm = pd.Series(sub_comm, index=data.obs.index)
    col2adata_obs(sub_comm, data, ab.export_key)
    ab.stop_timer()
コード例 #20
0
ファイル: map.py プロジェクト: Mr-Milk/SpatialTis
def cell_map(
    data: AnnData,
    roi: str,
    use_shape: bool = False,
    selected_types: Optional[List] = None,
    masked_type_name: str = "Other",
    masked_type_color: str = "#d3d3d3",
    cell_type_key: Optional[str] = None,
    shape_key: Optional[str] = None,
    centroid_key: Optional[str] = None,
    roi_key: Optional[str] = None,
    **plot_options,
):
    """Visualize cells in ROI

    Cells are drawn as polygons when `use_shape` is set, otherwise as 2D or
    3D points depending on the dimension of the centroids. When
    `selected_types` is given, every other cell type is relabelled to
    `masked_type_name` and drawn in `masked_type_color`.

    Args:
        data: {adata_plotting}
        roi: {roi}
        use_shape: Plot cell in polygon when shape data is available
        selected_types: {selected_types}
        masked_type_name: The name of the cell types not in selected_types
        masked_type_color: The color of the cell types not in selected_types
        cell_type_key: {cell_type_key}
        shape_key: {shape_key}
        centroid_key: {centroid_key}
        roi_key: {roi_key}
        **plot_options: Pass to `milkviz.point_map` or `milkviz.point_map3d` or `milkviz.polygon_map`

    """
    ab = AnalysisBase(data,
                      cell_type_key=cell_type_key,
                      shape_key=shape_key,
                      centroid_key=centroid_key,
                      roi_key=roi_key,
                      verbose=False)
    masked_type_color = to_hex(masked_type_color, keep_alpha=True)

    if ab.has_cell_type:
        # One color per known cell type, plus the color of the masked label
        color_mapper = {
            **dict(zip(ab.cell_types, cycle(COLOR_POOL))),
            masked_type_name: masked_type_color,
        }

    obs = data.obs.copy()
    coords = ab.get_points()
    if len(coords[0]) == 3:
        ab.dimension = 3
    obs['__spatial_centroid'] = coords
    roi_info = obs[obs[ab.roi_key] == roi]

    if len(roi_info) == 0:
        raise ValueError(f"ROI not exist, roi = {roi}")

    if ab.has_cell_type:
        cell_types = roi_info[ab.cell_type_key]
    else:
        cell_types = None

    options = dict(legend_title="Cell type")
    if selected_types is not None:
        keep = cell_types.isin(np.unique(selected_types))
        cell_types = cell_types.to_numpy()
        # Everything outside the selection collapses into the masked label
        cell_types[~keep] = np.unique(masked_type_name)
        options["colors"] = [color_mapper.get(c) for c in cell_types]
    # Caller supplied options override the internal defaults
    options.update(plot_options)

    if use_shape:
        return polygon_map(read_shapes(roi_info, ab.shape_key),
                           types=cell_types,
                           **options)

    cells = np.array(roi_info['__spatial_centroid'].to_list())
    if ab.dimension == 2:
        return point_map(cells[:, 0], cells[:, 1], types=cell_types,
                         **options)
    return point_map3d(cells[:, 0], cells[:, 1], cells[:, 2],
                       types=cell_types, **options)