Ejemplos de overlap en Python

Lenguaje de programación: Python

Namespace/Package Name: bioframe

Método / Función: overlap

Ejemplos en hotexamples.com: 2

Python overlap - 2 ejemplos encontrados. Estos son los ejemplos reales en Python mejor valorados de bioframe.overlap, extraídos de proyectos de código abierto. Puedes valorar los ejemplos para ayudarnos a mejorar su calidad.

Ejemplo n.º 1

Mostrar archivo

def _largest_overlap_region(features, supports, coord_cols, index_cols):
    """
    Return one region name per feature: the support with the largest overlap.

    Parameters
    ----------
    features : pandas.DataFrame
        Features whose original index has already been moved into the
        leading column(s) by ``reset_index``.
    supports : pandas.DataFrame
        Genomic regions, matched through their "chrom", "start" and "end"
        columns.
    coord_cols : sequence of str
        Names of the chromosome, start and end columns of ``features``.
    index_cols : sequence of str
        Names of the columns of ``features`` that hold the original index.

    Returns
    -------
    pandas.Series
        The ``name_2`` column of the overlap table, one row per feature,
        renumbered from 0 so it lines up with the reset ``features``.

    Notes
    -----
    Assumes ``bioframe.overlap`` suffixes feature columns with ``_1`` and
    support columns with ``_2``, and names the overlap bounds
    ``overlap_<start column>`` / ``overlap_<end column>``.
    """
    _, start_col, end_col = coord_cols
    overlap = bioframe.overlap(
        features,
        supports,
        how="left",
        cols1=list(coord_cols),
        cols2=["chrom", "start", "end"],
        keep_order=True,
        return_overlap=True,
    )
    overlap["overlap_length"] = (
        overlap[f"overlap_{end_col}"] - overlap[f"overlap_{start_col}"]
    )
    # Identify a feature by its own columns only. Keying on every column
    # (support columns included) never collapses a feature that overlaps
    # several supports, so the "largest overlap" rule would have no effect.
    feature_key = [f"{col}_1" for col in (*index_cols, *coord_cols)]
    best = (
        # A stable sort makes ties resolve to the first overlap reported.
        overlap.sort_values("overlap_length", ascending=False, kind="mergesort")
        .drop_duplicates(feature_key, keep="first")
        .sort_index()  # back to the feature order produced by keep_order=True
        .reset_index(drop=True)  # renumber so rows align with `features`
    )
    return best["name_2"]


def assign_regions(features, supports):
    """
    For each feature in features dataframe assign the genomic region (support)
    that overlaps with it. In case if feature overlaps multiple supports, the
    region with largest overlap will be reported.

    Parameters
    ----------
    features : pandas.DataFrame
        Either single intervals ("chrom", "start", "end") or interval pairs
        ("chrom1", "start1", "end1", "chrom2", "start2", "end2"). Rows are
        told apart by their index plus coordinates, so that combination
        should be unique.
    supports : pandas.DataFrame
        Genomic regions handed to ``bioframe.overlap``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``features`` with its original index restored. When
        coordinate columns are present a "region" column is added; for
        interval pairs it is NaN unless both sides fall in the same region.
    """
    index_names = list(features.index.names)  # Store the index level names
    n_levels = features.index.nlevels
    # Work on a copy whose original index lives in the leading column(s).
    features = features.copy().reset_index()
    # Take the index columns by position: pandas calls an unnamed index
    # "index", or "level_0" when an "index" column already exists.
    index_cols = list(features.columns[:n_levels])

    if "chrom" in features.columns:
        features["region"] = _largest_overlap_region(
            features, supports, ("chrom", "start", "end"), index_cols
        )

    if "chrom1" in features.columns:
        for idx in ("1", "2"):
            features[f"region{idx}"] = _largest_overlap_region(
                features,
                supports,
                (f"chrom{idx}", f"start{idx}", f"end{idx}"),
                index_cols,
            )

        # Form a single column with region names where region1 == region2,
        # and np.nan in other cases:
        features["region"] = np.where(
            features["region1"] == features["region2"], features["region1"], np.nan
        )
        # Remove the per-side helper columns
        features = features.drop(["region1", "region2"], axis=1)

    # Restore the original index and its level names
    features = features.set_index(index_cols if n_levels > 1 else index_cols[0])
    features.index.names = index_names
    return features

Ejemplo n.º 2

Mostrar archivo

def _best_support_name(features, supports, coord_cols, index_cols):
    """Return the name of the most-overlapping support for each feature row.

    ``features`` must already carry its original index in the columns named
    by ``index_cols``; ``coord_cols`` names its chromosome, start and end
    columns. The result is the ``name_2`` column of the overlap table, one
    row per feature and renumbered from 0.

    Assumes ``bioframe.overlap`` suffixes feature columns with ``_1`` and
    support columns with ``_2``, and names the overlap bounds
    ``overlap_<start column>`` / ``overlap_<end column>``.
    """
    _, start_col, end_col = coord_cols
    overlap = bioframe.overlap(
        features,
        supports,
        how="left",
        cols1=list(coord_cols),
        cols2=["chrom", "start", "end"],
        keep_order=True,
        return_overlap=True,
    )
    # A feature is identified by its original index plus its coordinates.
    # The index column name is derived rather than hard-coded as "index_1",
    # which only exists when the features index is unnamed.
    feature_key = [f"{col}_1" for col in (*index_cols, *coord_cols)]
    overlap["overlap_length"] = (
        overlap[f"overlap_{end_col}"] - overlap[f"overlap_{start_col}"]
    )
    # Filter out overlaps with multiple regions, keeping the largest one:
    deduplicated = (
        overlap.sort_values("overlap_length", ascending=False)
        .drop_duplicates(feature_key, keep="first")
        .sort_index()
        .reset_index(drop=True)
    )
    return deduplicated["name_2"]


def _assign_supports(features, supports):
    """Assign supports to entries in snipping windows.

    Workaround for bug in cooltools 0.2.0 that duplicate
    supports are not handled correctly. Copied from cooltools.common.assign_regions

    ``features`` holds either single intervals ("chrom", "start", "end") or
    interval pairs ("chrom1", "start1", "end1", "chrom2", "start2", "end2").
    A copy is returned with the original index restored. When coordinate
    columns are present a "region" column is added; for interval pairs it is
    NaN unless both sides fall in the same support.
    """
    index_names = list(features.index.names)  # Store the index level names
    n_levels = features.index.nlevels
    # Store the original features' order as leading column(s) with the index
    features = features.copy().reset_index()
    # Take the index columns by position: pandas names an unnamed index
    # "index", or "level_0" when an "index" column already exists.
    index_cols = list(features.columns[:n_levels])

    if "chrom" in features.columns:
        # Copy single column with overlapping region name:
        features["region"] = _best_support_name(
            features, supports, ("chrom", "start", "end"), index_cols
        )

    if "chrom1" in features.columns:
        for idx in ("1", "2"):
            # Copy single column with overlapping region name:
            features[f"region{idx}"] = _best_support_name(
                features,
                supports,
                (f"chrom{idx}", f"start{idx}", f"end{idx}"),
                index_cols,
            )

        # Form a single column with region names where region1 == region2,
        # and np.nan in other cases:
        features["region"] = np.where(
            features["region1"] == features["region2"], features["region1"],
            np.nan)
        # Remove unnecessary columns
        features = features.drop(["region1", "region2"], axis=1)

    # Restore the original index and its level names
    features = features.set_index(
        index_cols if n_levels > 1 else index_cols[0])
    features.index.names = index_names
    return features