コード例 #1
0
def test_bystrand_bydistance_pileups_with_controls(request):
    """
    Check snippet counts of the by-strand, by-distance pileup with controls.
    """
    data_dir = request.fspath.dirname

    # Load the toy cooler, the view regions and the features to pile up on:
    clr = cooler.Cooler(op.join(data_dir, "data/CN.mm9.1000kb.cool"))
    regions_path = op.join(data_dir, "data/CN.mm9.toy_regions.bed")
    regions = bf.read_table(regions_path, schema="bed4")
    features_path = op.join(data_dir, "data/toy_features.bed")
    features = bf.read_table(features_path, schema="bed")

    coord_creator = CoordCreator(
        features,
        1_000_000,
        features_format="bed",
        local=False,
        flank=2_000_000,
        mindist=0,
    )
    pileupper = PileUpper(
        clr,
        coord_creator,
        expected=False,
        view_df=regions,
        control=True,
    )
    result = pileupper.pileupsByStrandByDistanceWithControl()

    # Values of the "n" column once rows are ordered by orientation and
    # distance band:
    counts = result.sort_values(["orientation", "distance_band"])["n"]
    assert np.all(counts == [1, 2, 1, 1, 1])
コード例 #2
0
def test_offdiag_pileups_without_expected(request):
    """
    Exercise the off-diagonal pileup built from raw cooler snippets.
    """
    data_dir = request.fspath.dirname
    clr = cooler.Cooler(op.join(data_dir, "data/CN.mm9.1000kb.cool"))
    regions = bioframe.read_table(
        op.join(data_dir, "data/CN.mm9.toy_regions.bed"),
        schema="bed4",
    )
    snipping = cooltools.snipping
    snipper = snipping.CoolerSnipper(clr, regions=regions)

    def make_paired_windows(positions1, positions2):
        # Bin-aligned windows for each side, joined row by row into
        # two-sided windows and then annotated with the view regions.
        side1 = snipping.make_bin_aligned_windows(
            1_000_000, ["chr1", "chr1"], positions1, flank_bp=2_000_000
        )
        side2 = snipping.make_bin_aligned_windows(
            1_000_000, ["chr1", "chr1"], positions2, flank_bp=2_000_000
        )
        paired = pd.merge(
            side1,
            side2,
            left_index=True,
            right_index=True,
            suffixes=("1", "2"),
        )
        return snipping.assign_regions(paired, regions).reset_index(drop=True)

    def run_pileup(windows):
        return snipping.pileup(windows, snipper.select, snipper.snip, map=map)

    # Case I: both windows are expected to fall into annotated regions.
    stack = run_pileup(
        make_paired_windows(
            [102_000_000, 105_000_000],
            [105_000_000, 109_000_000],
        )
    )
    # Two 5x5 snippets are expected:
    assert stack.shape == (5, 5, 2)

    # Case II: the second window is expected to come from an unannotated
    # genomic region, so its snippet should be entirely NaN.
    stack = run_pileup(
        make_paired_windows(
            [102_000_000, 10_000_000],
            [105_000_000, 109_000_000],
        )
    )
    assert stack.shape == (5, 5, 2)
    first_snippet = stack[:, :, 0]
    second_snippet = stack[:, :, 1]
    assert np.all(np.isfinite(first_snippet))
    assert np.all(np.isnan(second_snippet))
コード例 #3
0
ファイル: test_io.py プロジェクト: open2c/bioframe
def test_read_table():
    """
    Check the shapes returned by ``bioframe.read_table`` for several schemas.

    Covers input with fewer columns than the schema (padded by default,
    rejected with ``schema_is_strict=True``) and whitespace-separated
    BED3/BED6/BED12/BEDPE input.
    """

    d = """chr1\nchr2\nchr2"""
    assert bioframe.read_table(StringIO(d), schema="bed3").shape == (3, 3)

    # raise a value error if any columns are filled with all NA
    with pytest.raises(ValueError):
        bioframe.read_table(StringIO(d), schema="bed3", schema_is_strict=True)

    # fill with nans to appropriate size if schema_is_strict=False (aka the default)
    d = """chr1      5    10
           chr1     10   20
           chr2    30  40"""
    # NOTE: the separator is a regular expression, so it is written as a raw
    # string; a plain "\s+" literal contains an invalid escape sequence.
    assert bioframe.read_table(StringIO(d), schema="bed3",
                               sep=r"\s+").shape == (3, 3)
    assert bioframe.read_table(StringIO(d), schema="bed6",
                               sep=r"\s+").shape == (3, 6)
    assert bioframe.read_table(StringIO(d), schema="bed12",
                               sep=r"\s+").shape == (3, 12)

    # bedpe has 10 columns
    d = """chr1    5    10  chr2   5   10   interval1  .  +  -
           chr1    10   20  chr1   5   10   interval2  .  +  -
           chr2    30   40  chr2   5   10   interval3  12  +  -
        """
    assert bioframe.read_table(StringIO(d),
                               schema="bedpe",
                               sep=r"\s+",
                               schema_is_strict=True).shape == (3, 10)
コード例 #4
0
ファイル: common.py プロジェクト: Phlya/cooltools
def read_viewframe(
    fname,
    verify_cooler_view=None,
):
    """
    Load regions from a BED file and convert them into a viewframe.

    Parameters
    ----------
    fname : str
        Path to a BED file with regions (3 or 4 columns).
    verify_cooler_view : None or viewframe
        Viewframe with entire chromosome sizes. When given, its "end" column
        indexed by "chrom" is used for bound checking and the view read from
        the file must be contained in it.

    Returns
    -------
    view_df : pd.DataFrame
        DataFrame with the viewframe

    Raises
    ------
    ValueError
        If the table cannot be converted into a viewframe, or if the view is
        not contained in ``verify_cooler_view``.
    """
    # Chromosome sizes are only available when a cooler view is supplied:
    if verify_cooler_view is None:
        chromsizes = None
    else:
        chromsizes = verify_cooler_view.set_index("chrom")["end"]

    # The file may or may not have a name column: try bed4, fall back to bed3.
    try:
        view_df = bioframe.read_table(fname, schema="bed4", index_col=False)
    except Exception:
        view_df = bioframe.read_table(fname, schema="bed3", index_col=False)

    # Turn the plain table into a viewframe, checking bounds when possible:
    try:
        if verify_cooler_view is None:
            view_df = bioframe.make_viewframe(view_df)
        else:
            view_df = bioframe.make_viewframe(view_df, check_bounds=chromsizes)
    except ValueError as err:
        raise ValueError(
            "View table is incorrect, please, comply with the format. ") from err

    # The view has to fit into the cooler view; the reverse is not required,
    # because the cooler may have more regions.
    if verify_cooler_view is not None and not bioframe.is_contained(
            view_df, verify_cooler_view):
        raise ValueError(
            "View regions are not contained in cooler chromsizes bounds")

    return view_df
コード例 #5
0
def test_cooler_snipper_with_regions(request):
    """Smoke-test ``CoolerSnipper.select`` and ``CoolerSnipper.snip``."""
    data_dir = request.fspath.dirname
    clr = cooler.Cooler(op.join(data_dir, "data/CN.mm9.1000kb.cool"))
    regions = bioframe.read_table(
        op.join(data_dir, "data/CN.mm9.toy_regions.bed"),
        schema="bed4",
    )
    snipper = cooltools.snipping.CoolerSnipper(clr, regions=regions)

    # presumably "foo" is the name of a region in the toy view -- TODO confirm
    region = "foo"
    coords = (110_000_000, 120_000_000, 110_000_000, 120_000_000)

    selected = snipper.select(region, region)
    snippet = snipper.snip(selected, region, region, coords)
    assert snippet.shape is not None
コード例 #6
0
def test_bystrand_pileups_with_expected(request):
    """
    Check by-strand pileup snippet counts for several PileUpper setups.
    """
    data_dir = request.fspath.dirname

    # Load the toy cooler, then the view and expected that go with it:
    clr = cooler.Cooler(op.join(data_dir, "data/CN.mm9.1000kb.cool"))
    regions = io.read_viewframe_from_file(
        op.join(data_dir, "data/CN.mm9.toy_regions.bed"),
        verify_cooler=clr,
    )
    exp = io.read_expected_from_file(
        op.join(data_dir, "data/CN.mm9.toy_expected.tsv"),
        expected_value_cols=["balanced.avg"],
        verify_view=regions,
        verify_cooler=clr,
    )
    features = bf.read_table(
        op.join(data_dir, "data/toy_features.bed"),
        schema="bed",
    )
    coord_creator = CoordCreator(
        features,
        1_000_000,
        features_format="bed",
        local=False,
        flank=2_000_000,
        mindist=0,
    )

    def check_strand_counts(pileupper):
        # Every setup below must produce the same "n" values once the rows
        # are ordered by orientation.
        by_strand = pileupper.pileupsByStrandWithControl()
        counts = by_strand.sort_values("orientation")["n"]
        assert np.all(counts == [1, 3, 1, 1])

    # With expected and ooe=True:
    check_strand_counts(
        PileUpper(clr, coord_creator, expected=exp, view_df=regions, ooe=True)
    )
    # With expected and ooe=False:
    check_strand_counts(
        PileUpper(clr, coord_creator, expected=exp, view_df=regions, ooe=False)
    )
    # Neither expected nor view regions:
    check_strand_counts(
        PileUpper(clr, coord_creator, expected=False, ooe=False)
    )
    # Unbalanced data with coverage normalization:
    check_strand_counts(
        PileUpper(
            clr,
            coord_creator,
            expected=False,
            ooe=False,
            clr_weight_name=None,
            coverage_norm=True,
        )
    )
コード例 #7
0
def test_ondiag_pileups_with_expected(request):
    """
    Exercise on-diagonal pileups for the snippers that use an expected table.
    """
    data_dir = request.fspath.dirname
    clr = cooler.Cooler(op.join(data_dir, "data/CN.mm9.1000kb.cool"))
    exp = pd.read_table(op.join(data_dir, "data/CN.mm9.toy_expected.tsv"))
    regions = bioframe.read_table(
        op.join(data_dir, "data/CN.mm9.toy_regions.bed"),
        schema="bed4",
    )
    snipping = cooltools.snipping

    def make_windows(positions):
        # Bin-aligned windows annotated with the regions of the view.
        aligned = snipping.make_bin_aligned_windows(
            1_000_000, ["chr1", "chr1"], positions, flank_bp=2_000_000
        )
        return snipping.assign_regions(aligned, regions).reset_index(drop=True)

    snipper_classes = (snipping.ObsExpSnipper, snipping.ExpectedSnipper)
    for snipper_class in snipper_classes:
        snipper = snipper_class(clr, exp, regions=regions)

        # Case I: both windows are expected to lie in annotated regions;
        # two 5x5 snippets should come back.
        stack = snipping.pileup(
            make_windows([102_000_000, 105_000_000]),
            snipper.select,
            snipper.snip,
            map=map,
        )
        assert stack.shape == (5, 5, 2)

        # Case II: the second window is expected to come from an unannotated
        # genomic region, so its snippet should be all NaN.
        stack = snipping.pileup(
            make_windows([120_000_000, 160_000_000]),
            snipper.select,
            snipper.snip,
            map=map,
        )
        assert stack.shape == (5, 5, 2)
        second_snippet = stack[:, :, 1]
        assert np.all(np.isnan(second_snippet))
コード例 #8
0
ファイル: test_snipping.py プロジェクト: open2c/cooltools
def test_ondiag_pileup_legacy_without_expected(request):
    """
    Exercise the legacy on-diagonal pileup on raw cooler snippets.
    """
    data_dir = request.fspath.dirname
    clr = cooler.Cooler(op.join(data_dir, "data/CN.mm9.1000kb.cool"))
    view_df = bioframe.read_table(
        op.join(data_dir, "data/CN.mm9.toy_regions.bed"),
        schema="bed4",
    )
    snipping = cooltools.api.snipping

    def make_windows(positions):
        # Bin-aligned windows annotated with the regions of the view.
        aligned = snipping.make_bin_aligned_windows(
            1_000_000, ["chr1", "chr1"], positions, flank_bp=2_000_000
        )
        return snipping.assign_regions(aligned, view_df).reset_index(drop=True)

    # Case I: check that two 5x5 snippets are produced.
    windows = make_windows([120_000_000, 160_000_000])
    snipper = snipping.CoolerSnipper(clr, view_df=view_df, min_diag=None)
    stack = snipping.pileup_legacy(
        windows,
        snipper.select,
        snipper.snip,
        map=map,
    )
    assert stack.shape == (5, 5, 2)

    # Case II: the second window is expected to come from an unannotated
    # genomic region: the first snippet is finite, the second is all NaN.
    windows = make_windows([120_000_000, 160_000_000])
    stack = snipping.pileup_legacy(
        windows,
        snipper.select,
        snipper.snip,
        map=map,
    )
    assert stack.shape == (5, 5, 2)
    first_snippet = stack[:, :, 0]
    second_snippet = stack[:, :, 1]
    assert np.all(np.isfinite(first_snippet))
    assert np.all(np.isnan(second_snippet))
コード例 #9
0
def test_snipper_with_regions_and_expected(request):
    """Smoke-test select/snip of the snippers that take an expected table."""
    data_dir = request.fspath.dirname
    clr = cooler.Cooler(op.join(data_dir, "data/CN.mm9.1000kb.cool"))
    exp = pd.read_table(op.join(data_dir, "data/CN.mm9.toy_expected.tsv"))
    regions = bioframe.read_table(
        op.join(data_dir, "data/CN.mm9.toy_regions.bed"),
        schema="bed4",
    )

    # presumably "foo" is the name of a region in the toy view -- TODO confirm
    region = "foo"
    coords = (110_000_000, 120_000_000, 110_000_000, 120_000_000)

    snipper_classes = (
        cooltools.snipping.ObsExpSnipper,
        cooltools.snipping.ExpectedSnipper,
    )
    for snipper_class in snipper_classes:
        snipper = snipper_class(clr, exp, regions=regions)
        selected = snipper.select(region, region)
        snippet = snipper.snip(selected, region, region, coords)
        assert snippet.shape is not None
コード例 #10
0
ファイル: test_snipping.py プロジェクト: Phlya/cooltools
def test_offdiag_pileups_with_expected(request):
    """
    Exercise legacy off-diagonal pileups for the expected-based snippers.
    """
    data_dir = request.fspath.dirname
    clr = cooler.Cooler(op.join(data_dir, "data/CN.mm9.1000kb.cool"))
    exp = pd.read_table(op.join(data_dir, "data/CN.mm9.toy_expected.tsv"))
    view_df = bioframe.read_table(
        op.join(data_dir, "data/CN.mm9.toy_regions.bed"),
        schema="bed4",
    )
    snipping = cooltools.snipping

    def make_paired_windows(positions1, positions2):
        # Bin-aligned windows for each side, joined row by row into
        # two-sided windows and then annotated with the view regions.
        side1 = snipping.make_bin_aligned_windows(
            1_000_000, ["chr1", "chr1"], positions1, flank_bp=2_000_000
        )
        side2 = snipping.make_bin_aligned_windows(
            1_000_000, ["chr1", "chr1"], positions2, flank_bp=2_000_000
        )
        paired = pd.merge(
            side1,
            side2,
            left_index=True,
            right_index=True,
            suffixes=("1", "2"),
        )
        return snipping.assign_regions(paired, view_df).reset_index(drop=True)

    snipper_classes = (snipping.ObsExpSnipper, snipping.ExpectedSnipper)
    for snipper_class in snipper_classes:
        snipper = snipper_class(clr, exp, view_df=view_df)

        # Case I: two off-diagonal features expected to lie in annotated
        # regions; two 5x5 snippets should come back.
        windows = make_paired_windows(
            [102_000_000, 105_000_000],
            [105_000_000, 109_000_000],
        )
        stack = snipping.pileup_legacy(
            windows,
            snipper.select,
            snipper.snip,
            map=map,
        )
        assert stack.shape == (5, 5, 2)

        # Case II: the second window is expected to lie between two different
        # regions, so its snippet should be all NaN.
        windows = make_paired_windows(
            [102_000_000, 10_000_000],
            [105_000_000, 109_000_000],
        )
        stack = snipping.pileup_legacy(
            windows,
            snipper.select,
            snipper.snip,
            map=map,
        )
        assert stack.shape == (5, 5, 2)
        second_snippet = stack[:, :, 1]
        assert np.all(np.isnan(second_snippet))
コード例 #11
0
ファイル: test_snipping.py プロジェクト: open2c/cooltools
def test_pileup(request):
    """
    Exercise ``cooltools.api.snipping.pileup`` on valid and invalid features.
    """
    data_dir = request.fspath.dirname
    clr = cooler.Cooler(op.join(data_dir, "data/CN.mm9.1000kb.cool"))
    exp = pd.read_table(op.join(data_dir, "data/CN.mm9.toy_expected.tsv"))
    view_df = bioframe.read_table(
        op.join(data_dir, "data/CN.mm9.toy_regions.bed"),
        schema="bed4",
    )

    def run_pileup(features):
        # All cases share the cooler, view and expected; flank is disabled.
        return cooltools.api.snipping.pileup(
            clr, features, view_df, exp, flank=None
        )

    # I. On-diagonal features, both expected to lie in annotated regions:
    ondiag = pd.DataFrame({
        "chrom": ["chr1", "chr1"],
        "start": [102_000_000, 108_000_000],
        "end": [107_000_000, 113_000_000],
    })
    # Two 5x5 snippets are expected:
    assert run_pileup(ondiag).shape == (5, 5, 2)

    # II. Off-diagonal features, both expected to lie in annotated regions:
    offdiag = pd.DataFrame({
        "chrom1": ["chr1", "chr1"],
        "start1": [102_000_000, 107_000_000],
        "end1": [107_000_000, 112_000_000],
        "chrom2": ["chr1", "chr1"],
        "start2": [107_000_000, 113_000_000],
        "end2": [112_000_000, 118_000_000],
    })
    assert run_pileup(offdiag).shape == (5, 5, 2)

    # III. Off-diagonal features, the first one expected to fall outside the
    # view, so its snippet should be all NaN:
    partly_outside = pd.DataFrame({
        "chrom1": ["chr1", "chr1"],
        "start1": [90_000_000, 105_000_000],
        "end1": [95_000_000, 110_000_000],
        "chrom2": ["chr1", "chr1"],
        "start2": [105_000_000, 110_000_000],
        "end2": [110_000_000, 115_000_000],
    })
    stack = run_pileup(partly_outside)
    assert stack.shape == (5, 5, 2)
    assert np.all(np.isnan(stack[:, :, 0]))

    # IV. On-diagonal features that are not a valid bedframe (start > end):
    invalid_ondiag = pd.DataFrame({
        "chrom": ["chr1", "chr1"],
        "start": [107_000_000, 108_000_000],
        "end": [102_000_000, 113_000_000],
    })
    with pytest.raises(ValueError):
        run_pileup(invalid_ondiag)

    # DRAFT: the same features should be accepted with force=True and give a
    # stack of shape (5, 5, 2); not exercised here.

    # V. Off-diagonal features that are not a valid bedframe (start > end):
    invalid_offdiag = pd.DataFrame({
        "chrom1": ["chr1", "chr1"],
        "start1": [107_000_000, 107_000_000],
        "end1": [102_000_000, 112_000_000],
        "chrom2": ["chr1", "chr1"],
        "start2": [107_000_000, 113_000_000],
        "end2": [112_000_000, 118_000_000],
    })
    with pytest.raises(ValueError):
        run_pileup(invalid_offdiag)
コード例 #12
0
def read_viewframe_from_file(
    view_fname,
    verify_cooler=None,
    check_sorting=False,
):
    """
    Load a BED file with regions and return it as a viewframe.

    Parameters
    ----------
    view_fname : str
        Path to a BED file with regions (3 or 4 columns).
    verify_cooler : cooler | None
        Cooler object the view is checked against.
        No compatibility check is done when None.
    check_sorting : bool
        Forwarded to the compatibility check; only used when
        ``verify_cooler`` is given.

    Returns
    -------
    view_df : pd.DataFrame
        DataFrame with the viewframe

    Raises
    ------
    ValueError
        If the file cannot be read as bed4 or bed3, if the table cannot be
        converted into a viewframe, or if the compatibility check against
        ``verify_cooler`` fails.
    """

    # Prefer bed4 (regions with names) and fall back to bed3:
    try:
        view_df = bioframe.read_table(
            view_fname, schema="bed4", index_col=False
        )
    except Exception as err_bed4:
        try:
            view_df = bioframe.read_table(
                view_fname, schema="bed3", index_col=False
            )
        except Exception:
            # Report the bed4 failure as the cause of the error.
            raise ValueError(
                f"{view_fname} is not a BED file with 3 or 4 columns"
            ) from err_bed4

    # Turn the plain table into a viewframe:
    try:
        view_df = bioframe.make_viewframe(view_df)
    except ValueError as err:
        raise ValueError(
            "View table is incorrect, please, comply with the format. ") from err

    # Nothing to verify against: return the viewframe as is.
    if verify_cooler is None:
        return view_df

    try:
        is_compatible_viewframe(
            view_df,
            verify_cooler,
            check_sorting,
            raise_errors=True,
        )
    except Exception as err:
        raise ValueError("view_df is not compatible with the cooler") from err

    return view_df
コード例 #13
0
ファイル: compute_pileup.py プロジェクト: Phlya/cooltools
def compute_pileup(
    cool_path,
    features,
    view,
    expected,
    flank,
    features_format,
    weight_name,
    out,
    out_format,
    store_snips,
    nproc,
    ignore_diags,
    aggregate,
    force,
    verbose,
):
    """
    Perform retrieval of the snippets from .cool file.

    COOL_PATH : The paths to a .cool file with a balanced Hi-C map. Use the
    '::' syntax to specify a group path in a multicooler file.

    FEATURES_PATH : the path to a BED or BEDPE-like file that contains features for snipping windows.
    If BED, then the features are on-diagonal. If BEDPE, then the features
    can be off-diagonal (but not in trans or between different regions in the view).

    """
    # Summary of the arguments as they are used below:
    #   features        - path to the features file ("bed" or "bedpe" layout,
    #                     selected by features_format, case-insensitive)
    #   view            - path to a BED file with the view, or None to use
    #                     whole chromosomes of the cooler
    #   expected        - None, or a (path, value column) pair
    #   aggregate       - None/"none"/"mean", "median", "min", "max" or "std"
    #   out, out_format - output path and "npz"/"hdf5" (case-insensitive)
    #   store_snips     - also store the full stack of snippets
    # Raises ValueError for an unsupported features/aggregation/output format,
    # a malformed view, or features that are not contained in the view.

    clr = cooler.Cooler(cool_path)

    #### Read the features:
    buf, names = sniff_for_header(features)
    features_kind = features_format.lower()
    if features_kind == "bedpe":
        feature_cols = ["chrom1", "start1", "end1", "chrom2", "start2", "end2"]
    elif features_kind == "bed":
        feature_cols = ["chrom", "start", "end"]
    else:
        raise ValueError(
            "Automatic detection of features format is not implemented yet. "
            "Please provide BED or BEDPE as --features-format")
    dtypes = {
        col: (str if col.startswith("chrom") else np.int64)
        for col in feature_cols
    }

    # Each reader option is passed exactly once; supplying usecols/dtype both
    # explicitly and through **kwargs raises a TypeError.
    if names is None:
        # No header: take the leading columns by position and name them.
        read_kwargs = dict(
            header=None,
            usecols=list(range(len(feature_cols))),
            names=feature_cols,
        )
    else:
        # Header present: select the required columns by name.
        read_kwargs = dict(header="infer", usecols=feature_cols)

    features_df = pd.read_table(buf,
                                comment="#",
                                dtype=dtypes,
                                verbose=verbose,
                                **read_kwargs)

    ###### Define the view:
    if view is None:
        # Generate viewframe from clr.chromsizes:
        view_df = bioframe.make_viewframe([(chrom, 0, clr.chromsizes[chrom])
                                           for chrom in clr.chromnames])
        if not bioframe.is_contained(features_df, view_df):
            raise ValueError(
                "Features are not contained in chromosomes bounds")
    else:
        # Read the view table assuming bed4, falling back to bed3:
        try:
            view_df = bioframe.read_table(view, schema="bed4", index_col=False)
        except Exception:
            view_df = bioframe.read_table(view, schema="bed3", index_col=False)
        # Convert view_df to viewframe:
        try:
            view_df = bioframe.make_viewframe(view_df,
                                              check_bounds=clr.chromsizes)
        except ValueError as e:
            raise ValueError(
                "View table is incorrect, please, comply with the format. "
            ) from e
        if not bioframe.is_contained(features_df, view_df):
            raise ValueError("Features are not contained in view bounds")

    ##### Read expected, should be cis-expected:
    if expected is not None:
        expected_path, expected_value_col = expected
        expected = read_expected(
            expected_path,
            contact_type="cis",
            expected_value_cols=[expected_value_col],
            verify_view=view_df,
            verify_cooler=clr,
        )

    ##### Create the pileup:
    stack = snipping.pileup(
        clr,
        features_df,
        view_df=view_df,
        expected_df=expected,
        flank=flank,
        min_diag=ignore_diags,  # TODO: implement in pileup API
        clr_weight_name=weight_name,  # TODO: implement in pileup API
        force=force,  # TODO: implement in pileup API
        nproc=nproc,
    )

    ##### Aggregate the signal:
    # None has to be handled before lowercasing; it means the default (mean).
    aggregate = "mean" if aggregate is None else aggregate.lower()
    agg_funcs = {
        "mean": np.nanmean,
        "none": np.nanmean,
        "median": np.nanmedian,
        "min": np.nanmin,
        "max": np.nanmax,
        "std": np.nanstd,
    }
    if aggregate not in agg_funcs:
        raise ValueError(
            f"Aggregation mode {aggregate} not supported. Please use mean/median/min/max/std."
        )
    # Snippets are stacked along the last axis (axis 2):
    pileup = agg_funcs[aggregate](stack, axis=2)

    ##### Store the data; the stack is written only when store_snips is set:
    output_kind = out_format.lower()
    if output_kind == "npz":
        if store_snips:
            np.savez(out, pileup=pileup, stack=stack)
        else:
            np.savez(out, pileup=pileup)
    elif output_kind == "hdf5":
        # The context manager makes sure the file is flushed and closed.
        with h5py.File(out, "w") as h5:
            h5.create_dataset("pileup", data=pileup)
            if store_snips:
                h5.create_dataset("stack", data=stack)
    else:
        # Fail loudly rather than silently writing nothing.
        raise ValueError(
            f"Output format {out_format} not supported. Please use npz or hdf5."
        )
コード例 #14
0
def compute_expected(
    cool_path,
    nproc,
    chunksize,
    output,
    contact_type,
    view,
    balance,
    clr_weight_name,
    ignore_diags,
):
    """
    Calculate expected Hi-C signal either for cis or for trans regions
    of chromosomal interaction map.

    When balancing weights are not applied to the data, there is no
    masking of bad bins performed.

    COOL_PATH : The paths to a .cool file with a balanced Hi-C map.

    """
    # Summary of the arguments as they are used below:
    #   view            - path to a BED file with the view, or None for the
    #                     full-chromosome case
    #   contact_type    - "cis" or "trans"; anything else is a ValueError
    #   balance         - when false, clr_weight_name is replaced with None
    #   output          - path of the output TSV; printed to stdout when empty

    clr = cooler.Cooler(cool_path)
    if view is not None:
        # Read the view table assuming bed4, falling back to bed3:
        try:
            view_df = bioframe.read_table(view, schema="bed4", index_col=False)
        except Exception:
            view_df = bioframe.read_table(view, schema="bed3", index_col=False)
        # Convert view dataframe to viewframe:
        try:
            view_df = bioframe.make_viewframe(view_df, check_bounds=clr.chromsizes)
        except ValueError as e:
            raise ValueError(
                "View table is incorrect, please, comply with the format. "
            ) from e
    else:
        view_df = None  # full chromosome case

    # Weights are only applied when balancing is requested:
    weight_name = clr_weight_name if balance else None

    if contact_type == "cis":
        result = expected.get_cis_expected(
            clr,
            view_df=view_df,
            intra_only=True,
            clr_weight_name=weight_name,
            ignore_diags=ignore_diags,
            chunksize=chunksize,
            nproc=nproc
        )
    elif contact_type == "trans":
        result = expected.get_trans_expected(
            clr,
            view_df=view_df,
            clr_weight_name=weight_name,
            chunksize=chunksize,
            nproc=nproc,
        )
    else:
        # Without this branch `result` would be unbound and the function
        # would fail later with an unrelated UnboundLocalError.
        raise ValueError(
            f"Contact type {contact_type} not supported. Please use cis or trans."
        )

    # output to file if specified:
    if output:
        result.to_csv(output, sep="\t", index=False, na_rep="nan")
    # or print into stdout otherwise:
    else:
        print(result.to_csv(sep="\t", index=False, na_rep="nan"))
コード例 #15
0
ファイル: call_compartments.py プロジェクト: Phlya/cooltools
def call_compartments(
    cool_path,
    reference_track,
    view,
    contact_type,
    n_eigs,
    verbose,
    out_prefix,
    bigwig,
):
    """
    Perform eigen value decomposition on a cooler matrix to calculate
    compartment signal by finding the eigenvector that correlates best with the
    phasing track.


    COOL_PATH : the path to a .cool file with a balanced Hi-C map. Use the
    '::' syntax to specify a group path in a multicooler file.

    TRACK_PATH : the path to a BedGraph-like file that stores phasing track as
    track-name named column.

    BedGraph-like format assumes tab-separated columns chrom, start, stop and
    track-name.

    """
    # NOTE(review): the docstring above reads like CLI help text, so the
    # argument reference is kept in comments rather than in the docstring.
    #
    # cool_path       -- handed to ``cooler.Cooler``.
    # reference_track -- None, or a ``(track_path, col)`` pair where ``col`` is
    #                    either an integer column index or a column name.
    # view            -- optional path to a BED-like table of regions, read as
    #                    "bed4" with a fallback to "bed3"; None means one
    #                    region per chromosome of the cooler.
    # contact_type    -- "cis" or "trans"; anything else raises ValueError.
    # n_eigs          -- forwarded to the eigendecomposition routines.
    # verbose         -- forwarded to ``pd.read_table``.
    # out_prefix      -- prefix of the written files:
    #                    "<prefix>.<contact_type>.lam.txt",
    #                    "<prefix>.<contact_type>.vecs.tsv" and, optionally,
    #                    "<prefix>.<contact_type>.bw".
    # bigwig          -- when truthy, the "E1" column is also written as bigWig.
    #
    # Raises click.BadParameter when the requested track column cannot be
    # resolved, and ValueError for an unknown ``contact_type``, a track that
    # does not merge cleanly with the cooler bins, or an invalid view table.

    # Fail fast: previously an unknown contact type skipped both branches below
    # and crashed with an UnboundLocalError only after all inputs were loaded.
    if contact_type not in ("cis", "trans"):
        raise ValueError(
            f"Unknown contact_type {contact_type!r}; expected 'cis' or 'trans'."
        )

    clr = cooler.Cooler(cool_path)

    if reference_track is not None:

        # TODO: This all needs to be refactored into a more generic tabular file parser
        # Needs to handle stdin case too.
        track_path, col = reference_track
        buf, names = sniff_for_header(track_path)

        if names is None:
            # Headerless file: a column can only be addressed by position.
            if not isinstance(col, int):
                raise click.BadParameter(
                    "No header found. "
                    'Cannot find "{}" column without a header.'.format(col))

            track_name = "ref"
            kwargs = dict(
                header=None,
                usecols=[0, 1, 2, col],
                names=["chrom", "start", "end", track_name],
            )
        else:
            if isinstance(col, int):
                # Translate a positional index into the header's column name.
                try:
                    col = names[col]
                except IndexError as err:
                    raise click.BadParameter(
                        'Column #{} not compatible with header "{}".'.format(
                            col, ",".join(names))) from err
            elif col not in names:
                raise click.BadParameter(
                    'Column "{}" not found in header "{}"'.format(
                        col, ",".join(names)))

            track_name = col
            kwargs = dict(header="infer",
                          usecols=["chrom", "start", "end", track_name])

        # NOTE(review): `verbose` is deprecated in recent pandas releases;
        # confirm against the pandas version this project pins.
        track_df = pd.read_table(buf,
                                 dtype={
                                     "chrom": str,
                                     "start": np.int64,
                                     "end": np.int64,
                                     track_name: np.float64,
                                 },
                                 comment="#",
                                 verbose=verbose,
                                 **kwargs)

        # we need to merge phasing track DataFrame with the cooler bins to get
        # a DataFrame with phasing info aligned and validated against bins inside of
        # the cooler file.
        track = pd.merge(left=clr.bins()[:],
                         right=track_df,
                         how="left",
                         on=["chrom", "start", "end"])

        # Sanity check: a left merge yields more rows than the bin table only
        # when several track rows match the same bin, i.e. the track does not
        # map one-to-one onto the cooler bins. The exception used to be
        # constructed here without being raised, so the check never fired.
        if len(track) > len(clr.bins()):
            raise ValueError(
                "There is something in the {} that ".format(track_path) +
                "couldn't be merged with cooler-bins {}".format(cool_path))
    else:
        # use entire bin-table from cooler, when reference-track is not provided:
        track = clr.bins()[["chrom", "start", "end"]][:]
        track_name = None

    # define view for cis compartment-calling
    # use input "view" BED file or all chromosomes mentioned in "track":
    if view is None:
        # Generate viewframe from clr.chromsizes:
        view_df = bioframe.make_viewframe([(chrom, 0, clr.chromsizes[chrom])
                                           for chrom in clr.chromnames])
    else:
        # Make viewframe out of table:
        # Read view_df, trying the 4-column schema first and falling back to
        # the 3-column one:
        try:
            view_df = bioframe.read_table(view, schema="bed4", index_col=False)
        except Exception:
            view_df = bioframe.read_table(view, schema="bed3", index_col=False)
        # Convert view_df to viewframe:
        try:
            view_df = bioframe.make_viewframe(view_df,
                                              check_bounds=clr.chromsizes)
        except ValueError as e:
            raise ValueError(
                "View table is incorrect, please, comply with the format. "
            ) from e

    # TODO: Add check that view_df has the same bins as track

    # it's contact_type dependent:
    if contact_type == "cis":
        eigvals, eigvec_table = eigdecomp.cooler_cis_eig(
            clr=clr,
            bins=track,
            view_df=view_df,
            n_eigs=n_eigs,
            phasing_track_col=track_name,
            clip_percentile=99.9,
            sort_metric=None,
        )
    else:  # contact_type == "trans", guaranteed by the check above
        eigvals, eigvec_table = eigdecomp.cooler_trans_eig(
            clr=clr,
            bins=track,
            n_eigs=n_eigs,
            partition=None,
            phasing_track_col=track_name,
            sort_metric=None,
        )

    # Output
    eigvals.to_csv(out_prefix + "." + contact_type + ".lam.txt",
                   sep="\t",
                   index=False)
    eigvec_table.to_csv(out_prefix + "." + contact_type + ".vecs.tsv",
                        sep="\t",
                        index=False)
    if bigwig:
        bioframe.to_bigwig(
            eigvec_table,
            clr.chromsizes,
            out_prefix + "." + contact_type + ".bw",
            value_field="E1",
        )