Beispiel #1
0
def test_bigwigwriter_not_sorted(tmpdir):
    """Check that BigWigWriter stores regions that are written out of order.

    Three regions are written with ``is_sorted=False`` (two on chr1 in
    descending start order, one on chr10). The file is then re-opened with
    pyBigWig and every region is compared against the data written for it;
    the untouched chr1 interval [20, 30) must contain only NaN.

    Args:
        tmpdir: directory-like object offering ``mkdir(...).join(...)``
            (presumably pytest's ``tmpdir`` fixture); the bigwig is created
            beneath it.
    """
    from kipoi.writers import BigWigWriter
    import pyBigWig

    tmpfile = str(tmpdir.mkdir("example").join("out.bw"))
    bww = BigWigWriter(tmpfile, chrom_sizes=[('chr1', 1000), ('chr10', 1000)], is_sorted=False)
    regions = [
        ({"chr": "chr1", "start": 30, "end": 40}, np.arange(10)[::-1]),
        ({"chr": "chr1", "start": 10, "end": 20}, np.arange(10)),
        ({"chr": "chr10", "start": 10, "end": 20}, np.arange(10)),
    ]
    for region, data in regions:
        bww.region_write(region, data)
    bww.close()

    # load the bigwig file and validate the values
    r = pyBigWig.open(tmpfile)
    try:
        for region, data in regions:
            # query the values
            assert np.allclose(data, r.values(region['chr'],
                                              region['start'],
                                              region['end'], numpy=True))
        # assert there are no values here
        assert np.isnan(r.values("chr1", 20, 30, numpy=True)).all()
    finally:
        # Release the reader even when one of the assertions above fails.
        r.close()
Beispiel #2
0
def test_bigwigwriter():
    """Check that the BigWigWriter round-trip below raises an exception.

    A writer is created on a temporary file without chromosome sizes, three
    single-base regions are written, and the file is read back. The whole
    sequence sits inside ``pytest.raises(Exception)``, so the test passes as
    soon as any statement in it raises.

    The temporary file is removed afterwards and the descriptor returned by
    ``mkstemp`` is closed immediately so neither is leaked.
    """
    from kipoi.writers import BigWigWriter
    import pyBigWig
    import os
    import tempfile

    # mkstemp returns an *open* OS-level descriptor together with the path;
    # only the path is needed, so close the descriptor right away.
    fd, temp_path = tempfile.mkstemp()
    os.close(fd)
    try:
        with pytest.raises(Exception):
            bww = BigWigWriter(temp_path)
            regions = {"chr": ["chr1", "chr7", "chr2"], "start": [10, 30, 20], "end": [11, 31, 21]}
            values = [3.0, 4.0, 45.4]
            for i, val in enumerate(values):
                reg = {k: v[i] for k, v in regions.items()}
                bww.region_write(reg, np.array([val]))
            bww.close()
            # NOTE(review): this calls the pyBigWig *module* object, which
            # raises on its own; a reader is normally obtained with
            # pyBigWig.open(path). Which statement is meant to raise cannot
            # be told from here - confirm before tightening this test.
            bww_2 = pyBigWig(temp_path)
            for i, val in enumerate(values):
                reg = {k: v[i] for k, v in regions.items()}
                bww.region_write(reg, [val])
                assert bww_2.entries(reg["chr"], reg["start"], reg["end"])[0][2] == val
    finally:
        # Best-effort cleanup: the file may still be held open if the body
        # raised before the writer was closed.
        try:
            os.remove(temp_path)
        except OSError:
            pass
Beispiel #3
0
def contrib2bw(contrib_file, output_prefix):
    """Convert the contribution file to bigwigs.

    Overlapping intervals are dropped first (via
    ``get_nonredundant_example_idx``), then one bigwig is written for every
    (contribution score, task) pair found in the file. Each output path is
    ``output_prefix`` followed by
    ``<delim>.contrib.<score name>.<task>.bw``, where ``/`` in the score
    name is replaced by ``_``.

    Args:
        contrib_file: value passed to ``ContribFile`` to open the
            contribution scores.
        output_prefix: prefix of the generated bigwig paths. Its directory
            part is created when it does not exist; a prefix without a
            directory part writes relative to the current directory.

    Raises:
        AssertionError: if the number of ranges reported after filtering
            differs from the number of retained examples.
    """
    from kipoi.writers import BigWigWriter
    from bpnet.cli.contrib import ContribFile
    from bpnet.cli.modisco import get_nonredundant_example_idx
    output_dir = os.path.dirname(output_prefix)
    # Create the directory before attaching the file logger, and skip the
    # call for a bare prefix: os.makedirs("") raises FileNotFoundError.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    add_file_logging(output_dir, logger, 'contrib2bw')

    cf = ContribFile(contrib_file)

    # remove overlapping intervals
    ranges = cf.get_ranges()
    keep_idx = get_nonredundant_example_idx(ranges, width=None)
    cf.include_samples = keep_idx
    discarded = len(ranges) - len(keep_idx)
    logger.info(
        f"{discarded}/{len(ranges)} of ranges will be discarded due to overlapping intervals"
    )

    contrib_scores = cf.available_contrib_scores(
    )  # TODO - implement contrib_wildcard to filter them
    chrom_sizes = list(cf.get_chrom_sizes().items())
    # NOTE(review): the first lookup above uses cf.get_ranges() while this
    # one uses cf.ranges() - confirm both accessors exist on ContribFile.
    ranges = cf.ranges()

    assert len(ranges) == len(keep_idx)

    # Column lookups do not change between output files; do them once.
    chroms = ranges['chrom']
    starts = ranges['start']
    ends = ranges['end']
    n_ranges = len(ranges)

    # NOTE(review): the file name template below already starts with ".",
    # so a prefix not ending in "/" yields "<prefix>..contrib..." and one
    # ending in "/" yields a dot-file. Kept as-is because downstream
    # consumers may rely on the current names - confirm the intent.
    delim = "." if not output_prefix.endswith("/") else ""

    for contrib_score in contrib_scores:
        contrib_dict = cf.get_contrib(contrib_score=contrib_score)
        contrib_score_name = contrib_score.replace("/", "_")

        for task, contrib in contrib_dict.items():
            output_file = output_prefix + f'{delim}.contrib.{contrib_score_name}.{task}.bw'
            logger.info(f"Genrating {output_file}")
            contrib_writer = BigWigWriter(output_file,
                                          chrom_sizes=chrom_sizes,
                                          is_sorted=False)
            try:
                for idx in range(n_ranges):
                    contrib_writer.region_write(
                        region={
                            "chr": chroms.iloc[idx],
                            "start": starts.iloc[idx],
                            "end": ends.iloc[idx],
                        },
                        data=contrib[idx])
            finally:
                # Close the writer even if a region fails, so it is not leaked.
                contrib_writer.close()
    logger.info("Done!")