Exemple #1
0
def test_intron_exon_reads():
    """
    Run the ``intron_exon_reads.py`` script on the ``gdc`` example GFF/BAM
    and compare its stdout with the expected counts, first without and then
    with ``--stranded``.
    """
    base_cmd = [
        'intron_exon_reads.py',
        '--gff', pybedtools.example_filename('gdc.gff'),
        '--bam', pybedtools.example_filename('gdc.bam'),
        '--processes', '2',
    ]

    # Default (unstranded) invocation.
    unstranded_out = sp.check_output(base_cmd, universal_newlines=True)
    expected_unstranded = dedent(
        """\
        exon_only	3
        intron_only	3
        intron_and_exon	1
        """)
    assert unstranded_out == expected_unstranded

    # Same command with --stranded appended; every class is expected to be 0.
    stranded_out = sp.check_output(
        base_cmd + ['--stranded'], universal_newlines=True)
    expected_stranded = dedent(
        """\
        exon_only	0
        intron_only	0
        intron_and_exon	0
        """)
    assert stranded_out == expected_stranded
Exemple #2
0
def test_intron_exon_reads():
    """
    Check the stdout of ``intron_exon_reads.py`` on the ``gdc`` example data
    for the default invocation and for the ``--stranded`` invocation.
    """
    gff_fn = pybedtools.example_filename("gdc.gff")
    bam_fn = pybedtools.example_filename("gdc.bam")

    def run_script(*extra_flags):
        # Both invocations share everything except the trailing flags.
        argv = [
            "intron_exon_reads.py",
            "--gff", gff_fn,
            "--bam", bam_fn,
            "--processes", "2",
        ]
        argv.extend(extra_flags)
        return sp.check_output(argv, universal_newlines=True)

    assert run_script() == dedent("""\
        exon_only	3
        intron_only	3
        intron_and_exon	1
        """)

    assert run_script("--stranded") == dedent("""\
        exon_only	0
        intron_only	0
        intron_and_exon	0
        """)
Exemple #3
0
def test_bed_methods():
    """
    Generator that yields tests, inserting different versions of `bed` as needed

    Every ``(method, send_kwargs, expected)`` triple produced by
    ``parse_yaml(config_fn)`` is skipped when its kwargs contain any of the
    keys ``a``, ``b``, ``abam`` or ``i``, or when they lack ``bed``. For the
    remaining cases, example names under ``files``, ``bams`` and ``fi`` are
    expanded with ``pybedtools.example_filename``, and one test callable is
    yielded per entry of ``converter`` that is exercised here (filename,
    generator, stream, gzip).
    """
    # Keys whose presence makes a config entry out of scope for this
    # generator; defined once instead of on every iteration.
    ignore = ('a', 'b', 'abam', 'i')

    for method, send_kwargs, expected in parse_yaml(config_fn):
        if any(key in send_kwargs for key in ignore):
            continue
        if 'bed' not in send_kwargs:
            continue

        # These entries hold lists of example names -> expand to full paths.
        for list_key in ('files', 'bams'):
            if list_key in send_kwargs:
                send_kwargs[list_key] = [
                    pybedtools.example_filename(name)
                    for name in send_kwargs[list_key]]

        if 'fi' in send_kwargs:
            send_kwargs['fi'] = pybedtools.example_filename(send_kwargs['fi'])

        # `bed` is handed to `run` positionally (in converted form), so it
        # must not also be forwarded as a keyword argument.
        orig_bed = pybedtools.example_bedtool(send_kwargs.pop('bed'))

        for kind_bed in ('filename', 'generator', 'stream', 'gzip'):
            bed = converter[kind_bed](orig_bed)
            f = partial(run, method, bed, expected, **send_kwargs)
            # Explicit arguments instead of `% locals()`, which silently
            # depends on the names of local variables.
            f.description = '%s, i=%s, %s' % (method, kind_bed, send_kwargs)
            yield (f, )
Exemple #4
0
def main():
    """
    Command-line entry point: parse arguments and draw a pie chart with
    ``make_pie``.

    With ``--test`` the bundled ``gdc.bed`` / ``gdc.gff`` example files are
    used and all other arguments are ignored. Otherwise both ``--bed`` and
    ``--gff`` are required; if either is missing the help text is printed and
    the program exits with status 1.

    Raises ``ValueError`` if both ``--include`` and ``--exclude`` are given.
    """
    # NOTE: `__doc__` here is the module docstring, not this function's.
    ap = argparse.ArgumentParser(description=__doc__,
                          formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument('--bed', help='BED file of e.g. peaks')
    ap.add_argument('--gff', help='GFF file of e.g. annotations')
    ap.add_argument('--out', default='out.png', help='Output PNG file')
    ap.add_argument('--stranded', action='store_true',
                    help='Use strand-specific intersections')
    ap.add_argument('--include', nargs='*', help='Featuretypes to include')
    ap.add_argument('--exclude', nargs='*', help='Featuretypes to exclude')
    ap.add_argument('--thresh', type=float, help='Threshold percentage below which output will be suppressed')
    ap.add_argument('--test', action='store_true',
                    help='Run test, overwriting all other args')
    args = ap.parse_args()

    if args.test:
        make_pie(bed=pybedtools.example_filename('gdc.bed'),
                 gff=pybedtools.example_filename('gdc.gff'),
                 stranded=True,
                 out='out.png',
                 include=['CDS',
                          'intron',
                          'five_prime_UTR',
                          'three_prime_UTR'])
        return

    # Without --test both inputs are mandatory. Previously a missing one was
    # passed on to make_pie as None; show the usage and stop instead.
    if not (args.bed and args.gff):
        ap.print_help()
        ap.exit(1)

    if args.include and args.exclude:
        raise ValueError('Cannot specify both --include and --exclude')

    make_pie(bed=args.bed,
             gff=args.gff,
             out=args.out,
             thresh=args.thresh,
             stranded=args.stranded,
             include=args.include,
             exclude=args.exclude)
Exemple #5
0
def test_intron_exon_reads():
    """
    Invoke ``intron_exon_reads.py`` on the ``gdc`` example files and check
    the reported read classes, unstranded first and then with ``--stranded``.
    """
    gff_path = pybedtools.example_filename('gdc.gff')
    bam_path = pybedtools.example_filename('gdc.bam')
    unstranded_cmd = ['intron_exon_reads.py']
    unstranded_cmd += ['--gff', gff_path]
    unstranded_cmd += ['--bam', bam_path]
    unstranded_cmd += ['--processes', '2']

    observed = sp.check_output(unstranded_cmd, universal_newlines=True)
    expected = dedent(
        """\
        exon_only	3
        intron_only	3
        intron_and_exon	1
        """)
    assert observed == expected

    # The stranded run differs only by the extra trailing flag.
    stranded_cmd = list(unstranded_cmd)
    stranded_cmd.append('--stranded')

    observed = sp.check_output(stranded_cmd, universal_newlines=True)
    expected = dedent(
        """\
        exon_only	0
        intron_only	0
        intron_and_exon	0
        """)
    assert observed == expected
def test_isBAM():
    """
    ``pybedtools.helpers.isBAM`` should be true for the example BAM file and
    false for both a BED file and an empty file.
    """
    bam = pybedtools.example_filename("x.bam")
    notabam = pybedtools.example_filename("a.bed")
    # Create an empty scratch file in the current directory.
    open("tiny.txt", "w").close()
    try:
        assert pybedtools.helpers.isBAM(bam)
        assert not pybedtools.helpers.isBAM(notabam)
        assert not pybedtools.helpers.isBAM("tiny.txt")
    finally:
        # Remove the scratch file even when an assertion above fails, so a
        # failing run does not leave it behind in the working directory.
        os.unlink("tiny.txt")
def test_isBAM():
    """
    ``pybedtools.helpers.isBAM`` should be true for the example BAM file and
    false for both a BED file and an empty file.
    """
    bam = pybedtools.example_filename('x.bam')
    notabam = pybedtools.example_filename('a.bed')
    # Create an empty scratch file in the current directory.
    open('tiny.txt', 'w').close()
    try:
        assert pybedtools.helpers.isBAM(bam)
        assert not pybedtools.helpers.isBAM(notabam)
        assert not pybedtools.helpers.isBAM('tiny.txt')
    finally:
        # Remove the scratch file even when an assertion above fails, so a
        # failing run does not leave it behind in the working directory.
        os.unlink('tiny.txt')
Exemple #8
0
def main():
    """
    Command-line entry point: make a pie chart of features overlapping
    annotations (e.g., peaks in introns, exons, etc) by calling ``make_pie``.

    Prints the help text and exits with status 1 when ``--bed`` or ``--gff``
    is missing and ``--test`` was not requested.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--bed", help="BED file of e.g. peaks")
    parser.add_argument("--gff", help="GFF file of e.g. annotations")
    parser.add_argument("--out", default="out.png", help="Output PNG file")
    parser.add_argument(
        "--stranded",
        action="store_true",
        help="Use strand-specific intersections",
    )
    parser.add_argument(
        "--include", nargs="*", help="Featuretypes to include")
    parser.add_argument(
        "--exclude", nargs="*", help="Featuretypes to exclude")
    parser.add_argument(
        "--thresh",
        type=float,
        help="Threshold percentage below which output will be suppressed",
    )
    parser.add_argument(
        "--test",
        action="store_true",
        help='Run test, overwriting all other args. Result will be "out.png" '
        "in current directory.",
    )
    opts = parser.parse_args()

    # --test ignores every other argument and uses the bundled example data.
    if opts.test:
        example_types = [
            "exon", "CDS", "intron", "five_prime_UTR", "three_prime_UTR"
        ]
        make_pie(
            bed=pybedtools.example_filename("gdc.bed"),
            gff=pybedtools.example_filename("gdc.gff"),
            stranded=True,
            out="out.png",
            include=example_types,
        )
        return

    # A normal run needs both input files.
    if not (opts.bed and opts.gff):
        parser.print_help()
        sys.exit(1)

    if opts.include and opts.exclude:
        raise ValueError("Cannot specify both --include and --exclude")

    make_pie(
        bed=opts.bed,
        gff=opts.gff,
        out=opts.out,
        thresh=opts.thresh,
        stranded=opts.stranded,
        include=opts.include,
        exclude=opts.exclude,
    )
Exemple #9
0
def test_issue_156():
    """
    Intersecting against a list of filenames should work, both in the plain
    form and with ``wb=True`` plus ``names`` labelling each file in the list.
    """
    # NOTE: this isn't appropriate for including in the test_iter cases, since
    # that tests filenames, gzipped files, and iterators. There's no support
    # for "list of iterators" as the `b` argument. Plus, here we're not
    # concerned with the ability to handle those different input types -- just
    # that lists of filenames works.
    query = pybedtools.example_bedtool('a.bed')
    databases = [
        pybedtools.example_filename(name) for name in ('b.bed', 'c.gff')]

    # Plain intersection against both files at once.
    observed = str(query.intersect(databases))
    expected = fix(
        """
        chr1    59      100     feature1        0       +
        chr1    155     200     feature2        0       +
        chr1    173     200     feature2        0       +
        chr1    173     200     feature2        0       +
        chr1    100     200     feature2        0       +
        chr1    155     200     feature3        0       -
        chr1    464     500     feature3        0       -
        chr1    485     500     feature3        0       -
        chr1    173     326     feature3        0       -
        chr1    438     500     feature3        0       -
        chr1    495     500     feature3        0       -
        chr1    485     500     feature3        0       -
        chr1    173     326     feature3        0       -
        chr1    438     500     feature3        0       -
        chr1    150     269     feature3        0       -
        chr1    900     901     feature4        0       +
        chr1    900     913     feature4        0       +
        chr1    900     913     feature4        0       +
        chr1    900     950     feature4        0       +
        """)
    assert observed == expected, observed

    # With wb=True each hit also carries the label (B or C) of the file it
    # came from, followed by the matching record from that file.
    observed = str(query.intersect(databases, wb=True, names=['B', 'C']))
    expected = fix(
        """
        chr1	59	100	feature1	0	+	C	chr1	ucb	gene	60	269	.	-	.	ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
        chr1	155	200	feature2	0	+	B	chr1	155	200	feature5	0	-
        chr1	173	200	feature2	0	+	C	chr1	ucb	CDS	174	326	.	+	.	Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1	173	200	feature2	0	+	C	chr1	ucb	mRNA	174	326	.	+	.	ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1	100	200	feature2	0	+	C	chr1	ucb	gene	60	269	.	-	.	ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
        chr1	155	200	feature3	0	-	B	chr1	155	200	feature5	0	-
        chr1	464	500	feature3	0	-	C	chr1	ucb	gene	465	805	.	+	.	ID=thaliana_1_465_805;match=scaffold_801404.1;rname=thaliana_1_465_805
        chr1	485	500	feature3	0	-	C	chr1	ucb	CDS	486	605	.	+	.	Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1	173	326	feature3	0	-	C	chr1	ucb	CDS	174	326	.	+	.	Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1	438	500	feature3	0	-	C	chr1	ucb	CDS	439	630	.	+	.	Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1	495	500	feature3	0	-	C	chr1	ucb	mRNA	496	576	.	+	.	ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1	485	500	feature3	0	-	C	chr1	ucb	mRNA	486	605	.	+	.	ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1	173	326	feature3	0	-	C	chr1	ucb	mRNA	174	326	.	+	.	ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1	438	500	feature3	0	-	C	chr1	ucb	mRNA	439	899	.	+	.	ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1	150	269	feature3	0	-	C	chr1	ucb	gene	60	269	.	-	.	ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
        chr1	900	901	feature4	0	+	B	chr1	800	901	feature6	0	+
        chr1	900	913	feature4	0	+	C	chr1	ucb	mRNA	631	913	.	+	.	ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1	900	913	feature4	0	+	C	chr1	ucb	CDS	760	913	.	+	.	Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1	900	950	feature4	0	+	C	chr1	ucb	CDS	706	1095	.	+	.	Parent=AT1G01010.mRNA;rname=AT1G01010
        """)
    assert observed == expected, observed
Exemple #10
0
def test_issue_156():
    """
    ``intersect`` should accept a list of filenames as its ``b`` argument,
    with and without ``wb=True`` / ``names``.
    """
    # NOTE: this isn't appropriate for including in the test_iter cases, since
    # that tests filenames, gzipped files, and iterators. There's no support
    # for "list of iterators" as the `b` argument. Plus, here we're not
    # concerned with the ability to handle those different input types -- just
    # that lists of filenames works.
    a_bed = pybedtools.example_bedtool("a.bed")
    b_fn = pybedtools.example_filename("b.bed")
    c_fn = pybedtools.example_filename("c.gff")
    filenames = [b_fn, c_fn]

    plain = str(a_bed.intersect(filenames))
    expected_plain = fix("""
        chr1    59      100     feature1        0       +
        chr1    155     200     feature2        0       +
        chr1    173     200     feature2        0       +
        chr1    173     200     feature2        0       +
        chr1    100     200     feature2        0       +
        chr1    155     200     feature3        0       -
        chr1    464     500     feature3        0       -
        chr1    485     500     feature3        0       -
        chr1    173     326     feature3        0       -
        chr1    438     500     feature3        0       -
        chr1    495     500     feature3        0       -
        chr1    485     500     feature3        0       -
        chr1    173     326     feature3        0       -
        chr1    438     500     feature3        0       -
        chr1    150     269     feature3        0       -
        chr1    900     901     feature4        0       +
        chr1    900     913     feature4        0       +
        chr1    900     913     feature4        0       +
        chr1    900     950     feature4        0       +
        """)
    assert plain == expected_plain, plain

    # Labelled variant: each hit names its source file ("B" or "C") and
    # includes the matching record from that file.
    labelled = str(a_bed.intersect(filenames, wb=True, names=["B", "C"]))
    expected_labelled = fix("""
        chr1	59	100	feature1	0	+	C	chr1	ucb	gene	60	269	.	-	.	ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
        chr1	155	200	feature2	0	+	B	chr1	155	200	feature5	0	-
        chr1	173	200	feature2	0	+	C	chr1	ucb	CDS	174	326	.	+	.	Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1	173	200	feature2	0	+	C	chr1	ucb	mRNA	174	326	.	+	.	ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1	100	200	feature2	0	+	C	chr1	ucb	gene	60	269	.	-	.	ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
        chr1	155	200	feature3	0	-	B	chr1	155	200	feature5	0	-
        chr1	464	500	feature3	0	-	C	chr1	ucb	gene	465	805	.	+	.	ID=thaliana_1_465_805;match=scaffold_801404.1;rname=thaliana_1_465_805
        chr1	485	500	feature3	0	-	C	chr1	ucb	CDS	486	605	.	+	.	Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1	173	326	feature3	0	-	C	chr1	ucb	CDS	174	326	.	+	.	Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1	438	500	feature3	0	-	C	chr1	ucb	CDS	439	630	.	+	.	Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1	495	500	feature3	0	-	C	chr1	ucb	mRNA	496	576	.	+	.	ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1	485	500	feature3	0	-	C	chr1	ucb	mRNA	486	605	.	+	.	ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1	173	326	feature3	0	-	C	chr1	ucb	mRNA	174	326	.	+	.	ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1	438	500	feature3	0	-	C	chr1	ucb	mRNA	439	899	.	+	.	ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1	150	269	feature3	0	-	C	chr1	ucb	gene	60	269	.	-	.	ID=thaliana_1_6160_6269;match=fgenesh1_pg.C_scaffold_1000119;rname=thaliana_1_6160_6269
        chr1	900	901	feature4	0	+	B	chr1	800	901	feature6	0	+
        chr1	900	913	feature4	0	+	C	chr1	ucb	mRNA	631	913	.	+	.	ID=AT1G01010.mRNA;Parent=AT1G01010;rname=AT1G01010
        chr1	900	913	feature4	0	+	C	chr1	ucb	CDS	760	913	.	+	.	Parent=AT1G01010.mRNA;rname=AT1G01010
        chr1	900	950	feature4	0	+	C	chr1	ucb	CDS	706	1095	.	+	.	Parent=AT1G01010.mRNA;rname=AT1G01010
        """)
    assert labelled == expected_labelled, labelled
Exemple #11
0
def test_gzipped_files_can_be_intersected():
    """
    Every plain/gzipped pairing of ``a.bed`` and ``b.bed`` should give the
    same intersection.
    """
    gz_a = pybedtools.BedTool(
        _make_temporary_gzip(pybedtools.example_filename('a.bed')))
    gz_b = pybedtools.BedTool(
        _make_temporary_gzip(pybedtools.example_filename('b.bed')))

    plain_a = pybedtools.example_bedtool('a.bed')
    plain_b = pybedtools.example_bedtool('b.bed')

    # Chained comparison: plain/plain, gz/gz, plain/gz and gz/plain.
    assert (
        plain_a.intersect(plain_b)
        == gz_a.intersect(gz_b)
        == plain_a.intersect(gz_b)
        == gz_a.intersect(plain_b)
    )
Exemple #12
0
def test_links():
    """
    ``BedTool.links()`` on a copy of ``a.bed`` should write HTML identical
    to the stored ``a.links.html`` example.
    """
    import shutil

    # have to be careful about the path, since it is embedded in the HTML
    # output -- so make a copy of the example file, and delete when done.
    # shutil.copy replaces `os.system('cp ...')`, whose exit status was never
    # checked and whose command line broke on paths containing spaces.
    shutil.copy(pybedtools.example_filename('a.bed'), 'a.links.bed')
    try:
        a = pybedtools.BedTool('a.links.bed')
        a = a.links()
        with open(pybedtools.example_filename('a.links.html')) as expected_fh:
            exp = expected_fh.read()
        with open(a.links_html) as observed_fh:
            obs = observed_fh.read()
        # print() with a single argument behaves the same on Python 2 and 3.
        print(exp)
        print(obs)
        assert exp == obs
    finally:
        # Remove the copy even if the comparison fails.
        os.unlink('a.links.bed')
Exemple #13
0
def test_links():
    """
    ``BedTool.links()`` on a copy of ``a.bed`` should write HTML identical
    to the stored ``a.links.html`` example.
    """
    import shutil

    # have to be careful about the path, since it is embedded in the HTML
    # output -- so make a copy of the example file, and delete when done.
    # shutil.copy replaces `os.system('cp ...')`, whose exit status was never
    # checked and whose command line broke on paths containing spaces.
    shutil.copy(pybedtools.example_filename('a.bed'), 'a.links.bed')
    try:
        a = pybedtools.BedTool('a.links.bed')
        a = a.links()
        with open(pybedtools.example_filename('a.links.html')) as expected_fh:
            exp = expected_fh.read()
        with open(a.links_html) as observed_fh:
            obs = observed_fh.read()
        # print() with a single argument behaves the same on Python 2 and 3.
        print(exp)
        print(obs)
        assert exp == obs
    finally:
        # Remove the copy even if the comparison fails.
        os.unlink('a.links.bed')
Exemple #14
0
def test_gzip():
    """
    Gzipped copies of ``a.bed`` and ``b.bed`` should be detected as BED and
    intersect identically to the uncompressed originals, in every
    plain/gzipped pairing.
    """
    import gzip
    import shutil

    def gzipped_example(example_name):
        # Compress in-process instead of running `gzip -c ... > ...` through
        # os.system: that exit status was never checked and the command line
        # broke on paths containing spaces.
        tmp_fn = pybedtools.BedTool._tmp()
        with open(pybedtools.example_filename(example_name), 'rb') as src:
            with gzip.open(tmp_fn, 'wb') as dst:
                shutil.copyfileobj(src, dst)
        return pybedtools.BedTool(tmp_fn)

    # make new gzipped files on the fly
    agz = gzipped_example('a.bed')
    bgz = gzipped_example('b.bed')
    assert agz.file_type == bgz.file_type == 'bed'
    a = pybedtools.example_bedtool('a.bed')
    b = pybedtools.example_bedtool('b.bed')
    assert a.intersect(b) == agz.intersect(bgz) == a.intersect(bgz) == agz.intersect(b)
Exemple #15
0
def test_gzip():
    """
    Gzipped copies of ``a.bed`` and ``b.bed`` should be detected as BED and
    intersect identically to the uncompressed originals, in every
    plain/gzipped pairing.
    """
    import gzip
    import shutil

    def gzipped_example(example_name):
        # Compress in-process instead of running `gzip -c ... > ...` through
        # os.system: that exit status was never checked and the command line
        # broke on paths containing spaces.
        tmp_fn = pybedtools.BedTool._tmp()
        with open(pybedtools.example_filename(example_name), "rb") as src:
            with gzip.open(tmp_fn, "wb") as dst:
                shutil.copyfileobj(src, dst)
        return pybedtools.BedTool(tmp_fn)

    # make new gzipped files on the fly
    agz = gzipped_example("a.bed")
    bgz = gzipped_example("b.bed")
    assert agz.file_type == bgz.file_type == "bed"
    a = pybedtools.example_bedtool("a.bed")
    b = pybedtools.example_bedtool("b.bed")
    assert a.intersect(b) == agz.intersect(bgz) == a.intersect(bgz) == agz.intersect(b)
Exemple #16
0
def test_gzip():
    """
    Gzipped copies of ``a.bed`` and ``b.bed`` should be detected as BED and
    intersect identically to the uncompressed originals, in every
    plain/gzipped pairing.
    """
    import gzip
    import shutil

    def gzipped_example(example_name):
        # Compress in-process instead of running `gzip -c ... > ...` through
        # os.system: that exit status was never checked and the command line
        # broke on paths containing spaces.
        tmp_fn = pybedtools.BedTool._tmp()
        with open(pybedtools.example_filename(example_name), 'rb') as src:
            with gzip.open(tmp_fn, 'wb') as dst:
                shutil.copyfileobj(src, dst)
        return pybedtools.BedTool(tmp_fn)

    # make new gzipped files on the fly
    agz = gzipped_example('a.bed')
    bgz = gzipped_example('b.bed')
    assert agz.file_type == bgz.file_type == 'bed'
    a = pybedtools.example_bedtool('a.bed')
    b = pybedtools.example_bedtool('b.bed')
    assert a.intersect(b) == agz.intersect(bgz) == a.intersect(
        bgz) == agz.intersect(b)
Exemple #17
0
def test_issue_218():
    """
    Check how ``BedTool.sort`` and its docstring behave while the BEDTools
    path is switched to a nonexistent location and back again, for several
    ways of constructing a BedTool.

    The statements below depend on their exact order, because each one
    observes global state changed by the preceding ``set_bedtools_path``
    call.
    """
    from pybedtools.helpers import set_bedtools_path, get_bedtools_path
    # NOTE(review): this name is only referenced by the commented-out
    # constructor further down.
    from pybedtools import BedTool

    # NOTE(review): captured but never used below; the path is reset with a
    # bare set_bedtools_path() at the end of each iteration instead.
    orig_path = get_bedtools_path()

    # As pointed out in #222, example_bedtool behaves differently from BedTool.
    # example_bedtool is defined in pybedtools.bedtool but pybedtools.BedTool
    # is imported in pybedtools.__init__. So check various constructors here.
    for constructor in (
        lambda x: pybedtools.example_bedtool(x),
        lambda x: pybedtools.BedTool(pybedtools.example_filename(x)),
        lambda x: pybedtools.bedtool.BedTool(pybedtools.example_filename(x)),

        # NOTE: we likely need recursive reloading (like IPython.deepreload)
        # for this to work:
        #
        # lambda x: BedTool(pybedtools.example_filename(x)),
    ):

        # Baseline: with the default path, sort() runs and both the class
        # and the instance method carry the BEDTools help text.
        x = constructor('x.bed')
        x.sort()
        assert "Original BEDTools help" in pybedtools.bedtool.BedTool.sort.__doc__
        assert "Original BEDTools help" in x.sort.__doc__

        set_bedtools_path('nonexistent')

        # Calling BEDTools with non-existent path, but the docstring should not
        # have been changed.
        with pytest.raises(OSError):
            x.sort()
        assert "Original BEDTools help" in x.sort.__doc__

        # The class's docstring should have been reset though.
        assert pybedtools.bedtool.BedTool.sort.__doc__ is None

        # Creating a new BedTool object now that bedtools is not on the path
        # should detect that, adding a method that raises
        # NotImplementedError...
        y = constructor('x.bed')
        with pytest.raises(NotImplementedError):
            y.sort()

        # ...and correspondingly no docstring
        assert y.sort.__doc__ is None
        assert pybedtools.bedtool.BedTool.sort.__doc__ is None

        # Reset the path, and ensure the resetting works
        set_bedtools_path()
        z = constructor('x.bed')
        z.sort()
Exemple #18
0
def test_issue_218():
    """
    Check how ``BedTool.sort`` and its docstring behave while the BEDTools
    path is switched to a nonexistent location and back again, for several
    ways of constructing a BedTool.

    The statements below depend on their exact order, because each one
    observes global state changed by the preceding ``set_bedtools_path``
    call.
    """
    from pybedtools.helpers import set_bedtools_path, get_bedtools_path
    # NOTE(review): this name is only referenced by the commented-out
    # constructor further down.
    from pybedtools import BedTool

    # NOTE(review): captured but never used below; the path is reset with a
    # bare set_bedtools_path() at the end of each iteration instead.
    orig_path = get_bedtools_path()

    # As pointed out in #222, example_bedtool behaves differently from BedTool.
    # example_bedtool is defined in pybedtools.bedtool but pybedtools.BedTool
    # is imported in pybedtools.__init__. So check various constructors here.
    for constructor in (
            lambda x: pybedtools.example_bedtool(x),
            lambda x: pybedtools.BedTool(pybedtools.example_filename(x)),
            lambda x: pybedtools.bedtool.BedTool(pybedtools.example_filename(x)
                                                 ),
            # NOTE: we likely need recursive reloading (like IPython.deepreload)
            # for this to work:
            #
            # lambda x: BedTool(pybedtools.example_filename(x)),
    ):

        # Baseline: with the default path, sort() runs and both the class
        # and the instance method carry the BEDTools help text.
        x = constructor("x.bed")
        x.sort()
        assert "Original BEDTools help" in pybedtools.bedtool.BedTool.sort.__doc__
        assert "Original BEDTools help" in x.sort.__doc__

        set_bedtools_path("nonexistent")

        # Calling BEDTools with non-existent path, but the docstring should not
        # have been changed.
        with pytest.raises(OSError):
            x.sort()
        assert "Original BEDTools help" in x.sort.__doc__

        # The class's docstring should have been reset though.
        assert pybedtools.bedtool.BedTool.sort.__doc__ is None

        # Creating a new BedTool object now that bedtools is not on the path
        # should detect that, adding a method that raises
        # NotImplementedError...
        y = constructor("x.bed")
        with pytest.raises(NotImplementedError):
            y.sort()

        # ...and correspondingly no docstring
        assert y.sort.__doc__ is None
        assert pybedtools.bedtool.BedTool.sort.__doc__ is None

        # Reset the path, and ensure the resetting works
        set_bedtools_path()
        z = constructor("x.bed")
        z.sort()
Exemple #19
0
def main():
    """
    Command-line entry point: make a pie chart of features overlapping
    annotations (e.g., peaks in introns, exons, etc) by calling ``make_pie``.

    Prints the help text and exits with status 1 when ``--bed`` or ``--gff``
    is missing and ``--test`` was not requested.
    """
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument('--bed', help='BED file of e.g. peaks')
    cli.add_argument('--gff', help='GFF file of e.g. annotations')
    cli.add_argument('--out', default='out.png', help='Output PNG file')
    cli.add_argument('--stranded', action='store_true',
                     help='Use strand-specific intersections')
    cli.add_argument('--include', nargs='*', help='Featuretypes to include')
    cli.add_argument('--exclude', nargs='*', help='Featuretypes to exclude')
    cli.add_argument(
        '--thresh', type=float,
        help='Threshold percentage below which output will be suppressed')
    cli.add_argument(
        '--test', action='store_true',
        help='Run test, overwriting all other args. Result will be "out.png" '
             'in current directory.')
    parsed = cli.parse_args()

    if parsed.test:
        # Test mode ignores user input and uses the bundled example files.
        pie_kwargs = dict(
            bed=pybedtools.example_filename('gdc.bed'),
            gff=pybedtools.example_filename('gdc.gff'),
            stranded=True,
            out='out.png',
            include=['exon', 'CDS', 'intron', 'five_prime_UTR',
                     'three_prime_UTR'],
        )
    else:
        # Both input files are mandatory outside of test mode.
        if not (parsed.bed and parsed.gff):
            cli.print_help()
            sys.exit(1)
        if parsed.include and parsed.exclude:
            raise ValueError('Cannot specify both --include and --exclude')
        pie_kwargs = dict(
            bed=parsed.bed,
            gff=parsed.gff,
            out=parsed.out,
            thresh=parsed.thresh,
            stranded=parsed.stranded,
            include=parsed.include,
            exclude=parsed.exclude,
        )

    make_pie(**pie_kwargs)
def test_gzipped_files_are_iterable_as_normal():
    """
    Iterating a BedTool built from a temporary gzip of ``a.bed`` should give
    the same features as iterating the plain example file.
    """
    gz_fn = _make_temporary_gzip(pybedtools.example_filename('a.bed'))
    gzipped = pybedtools.BedTool(gz_fn)
    plain = pybedtools.example_bedtool('a.bed')
    # First pass over the gzipped file, echoing each feature.
    for feature in gzipped:
        print(feature)
    # Second pass happens inside list(); both passes must succeed.
    assert_list_equal(list(plain), list(gzipped))
Exemple #21
0
def test_cat():
    """
    ``BedTool.cat`` should accept either a BedTool or a filename, merge the
    result by default, and keep every input feature when
    ``postmerge=False``.
    """
    a = pybedtools.example_bedtool('a.bed')
    b = pybedtools.example_bedtool('b.bed')
    b_fn = pybedtools.example_filename('b.bed')
    # A BedTool and the filename of the same file must give the same result.
    assert a.cat(b) == a.cat(b_fn)
    expected = fix("""
    chr1 1   500
    chr1 800 950
    """)
    assert a.cat(b) == expected

    a = pybedtools.example_bedtool('a.bed')
    b = pybedtools.example_bedtool('b.bed')
    c = a.cat(b, postmerge=False)
    # Without the post-merge step no features are combined or dropped.
    assert len(a) + len(b) == len(c), (len(a), len(b), len(c))

    # print() with a single argument works on both Python 2 and Python 3; the
    # former `print c` statement is a SyntaxError on Python 3.
    print(c)
    assert c == fix("""
    chr1	1	100	feature1	0	+
    chr1	100	200	feature2	0	+
    chr1	150	500	feature3	0	-
    chr1	900	950	feature4	0	+
    chr1	155	200	feature5	0	-
    chr1	800	901	feature6	0	+
    """)
Exemple #22
0
def test_issue_178():
    """
    Convert the ``gdc.othersort.bam`` example to bigWig and back to bedGraph
    and compare with the expected coverage. The test passes silently when
    the conversion raises the "file not found" error described below.
    """
    # Compatibility between py2/py3: py27 does not have FileNotFoundError, so
    # fall back to IOError (which does exist) there.
    #
    # The result is bound to a separate local name on purpose. Assigning to
    # `FileNotFoundError` itself makes that name local to this function, so
    # the probe raises UnboundLocalError (a NameError subclass) on every
    # Python version and the IOError fallback is always taken -- which on
    # Python 3 silently widens the handler below to all of OSError.
    try:
        missing_program_error = FileNotFoundError
    except NameError:
        missing_program_error = IOError

    try:
        fn = pybedtools.example_filename('gdc.othersort.bam')
        pybedtools.contrib.bigwig.bam_to_bigwig(fn, genome='dm3', output='tmp.bw')
        x = pybedtools.contrib.bigwig.bigwig_to_bedgraph('tmp.bw')
        assert x == fix(
            '''
            chr2L   70      75      1
            chr2L   140     145     1
            chr2L   150     155     1
            chr2L   160     165     1
            chr2L   210     215     1
            chrX    10      15      1
            chrX    70      75      1
            chrX    140     145     1
            ''')
        os.unlink('tmp.bw')

    # If bedGraphToBigWig is not on the path, see
    # https://github.com/daler/pybedtools/issues/227
    except missing_program_error:
        pass
Exemple #23
0
def test_cat():
    """
    ``BedTool.cat`` should accept either a BedTool or a filename, merge the
    result by default, and keep every input feature when
    ``postmerge=False``.
    """
    a = pybedtools.example_bedtool('a.bed')
    b = pybedtools.example_bedtool('b.bed')
    b_fn = pybedtools.example_filename('b.bed')
    # A BedTool and the filename of the same file must give the same result.
    assert a.cat(b) == a.cat(b_fn)
    expected = fix("""
    chr1 1   500
    chr1 800 950
    """)
    assert a.cat(b) == expected

    a = pybedtools.example_bedtool('a.bed')
    b = pybedtools.example_bedtool('b.bed')
    c = a.cat(b, postmerge=False)
    # Without the post-merge step no features are combined or dropped.
    assert len(a) + len(b) == len(c), (len(a), len(b), len(c))

    # print() with a single argument works on both Python 2 and Python 3; the
    # former `print c` statement is a SyntaxError on Python 3.
    print(c)
    assert c == fix("""
    chr1	1	100	feature1	0	+
    chr1	100	200	feature2	0	+
    chr1	150	500	feature3	0	-
    chr1	900	950	feature4	0	+
    chr1	155	200	feature5	0	-
    chr1	800	901	feature6	0	+
    """)
Exemple #24
0
def test_gzipped_files_are_iterable_as_normal():
    """
    Iterating a BedTool built from a temporary gzip of ``a.bed`` should give
    the same features as iterating the plain example file.
    """
    compressed = pybedtools.BedTool(
        _make_temporary_gzip(pybedtools.example_filename('a.bed')))
    uncompressed = pybedtools.example_bedtool('a.bed')
    # Walk the gzipped file once, echoing each feature...
    for interval in compressed:
        print(interval)
    # ...and then a second time when building the list to compare.
    assert list(uncompressed) == list(compressed)
Exemple #25
0
def test_issue_178():
    """
    Convert the ``gdc.othersort.bam`` example to bigWig and back to bedGraph
    and compare with the expected coverage. The test passes silently when
    the conversion raises the "file not found" error described below.
    """
    # Compatibility between py2/py3: py27 does not have FileNotFoundError, so
    # fall back to IOError (which does exist) there.
    #
    # The result is bound to a separate local name on purpose. Assigning to
    # `FileNotFoundError` itself makes that name local to this function, so
    # the probe raises UnboundLocalError (a NameError subclass) on every
    # Python version and the IOError fallback is always taken -- which on
    # Python 3 silently widens the handler below to all of OSError.
    try:
        missing_program_error = FileNotFoundError
    except NameError:
        missing_program_error = IOError

    try:
        fn = pybedtools.example_filename("gdc.othersort.bam")
        pybedtools.contrib.bigwig.bam_to_bigwig(fn,
                                                genome="dm3",
                                                output="tmp.bw")
        x = pybedtools.contrib.bigwig.bigwig_to_bedgraph("tmp.bw")
        assert x == fix("""
            chr2L   70      75      1
            chr2L   140     145     1
            chr2L   150     155     1
            chr2L   160     165     1
            chr2L   210     215     1
            chrX    10      15      1
            chrX    70      75      1
            chrX    140     145     1
            """)
        os.unlink("tmp.bw")

    # If bedGraphToBigWig is not on the path, see
    # https://github.com/daler/pybedtools/issues/227
    except missing_program_error:
        pass
Exemple #26
0
def test_getting_example_beds():
    """
    Example files should be listed and resolved under ``testdir/data``, and
    nonexistent paths should raise the version-appropriate exception.
    """
    assert 'a.bed' in pybedtools.list_example_files()

    expected_path = os.path.join(testdir, 'data', 'a.bed')

    a_fn = pybedtools.example_filename('a.bed')
    assert a_fn == expected_path

    a = pybedtools.example_bedtool('a.bed')
    assert a.fn == expected_path

    # complain appropriately if nonexistent paths are asked for
    if six.PY3:
        missing_error = FileNotFoundError
    else:
        missing_error = ValueError

    with pytest.raises(missing_error):
        pybedtools.example_filename('nonexistent')
    with pytest.raises(missing_error):
        pybedtools.example_bedtool('nonexistent')
    with pytest.raises(missing_error):
        pybedtools.set_tempdir('nonexistent')
Exemple #27
0
def test_getting_example_beds():
    """
    ``a.bed`` should be among the example files, resolve to
    ``testdir/data/a.bed`` both as a filename and as a BedTool, and asking
    for nonexistent paths should raise.
    """
    available = pybedtools.list_example_files()
    assert 'a.bed' in available

    resolved = pybedtools.example_filename('a.bed')
    assert resolved == os.path.join(testdir, 'data', 'a.bed')

    bedtool = pybedtools.example_bedtool('a.bed')
    assert bedtool.fn == os.path.join(testdir, 'data', 'a.bed')

    # complain appropriately if nonexistent paths are asked for; the
    # exception type depends on the Python major version.
    if not six.PY3:
        expected_exc = ValueError
    else:
        expected_exc = FileNotFoundError

    with pytest.raises(expected_exc):
        pybedtools.example_filename('nonexistent')

    with pytest.raises(expected_exc):
        pybedtools.example_bedtool('nonexistent')

    with pytest.raises(expected_exc):
        pybedtools.set_tempdir('nonexistent')
Exemple #28
0
def fetchSequence(chrName, start, end, fastq):
    """
    Return the text written by ``BedTool.sequence`` for a single interval.

    chrName, start, end -- the interval; each value is formatted with ``%s``,
        so integers are accepted as well as strings.
    fastq -- name of an example file, resolved with
        ``pybedtools.example_filename`` and passed to ``sequence`` as ``fi``.
        NOTE(review): despite the parameter name this is presumably a FASTA
        file -- confirm against callers.

    Returns the entire contents of the file at ``seqfn`` as one string.
    """
    # Two-space separated "chrom  start  end" line, parsed via from_string.
    position = '%s  %s  %s' % (chrName, start, end)
    bedpos = pybedtools.BedTool(position, from_string=True)
    fasta = pybedtools.example_filename(fastq)
    bedpos = bedpos.sequence(fi=fasta)
    # Close the handle deterministically instead of leaving it to the GC.
    with open(bedpos.seqfn) as handle:
        return handle.read()
Exemple #29
0
def _classifier():
    """
    Classify the ``gdc.bed`` features against ``gdc.gff`` with ``Classifier``
    and check ``class_counts``, ``feature_classes`` and ``class_features``.
    """
    classifier = Classifier(
        bed=pybedtools.example_filename("gdc.bed"),
        annotations=pybedtools.example_filename("gdc.gff"),
    )
    classifier.classify()

    bed = pybedtools.example_bedtool("gdc.bed")

    # Number of features seen for each combination of featuretypes.
    expected_counts = {
        frozenset(("UTR", "exon", "mRNA", "CDS", "tRNA", "gene")): 1,
        frozenset(("intron", "gene", "mRNA")): 3,
        frozenset(): 1,
        frozenset(("gene", "exon", "mRNA", "CDS")): 2,
        frozenset(("exon", "mRNA", "CDS", "tRNA", "intron", "gene")): 1,
    }
    assert classifier.class_counts == expected_counts

    # Featuretypes assigned to each individual BED feature.
    expected_feature_classes = {
        bed[0]: {"."},
        bed[1]: {"gene", "exon", "mRNA", "CDS"},
        bed[2]: {"intron", "gene", "mRNA"},
        bed[3]: {"intron", "gene", "mRNA"},
        bed[4]: {"tRNA", "UTR", "exon", "mRNA", "CDS", "gene"},
        bed[5]: {"gene", "exon", "mRNA", "CDS"},
        bed[6]: {"intron", "gene", "mRNA"},
        bed[7]: {"tRNA", "intron", "exon", "mRNA", "CDS", "gene"},
    }
    assert classifier.feature_classes == expected_feature_classes

    print("use these indexes for debugging")
    for index, feature in enumerate(bed):
        print(index, feature)

    for cls, members in classifier.class_features.items():
        print(cls)
        for member in members:
            print("\t" + str(member))

    # Features grouped by their combination of featuretypes.
    expected_class_features = {
        frozenset(): [bed[0]],
        frozenset(("intron", "gene", "mRNA")): [bed[6], bed[2], bed[3]],
        frozenset(("gene", "exon", "mRNA", "CDS")): [bed[5], bed[1]],
        frozenset(("UTR", "exon", "mRNA", "CDS", "tRNA", "gene")): [bed[4]],
        frozenset(("exon", "mRNA", "CDS", "tRNA", "intron", "gene")): [bed[7]],
    }
    assert classifier.class_features == expected_class_features
Exemple #30
0
def main():
    """
    Command-line entry point: make a pie chart of features overlapping
    annotations (e.g., peaks in introns, exons, etc).

    Exits with status 1 after printing the help text when ``--bed`` or
    ``--gff`` is missing and ``--test`` was not given.  Raises ``ValueError``
    when both ``--include`` and ``--exclude`` are supplied.  With ``--test``
    every other argument is ignored and the bundled ``gdc.bed`` / ``gdc.gff``
    examples are plotted to ``out.png``.
    """
    # ``__doc__`` here is the module-level docstring, not this function's.
    ap = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # (flag, add_argument keyword arguments), registered in display order.
    option_specs = [
        ("--bed", dict(help="BED file of e.g. peaks")),
        ("--gff", dict(help="GFF file of e.g. annotations")),
        ("--out", dict(default="out.png", help="Output PNG file")),
        ("--stranded", dict(action="store_true", help="Use strand-specific intersections")),
        ("--include", dict(nargs="*", help="Featuretypes to include")),
        ("--exclude", dict(nargs="*", help="Featuretypes to exclude")),
        ("--thresh", dict(type=float, help="Threshold percentage below which output will be suppressed")),
        (
            "--test",
            dict(
                action="store_true",
                help='Run test, overwriting all other args. Result will be "out.png" in current directory.',
            ),
        ),
    ]
    for flag, spec in option_specs:
        ap.add_argument(flag, **spec)

    args = ap.parse_args()

    # Test mode short-circuits everything else, including input validation.
    if args.test:
        make_pie(
            bed=pybedtools.example_filename("gdc.bed"),
            gff=pybedtools.example_filename("gdc.gff"),
            stranded=True,
            out="out.png",
            include=["exon", "CDS", "intron", "five_prime_UTR", "three_prime_UTR"],
        )
        return

    if not (args.bed and args.gff):
        ap.print_help()
        sys.exit(1)

    if args.include and args.exclude:
        raise ValueError("Cannot specify both --include and --exclude")

    make_pie(
        bed=args.bed,
        gff=args.gff,
        out=args.out,
        thresh=args.thresh,
        stranded=args.stranded,
        include=args.include,
        exclude=args.exclude,
    )
Exemple #31
0
def test_links():
    """Check that ``BedTool.links`` writes the expected HTML for ``a.bed``."""
    # have to be careful about the path, since it is embedded in the HTML
    # output.
    rel_data_dir = os.path.relpath(pybedtools.data_dir())
    a = pybedtools.BedTool(os.path.join(rel_data_dir, 'a.bed'))
    a = a.links()
    # Context managers close both handles even if a read raises.
    with open(pybedtools.example_filename('a.links.html')) as expected_file:
        exp = expected_file.read()
    with open(a.links_html) as observed_file:
        obs = observed_file.read()
    assert exp == obs
def get_sequence(reference_fasta, coordinates, strand):
    """Extract the sequence for ``coordinates`` and return it as a list of lines.

    Args:
        reference_fasta: Name resolved through ``pybedtools.example_filename``
            and passed to ``BedTool.sequence`` as ``fi``.
        coordinates: BED content given as a string (space separated); it is
            handed to ``BedTool`` with ``from_string=True``.
        strand: Currently unused; kept so existing callers keep working.

    Returns:
        The ``seqfn`` output with every ``>`` removed, split on newlines, with
        the last element of the split dropped.

    Side effects:
        Calls ``pybedtools.cleanup(remove_all=True)`` after reading the result.
    """
    bed_coor = pybedtools.BedTool(coordinates, from_string=True)
    fasta = pybedtools.example_filename(reference_fasta)
    seq = bed_coor.sequence(fi=fasta)
    # Read through a context manager so the handle is closed before cleanup
    # runs, rather than whenever the GC gets to it.
    with open(seq.seqfn, 'r') as handle:
        seq_str = handle.read()
    pybedtools.cleanup(remove_all=True)
    return seq_str.replace('>', '').split('\n')[0:-1]
def _classifier():
    """Run ``Classifier`` on the ``gdc`` example files and verify its tables.

    Checks ``class_counts``, ``feature_classes`` and ``class_features``
    against hard-coded expectations, printing debugging output in between.
    """
    c = Classifier(
        bed=pybedtools.example_filename('gdc.bed'),
        annotations=pybedtools.example_filename('gdc.gff'),
    )
    c.classify()

    bed = pybedtools.example_bedtool('gdc.bed')

    expected_counts = {}
    expected_counts[frozenset(['UTR', 'exon', 'mRNA', 'CDS', 'tRNA', 'gene'])] = 1
    expected_counts[frozenset(['intron', 'gene', 'mRNA'])] = 3
    expected_counts[frozenset([])] = 1
    expected_counts[frozenset(['gene', 'exon', 'mRNA', 'CDS'])] = 2
    expected_counts[frozenset(['exon', 'mRNA', 'CDS', 'tRNA', 'intron', 'gene'])] = 1
    assert c.class_counts == expected_counts

    # Expected class of each interval, listed in interval-index order.
    classes_by_index = [
        set(['.']),
        set(['gene', 'exon', 'mRNA', 'CDS']),
        set(['intron', 'gene', 'mRNA']),
        set(['intron', 'gene', 'mRNA']),
        set(['tRNA', 'UTR', 'exon', 'mRNA', 'CDS', 'gene']),
        set(['gene', 'exon', 'mRNA', 'CDS']),
        set(['intron', 'gene', 'mRNA']),
        set(['tRNA', 'intron', 'exon', 'mRNA', 'CDS', 'gene']),
    ]
    expected_feature_classes = dict(
        (bed[index], classes) for index, classes in enumerate(classes_by_index)
    )
    assert c.feature_classes == expected_feature_classes

    print('use these indexes for debugging')
    for index, interval in enumerate(bed):
        print(index, interval)

    for key, intervals in list(c.class_features.items()):
        print(key)
        for interval in intervals:
            print('\t' + str(interval))

    # Expected members of each class, as interval indexes in expected order.
    members_by_class = [
        (frozenset([]), (0,)),
        (frozenset(['intron', 'gene', 'mRNA']), (6, 2, 3)),
        (frozenset(['gene', 'exon', 'mRNA', 'CDS']), (5, 1)),
        (frozenset(['UTR', 'exon', 'mRNA', 'CDS', 'tRNA', 'gene']), (4,)),
        (frozenset(['exon', 'mRNA', 'CDS', 'tRNA', 'intron', 'gene']), (7,)),
    ]
    expected_class_features = dict(
        (key, [bed[index] for index in indexes])
        for key, indexes in members_by_class
    )
    assert c.class_features == expected_class_features
Exemple #34
0
def _get_sequence(chrom, pos, strand, genome):
    """Return the sequence context around ``pos`` as ``pre-nt-post``.

    A six-column BED line spanning ``pos - 150`` to ``pos + 150`` (strand in
    the sixth column) is passed to ``BedTool.sequence`` with ``s=True``.

    Args:
        chrom: Chromosome name.
        pos: Integer position at the centre of the window.
        strand: Strand value written into the BED line.
        genome: Name resolved through ``pybedtools.example_filename`` and
            used as the ``fi`` argument.

    Returns:
        ``[chrom, str(pos), "<pre>-<nt>-<post>"]`` where ``pre`` is the first
        150 characters of the second output line, ``nt`` the character at
        index 150 and ``post`` everything after it.

    NOTE(review): ``pos < 150`` produces a negative BED start -- confirm that
    callers never pass such positions.
    """
    a = pybedtools.BedTool("{0}\t{1}\t{2}\t.\t.\t{3}".format(chrom, pos-150, pos+150, strand), from_string=True)
    fasta = pybedtools.example_filename(genome)
    a = a.sequence(fi=fasta, s=True)
    # Close the result file deterministically instead of leaking the handle.
    with open(a.seqfn) as handle:
        seq = handle.read().split("\n")
    # The first line is skipped; the bases are taken from the second line.
    bases = seq[1]
    pre = bases[:150]
    nt = bases[150]
    post = bases[151:]
    return [chrom, str(pos), "%s-%s-%s" % (pre, nt, post)]
Exemple #35
0
def test_classifier():
    """Classify ``gdc.bed`` against ``gdc.gff`` and check all result tables.

    The debugging output uses ``print(...)`` with a single pre-formatted
    argument, which prints the same text on Python 2 and Python 3 (the former
    ``print`` statements were a SyntaxError on Python 3).
    """
    c = Classifier(bed=pybedtools.example_filename('gdc.bed'),
                   annotations=pybedtools.example_filename('gdc.gff'))
    c.classify()

    bed = pybedtools.example_bedtool('gdc.bed')

    assert c.class_counts == {
        frozenset(['UTR', 'exon', 'mRNA', 'CDS', 'tRNA', 'gene']): 1,
        frozenset(['intron', 'gene', 'mRNA']): 3,
        frozenset([]): 1,
        frozenset(['gene', 'exon', 'mRNA', 'CDS']): 2,
        frozenset(['exon', 'mRNA', 'CDS', 'tRNA', 'intron', 'gene']): 1
    }

    assert c.feature_classes == {
        bed[0]: set(['.']),
        bed[1]: set(['gene', 'exon', 'mRNA', 'CDS']),
        bed[2]: set(['intron', 'gene', 'mRNA']),
        bed[3]: set(['intron', 'gene', 'mRNA']),
        bed[4]: set(['tRNA', 'UTR', 'exon', 'mRNA', 'CDS', 'gene']),
        bed[5]: set(['gene', 'exon', 'mRNA', 'CDS']),
        bed[6]: set(['intron', 'gene', 'mRNA']),
        bed[7]: set(['tRNA', 'intron', 'exon', 'mRNA', 'CDS', 'gene']),
    }

    print('use these indexes for debugging')
    for i, f in enumerate(bed):
        # Format explicitly so Python 2 does not print a tuple repr.
        print('%s %s' % (i, f))

    for k, v in c.class_features.items():
        print(k)
        for i in v:
            print('\t' + str(i))

    assert c.class_features == {
        frozenset([]): [bed[0]],
        frozenset(['intron', 'gene', 'mRNA']): [bed[6], bed[2], bed[3]],
        frozenset(['gene', 'exon', 'mRNA', 'CDS']): [bed[5], bed[1]],
        frozenset(['UTR', 'exon', 'mRNA', 'CDS', 'tRNA', 'gene']): [bed[4]],
        frozenset(['exon', 'mRNA', 'CDS', 'tRNA', 'intron', 'gene']): [bed[7]],
    }
Exemple #36
0
def _classifier():
    """Run ``Classifier`` on the ``gdc`` example files and verify its tables.

    Checks ``class_counts``, ``feature_classes`` and ``class_features``.  The
    debugging prints are written as function calls with one pre-formatted
    argument so the block parses on Python 3 and prints the same text on
    Python 2.
    """
    c = Classifier(bed=pybedtools.example_filename("gdc.bed"), annotations=pybedtools.example_filename("gdc.gff"))
    c.classify()

    bed = pybedtools.example_bedtool("gdc.bed")

    assert c.class_counts == {
        frozenset(["UTR", "exon", "mRNA", "CDS", "tRNA", "gene"]): 1,
        frozenset(["intron", "gene", "mRNA"]): 3,
        frozenset([]): 1,
        frozenset(["gene", "exon", "mRNA", "CDS"]): 2,
        frozenset(["exon", "mRNA", "CDS", "tRNA", "intron", "gene"]): 1,
    }

    assert c.feature_classes == {
        bed[0]: set(["."]),
        bed[1]: set(["gene", "exon", "mRNA", "CDS"]),
        bed[2]: set(["intron", "gene", "mRNA"]),
        bed[3]: set(["intron", "gene", "mRNA"]),
        bed[4]: set(["tRNA", "UTR", "exon", "mRNA", "CDS", "gene"]),
        bed[5]: set(["gene", "exon", "mRNA", "CDS"]),
        bed[6]: set(["intron", "gene", "mRNA"]),
        bed[7]: set(["tRNA", "intron", "exon", "mRNA", "CDS", "gene"]),
    }

    print("use these indexes for debugging")
    for i, f in enumerate(bed):
        # A single formatted argument avoids the tuple repr that
        # ``print(i, f)`` would produce on Python 2.
        print("%s %s" % (i, f))

    for k, v in c.class_features.items():
        print(k)
        for i in v:
            print("\t" + str(i))

    assert c.class_features == {
        frozenset([]): [bed[0]],
        frozenset(["intron", "gene", "mRNA"]): [bed[6], bed[2], bed[3]],
        frozenset(["gene", "exon", "mRNA", "CDS"]): [bed[5], bed[1]],
        frozenset(["UTR", "exon", "mRNA", "CDS", "tRNA", "gene"]): [bed[4]],
        frozenset(["exon", "mRNA", "CDS", "tRNA", "intron", "gene"]): [bed[7]],
    }
Exemple #37
0
def test_links():
    """Compare the HTML written by ``BedTool.links`` with ``a.links.html``."""
    # have to be careful about the path, since it is embedded in the HTML
    # output.
    a = pybedtools.BedTool(
            os.path.join(
                os.path.relpath(pybedtools.data_dir()),
                'a.bed'))
    a = a.links()

    def _read(path):
        # Read a whole text file and close the handle straight away.
        with open(path) as handle:
            return handle.read()

    exp = _read(pybedtools.example_filename('a.links.html'))
    obs = _read(a.links_html)
    assert exp == obs
def test_gzipped_output():
    """``saveas(compressed=True)`` must round-trip the text of ``a.bed``."""
    source_fn = pybedtools.example_filename('a.bed')
    gzipped = pybedtools.BedTool(source_fn).saveas(compressed=True)

    with open(source_fn) as plain:
        original_content = plain.read()

    # Text mode ('rt') so both sides of the comparison are strings.
    with gzip.open(gzipped.fn, 'rt') as unzipped:
        uncompressed_content = unzipped.read()

    assert_equal(original_content, uncompressed_content)
Exemple #39
0
def test_a_b_methods():
    """
    Generator that yields tests for methods taking both `a` and `b`.

    For every configured case supplying both inputs, each ordered pair of
    distinct input kinds is tried; kinds missing from `supported_bam` are
    skipped for an input that is a BAM.
    """
    input_kinds = ('filename', 'generator', 'stream', 'gzip')

    for method, send_kwargs, expected in parse_yaml(config_fn):
        a_isbam = 'abam' in send_kwargs
        b_isbam = False

        if a_isbam:
            abam_fn = pybedtools.example_filename(send_kwargs['abam'])
            send_kwargs['abam'] = abam_fn
            send_kwargs['a'] = abam_fn

        if 'a' not in send_kwargs or 'b' not in send_kwargs:
            continue

        # If abam, makes a BedTool out of it anyway.
        orig_a = pybedtools.example_bedtool(send_kwargs['a'])
        orig_b = pybedtools.example_bedtool(send_kwargs['b'])

        # `a` is passed positionally and `b` is re-added per combination.
        for consumed in ('a', 'b'):
            del send_kwargs[consumed]

        if orig_a._isbam:
            a_isbam = True
        if orig_b._isbam:
            b_isbam = True

        for kind_a, kind_b in itertools.permutations(input_kinds, 2):
            if (a_isbam and kind_a not in supported_bam) or (
                    b_isbam and kind_b not in supported_bam):
                continue

            # Convert to file/generator/stream
            converted_a = converter[kind_a](orig_a)
            converted_b = converter[kind_b](orig_b)

            kind = 'a=%(kind_a)s, b=%(kind_b)s abam=%(a_isbam)s bbam=%(b_isbam)s' % {
                'kind_a': kind_a,
                'kind_b': kind_b,
                'a_isbam': a_isbam,
                'b_isbam': b_isbam,
            }

            send_kwargs['b'] = converted_b

            f = partial(run, method, converted_a, expected, **send_kwargs)

            # Meaningful description
            f.description = '%(method)s, %(kind)s, %(send_kwargs)s' % {
                'method': method,
                'kind': kind,
                'send_kwargs': send_kwargs,
            }
            yield (f, )
Exemple #40
0
def test_gzipped_output():
    """A BedTool saved with ``compressed=True`` gunzips to the original text."""
    _filename = pybedtools.example_filename('a.bed')
    compressed_file = pybedtools.BedTool(_filename).saveas(compressed=True)

    # Open the gzipped copy in text mode next to the plain original.
    with gzip.open(compressed_file.fn, 'rt') as zipped, open(_filename) as plain:
        uncompressed_content = zipped.read()
        original_content = plain.read()

    assert original_content == uncompressed_content
def test_getting_example_beds():
    """Example-file helpers resolve into ``testdir/data`` and reject bad names."""
    assert 'a.bed' in pybedtools.list_example_files()

    expected_fn = os.path.join(testdir, 'data', 'a.bed')
    assert pybedtools.example_filename('a.bed') == expected_fn
    assert pybedtools.example_bedtool('a.bed').fn == expected_fn

    # complain appropriately if nonexistent paths are asked for
    for func in (pybedtools.example_filename,
                 pybedtools.example_bedtool,
                 pybedtools.set_tempdir):
        assert_raises(ValueError, func, 'nonexistent')
Exemple #42
0
def test_i_methods():
    """
    Generator that yields tests, inserting different versions of `i` as needed

    Cases that supply both `a` and `b`, or neither `i` nor `ibam`, are
    skipped.  For BAM input only the kinds listed in `supported_bam` are
    generated.  Each yielded item is a 1-tuple holding a ``partial`` of `run`
    with a human-readable ``description`` attribute.
    """
    for method, send_kwargs, expected in parse_yaml(config_fn):
        i_isbam = False
        if 'ibam' in send_kwargs:
            i_isbam = True
            send_kwargs['ibam'] = pybedtools.example_filename(
                send_kwargs['ibam'])
            send_kwargs['i'] = send_kwargs['ibam']

        if ('a' in send_kwargs) and ('b' in send_kwargs):
            continue

        if ('i' not in send_kwargs) and ('ibam' not in send_kwargs):
            continue

        if 'files' in send_kwargs:
            # `fn` instead of `i`, so the loop variable cannot be confused
            # with the converted input below.
            send_kwargs['files'] = [
                pybedtools.example_filename(fn) for fn in send_kwargs['files']
            ]

        orig_i = pybedtools.example_bedtool(send_kwargs['i'])
        if orig_i._isbam:
            i_isbam = True

        # `i` is passed positionally to `run`, not as a keyword.
        del send_kwargs['i']

        for kind_i in ('filename', 'generator', 'stream', 'gzip'):
            if i_isbam and kind_i not in supported_bam:
                continue
            i = converter[kind_i](orig_i)
            # Explicit arguments instead of `% locals()`, so the text does not
            # depend on the names of local variables.
            kind = 'i=%s ibam=%s' % (kind_i, i_isbam)
            f = partial(run, method, i, expected, **send_kwargs)
            f.description = '%s, %s, %s' % (method, kind, send_kwargs)
            yield (f, )
Exemple #43
0
def test_bed_methods():
    """
    Generator that yields tests, inserting different versions of `bed` as needed

    Cases that use any of `a`, `b`, `abam` or `i`, or that have no `bed`
    entry, are skipped.  Each yielded item is a 1-tuple holding a ``partial``
    of `run` with a human-readable ``description`` attribute.
    """
    ignore = ('a', 'b', 'abam', 'i')

    for method, send_kwargs, expected in parse_yaml(config_fn):
        if any(key in send_kwargs for key in ignore):
            continue
        if 'bed' not in send_kwargs:
            continue

        # Entries holding example-file names are expanded to full paths.
        for list_key in ('files', 'bams'):
            if list_key in send_kwargs:
                send_kwargs[list_key] = [
                    pybedtools.example_filename(fn)
                    for fn in send_kwargs[list_key]
                ]

        if 'fi' in send_kwargs:
            send_kwargs['fi'] = pybedtools.example_filename(send_kwargs['fi'])

        orig_bed = pybedtools.example_bedtool(send_kwargs['bed'])

        # `bed` is passed positionally to `run`, not as a keyword.
        del send_kwargs['bed']

        for kind_bed in ('filename', 'generator', 'stream', 'gzip'):
            bed = converter[kind_bed](orig_bed)
            # Labelled `bed=`; the previous `i=` label was misleading for a
            # test that varies the `bed` input.
            kind = 'bed=%s' % (kind_bed,)
            f = partial(run, method, bed, expected, **send_kwargs)
            f.description = '%s, %s, %s' % (method, kind, send_kwargs)
            yield (f, )
Exemple #44
0
def test_tabix():
    """Index ``a.bed`` with tabix and query it by region string and interval.

    The ``.gz`` / ``.gz.tbi`` files next to ``a.bed`` are removed in a
    ``finally`` clause so a failing assertion does not leave them behind in
    the example-data directory.
    """
    a = pybedtools.example_bedtool('a.bed')
    try:
        t = a.tabix()
        assert t._tabixed()
        results = str(t.tabix_intervals('chr1:99-200'))
        # Function-call form parses on Python 3 and prints the same text on
        # Python 2.
        print(results)
        assert results == fix("""
    chr1	1	100	feature1	0	+
    chr1	100	200	feature2	0	+
    chr1	150	500	feature3	0	-""")

        assert str(t.tabix_intervals(a[2])) == fix("""
    chr1	100	200	feature2	0	+
    chr1	150	500	feature3	0	-""")
    finally:
        # clean up.  The paths are derived from the existing a.bed rather
        # than looked up by name, so cleanup still works when indexing failed
        # before the files were created.
        base_fn = pybedtools.example_filename('a.bed')
        for fn in (base_fn + '.gz', base_fn + '.gz.tbi'):
            if os.path.exists(fn):
                os.unlink(fn)
Exemple #45
0
def test_tabix():
    """Tabix-index ``a.bed`` and check region and interval queries.

    Cleanup of the generated ``a.bed.gz`` and ``a.bed.gz.tbi`` runs in a
    ``finally`` clause, so it also happens when an assertion fails.
    """
    a = pybedtools.example_bedtool('a.bed')
    try:
        t = a.tabix()
        assert t._tabixed()
        results = str(t.tabix_intervals('chr1:99-200'))
        # ``print(x)`` with one argument is valid on both Python 2 and 3; the
        # statement form was a SyntaxError on Python 3.
        print(results)
        assert results == fix("""
    chr1	1	100	feature1	0	+
    chr1	100	200	feature2	0	+
    chr1	150	500	feature3	0	-""")

        assert str(t.tabix_intervals(a[2])) == fix("""
    chr1	100	200	feature2	0	+
    chr1	150	500	feature3	0	-""")
    finally:
        # clean up.  Build the names from a.bed itself, so nothing is looked
        # up by name that may not exist if indexing failed.
        bed_fn = pybedtools.example_filename('a.bed')
        fns = [
            bed_fn + '.gz',
            bed_fn + '.gz.tbi',
        ]
        for fn in fns:
            if os.path.exists(fn):
                os.unlink(fn)
Exemple #46
0
def test_a_b_methods():
    """
    Generator that yields tests, inserting different versions of `a` and `b` as
    needed.

    Each yielded item is a 1-tuple holding a ``partial`` of `run`, annotated
    with a ``description`` naming the method, the input kinds and the kwargs.
    """
    for method, send_kwargs, expected in parse_yaml(config_fn):
        a_isbam = b_isbam = False

        if 'abam' in send_kwargs:
            a_isbam = True
            send_kwargs['abam'] = pybedtools.example_filename(send_kwargs['abam'])
            send_kwargs['a'] = send_kwargs['abam']

        has_both = ('a' in send_kwargs) and ('b' in send_kwargs)
        if not has_both:
            continue

        # If abam, makes a BedTool out of it anyway.
        orig_a = pybedtools.example_bedtool(send_kwargs['a'])
        orig_b = pybedtools.example_bedtool(send_kwargs['b'])

        del send_kwargs['a'], send_kwargs['b']

        if orig_a._isbam:
            a_isbam = True
        if orig_b._isbam:
            b_isbam = True

        kinds = ('filename', 'generator', 'stream', 'gzip')
        for kind_a, kind_b in itertools.permutations(kinds, 2):

            # BAM inputs are only exercised with the kinds that support them.
            if a_isbam and (kind_a not in supported_bam):
                continue
            if b_isbam and (kind_b not in supported_bam):
                continue

            # Convert to file/generator/stream
            first = converter[kind_a](orig_a)
            second = converter[kind_b](orig_b)

            kind = 'a=%(kind_a)s, b=%(kind_b)s abam=%(a_isbam)s bbam=%(b_isbam)s' % dict(
                kind_a=kind_a, kind_b=kind_b, a_isbam=a_isbam, b_isbam=b_isbam)

            send_kwargs['b'] = second

            f = partial(run, method, first, expected, **send_kwargs)

            # Meaningful description
            f.description = '%(method)s, %(kind)s, %(send_kwargs)s' % dict(
                method=method, kind=kind, send_kwargs=send_kwargs)
            yield (f, )
Exemple #47
0
def get_trinuc(bed_i, fa_loc):
    """Write the trinucleotide context for every DNV in ``bed_i``.

    The output path is ``trinuc_out/<bed_i minus its last four
    characters>_trinuc.txt``.  If that file already exists nothing is
    computed.  Returns a status string naming ``bed_i`` in either case.
    """
    stem = bed_i[:-4]
    trinuc_out_loc = 'trinuc_out/{}_trinuc.txt'.format(stem)

    if not os.path.exists(trinuc_out_loc):
        # change start column to start-1, end to end+1, then materialise
        widened = pybedtools.BedTool(bed_i).each(mod_start_end).saveas()
        # run getfasta using pybedtools wrapper
        with_sequence = widened.sequence(fi=pybedtools.example_filename(fa_loc))
        # save output sequence
        write_trinuc(with_sequence, trinuc_out_loc)
        return bed_i + ' trinuc done'

    return 'trinuc already made for ' + bed_i
Exemple #48
0
def test_i_methods():
    """
    Generator that yields tests, inserting different versions of `i` as needed

    Skips cases that define both `a` and `b`, and cases with neither `i` nor
    `ibam`.  When the input is a BAM, only kinds present in `supported_bam`
    are generated.
    """
    kinds = ('filename', 'generator', 'stream', 'gzip')

    for method, send_kwargs, expected in parse_yaml(config_fn):
        i_isbam = False
        if 'ibam' in send_kwargs:
            i_isbam = True
            send_kwargs['ibam'] = pybedtools.example_filename(send_kwargs['ibam'])
            send_kwargs['i'] = send_kwargs['ibam']

        if ('a' in send_kwargs) and ('b' in send_kwargs):
            continue

        if ('i' not in send_kwargs) and ('ibam' not in send_kwargs):
            continue

        if 'files' in send_kwargs:
            # Distinct loop name: `i` is reserved for the converted input.
            send_kwargs['files'] = [pybedtools.example_filename(name) for name in send_kwargs['files']]

        orig_i = pybedtools.example_bedtool(send_kwargs['i'])
        if orig_i._isbam:
            i_isbam = True

        # The input is handed to `run` positionally, so drop the keyword.
        del send_kwargs['i']

        for kind_i in kinds:
            if i_isbam and (kind_i not in supported_bam):
                continue
            i = converter[kind_i](orig_i)
            # Values are passed explicitly rather than through `locals()`, so
            # renaming a local can no longer silently break the description.
            kind = 'i=%s ibam=%s' % (kind_i, i_isbam)
            f = partial(run, method, i, expected, **send_kwargs)
            f.description = '%s, %s, %s' % (method, kind, send_kwargs)
            yield (f, )
Exemple #49
0
def test_cat():
    """``cat`` accepts a BedTool or a filename; ``postmerge=False`` keeps all rows."""
    a = pybedtools.example_bedtool('a.bed')
    b = pybedtools.example_bedtool('b.bed')
    b_fn = pybedtools.example_filename('b.bed')

    # Same result whether `b` is given as an object or as a path.
    assert a.cat(b) == a.cat(b_fn)

    expected =  fix("""
    chr1 1   500
    chr1 800 950
    """)
    assert a.cat(b) == expected

    # Without the post-merge step the row count is the sum of both inputs.
    a = pybedtools.example_bedtool('a.bed')
    b = pybedtools.example_bedtool('b.bed')
    c = a.cat(b, postmerge=False)
    n_a, n_b, n_c = len(a), len(b), len(c)
    assert n_a + n_b == n_c, (n_a, n_b, n_c)
Exemple #50
0
def test_cat():
    """Check merged and unmerged concatenation of ``a.bed`` and ``b.bed``."""
    first = pybedtools.example_bedtool('a.bed')
    second = pybedtools.example_bedtool('b.bed')
    second_fn = pybedtools.example_filename('b.bed')
    assert first.cat(second) == first.cat(second_fn)

    merged_text = fix("""
    chr1 1   500
    chr1 800 950
    """)
    assert first.cat(second) == merged_text

    # Fresh BedTools for the unmerged case.
    first = pybedtools.example_bedtool('a.bed')
    second = pybedtools.example_bedtool('b.bed')
    combined = first.cat(second, postmerge=False)
    assert len(first) + len(second) == len(combined), (
        len(first), len(second), len(combined))
Exemple #51
0
def test_gzipping_is_default_when_extension_is_dot_gz():
    """``saveas`` to a ``.gz`` filename must write gzip-compressed output."""
    _filename = pybedtools.example_filename('a.bed')
    with open(_filename) as f:
        expected_content = f.read()

    fd, temp_filename = tempfile.mkstemp(suffix='.gz')
    # mkstemp returns an already-open OS-level descriptor; only the path is
    # needed, so close it right away instead of leaking it.
    os.close(fd)
    try:
        bedtool = pybedtools.BedTool(_filename)
        bedtool.saveas(fn=temp_filename)

        with gzip.open(temp_filename, 'rt') as gf:
            # gzip will fail next line if file is not gzipped
            actual_content = gf.read()

        assert expected_content == actual_content
    finally:
        if os.path.isfile(temp_filename):
            os.unlink(temp_filename)
Exemple #52
0
def test_gzipping_can_be_turned_off_even_for_dot_gz():
    """``saveas(compressed=False)`` writes plain text even to a ``.gz`` name."""
    _filename = pybedtools.example_filename('a.bed')
    with open(_filename) as f:
        expected_content = f.read()

    fd, temp_filename = tempfile.mkstemp(suffix='.gz')
    # The descriptor from mkstemp is open; close it so it is not leaked.  The
    # file is reopened by path below.
    os.close(fd)
    try:
        bedtool = pybedtools.BedTool(_filename)
        bedtool.saveas(fn=temp_filename, compressed=False)

        with open(temp_filename) as non_gz_f:
            # actual content will be jumbled if non_gz_f is unset
            actual_content = non_gz_f.read()

        assert expected_content == actual_content
    finally:
        if os.path.isfile(temp_filename):
            os.unlink(temp_filename)
def test_gzipping_can_be_turned_off_even_for_dot_gz():
    """A ``.gz`` target saved with ``compressed=False`` holds uncompressed text."""
    _filename = pybedtools.example_filename('a.bed')
    with open(_filename) as f:
        expected_content = f.read()

    fd, temp_filename = tempfile.mkstemp(suffix='.gz')
    # Close mkstemp's open descriptor immediately; only the reserved path is
    # used from here on.
    os.close(fd)
    try:
        bedtool = pybedtools.BedTool(_filename)
        bedtool.saveas(fn=temp_filename, compressed=False)

        with open(temp_filename) as non_gz_f:
            # actual content will be jumbled if non_gz_f is unset
            actual_content = non_gz_f.read()

        assert_equal(expected_content, actual_content)
    finally:
        if os.path.isfile(temp_filename):
            os.unlink(temp_filename)
def test_gzipping_is_default_when_extension_is_dot_gz():
    """Saving to a ``.gz`` path without options must produce gzip output."""
    _filename = pybedtools.example_filename('a.bed')
    with open(_filename) as f:
        expected_content = f.read()

    fd, temp_filename = tempfile.mkstemp(suffix='.gz')
    # mkstemp leaves its descriptor open for the caller; close it here so the
    # test does not leak one descriptor per run.
    os.close(fd)
    try:
        bedtool = pybedtools.BedTool(_filename)
        bedtool.saveas(fn=temp_filename)

        with gzip.open(temp_filename, 'rt') as gf:
            # gzip will fail next line if file is not gzipped
            actual_content = gf.read()

        assert_equal(expected_content, actual_content)
    finally:
        if os.path.isfile(temp_filename):
            os.unlink(temp_filename)
Exemple #55
0
def test_bam_to_fastq():
    """``bam_to_fastq`` on ``small.bam`` must reproduce ``small.fastq``."""
    x = pybedtools.example_bedtool('small.bam')
    tmpfn = pybedtools.BedTool._tmp()
    y = x.bam_to_fastq(fq=tmpfn)
    # Read both files through context managers so neither handle is leaked.
    with open(y.fastq) as observed_file:
        observed = observed_file.read()
    with open(pybedtools.example_filename('small.fastq')) as expected_file:
        expected = expected_file.read()
    assert observed == expected
Exemple #56
0
def test_igv():
    """The IGV script written for ``a.bed`` must match ``a.igv_script``."""
    a = pybedtools.example_bedtool('a.bed')
    a = a.igv()
    # Context managers close both handles even if a read raises.
    with open(a.igv_script) as observed_file:
        obs = observed_file.read()
    with open(pybedtools.example_filename('a.igv_script')) as expected_file:
        exp = expected_file.read()
    assert obs == exp
Exemple #57
0
        conf['method'] = method
        conf['method_kwargs'] = method_kwargs
        conf.update(kwargs)
        super(ConfiguredBedToolsDemo, self).__init__(**conf)


# Script entry point: plot two `intersect` demos driven by the bundled
# example configuration, then show the figures.
if __name__ == "__main__":
    # The string literal below is disabled code kept for reference (an older
    # binary-heatmap demo).  It is a bare expression and has no effect.
    """
    bts = [
            pybedtools.example_bedtool('BEAF_Kc_Bushey_2009.bed'),
            pybedtools.example_bedtool('CTCF_Kc_Bushey_2009.bed'),
            pybedtools.example_bedtool('Cp190_Kc_Bushey_2009.bed'),
            pybedtools.example_bedtool('SuHw_Kc_Bushey_2009.bed'),
        ]
    names = ['BEAF', 'CTCF', 'Cp190', 'Su(Hw)']

    #bts = [
    #        pybedtools.example_bedtool('a.bed'),
    #        pybedtools.example_bedtool('b.bed')]
    #names = ['a','b']
    d, m = binary_heatmap(bts, names)
    print binary_summary(d)
    """
    # Demo configuration shipped with the pybedtools example data.
    conf_file = pybedtools.example_filename('democonfig.yaml')
    # An empty name is used to obtain the example-data directory itself.
    data_path = pybedtools.example_filename("")  # dir name
    # First plot: `intersect` with no extra keyword arguments.
    ax1 = ConfiguredBedToolsDemo(conf_file, method='intersect', method_kwargs={},
            data_path=data_path).plot()
    # Second plot: the same method, called with `u=True`.
    ax2 = ConfiguredBedToolsDemo(conf_file, method='intersect', method_kwargs=dict(u=True),
            data_path=data_path).plot()
    plt.show()