Esempio n. 1
0
class IntervalFileTest(unittest.TestCase):
    """Exercise IntervalFile.all_hits() on the chr21 rmsk BED fixture."""

    # Fixture path relative to PATH; setUp() stores the joined path on the
    # instance under the same attribute name.
    file = "data/rmsk.hg18.chr21.bed"

    def setUp(self):
        """Resolve the fixture path and open it as an IntervalFile."""
        self.file = os.path.join(PATH, self.file)
        self.bed = IntervalFile(self.file)

    def testOverlaps(self):
        """all_hits() reports 8 features, each intersecting the query."""
        query_start, query_end = 9719768, 9739768
        i = Interval("chr21", query_start, query_end)
        hits = self.bed.all_hits(i)
        self.assertEqual(len(hits), 8)
        for hit in hits:
            # assertTrue replaces the deprecated assert_ alias, which was
            # removed from unittest in Python 3.12.
            self.assertTrue(hit.start <= query_end and hit.end >= query_start)

    def testStrands(self):
        """With same_strand=True every hit carries the query's strand."""
        for strand in ("+", "-"):
            i = Interval("chr21", 9719768, 9739768, strand)
            hits = self.bed.all_hits(i, same_strand=True)
            for hit in hits:
                self.assertEqual(hit.strand, strand)
Esempio n. 2
0
def main():
    """Print the rmsk features that overlap each alignment in the BAM file.

    Every mapped alignment is converted to an Interval (reference name,
    start, end, strand) and searched against the rmsk IntervalFile with
    ``same_strand=True`` and ``ovlp_pct=0.75``.  Each (alignment interval,
    hit) pair is written to stdout as one tab-separated line.
    """
    bam = Samfile("bedtools/tests/data/NA18152.bam", "rb")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")

    # Example 1:
    #    Method: IntervalFile.all_hits()
    #    Report _all_ of the rmsk features that overlap with the BAM alignment
    try:
        for al in bam:
            # Unmapped reads have no aligned end coordinate, so there is no
            # interval to search with.
            if al.is_unmapped:
                continue
            strand = "-" if al.is_reverse else "+"
            i = Interval(bam.getrname(al.rname), al.pos, al.aend, strand)

            for hit in rmsk.all_hits(i, same_strand=True, ovlp_pct=0.75):
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3.
                print("\t".join(str(x) for x in [i, hit]))
    finally:
        # Release the BAM handle even if iteration fails part-way through.
        bam.close()
def main():
    """Report same-strand rmsk hits for every mapped BAM alignment.

    Builds an Interval from each alignment's reference name, start, end and
    strand, queries the rmsk IntervalFile via
    ``all_hits(i, same_strand=True, ovlp_pct=0.75)`` and prints the query
    interval and each hit as a tab-separated line on stdout.
    """
    bam = Samfile("bedtools/tests/data/NA18152.bam", "rb")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")

    # Example 1:
    #    Method: IntervalFile.all_hits()
    #    Report _all_ of the rmsk features that overlap with the BAM alignment
    try:
        for al in bam:
            if al.is_unmapped:
                # No aligned end position is available for unmapped reads,
                # so an Interval cannot be built for them.
                continue

            if al.is_reverse:
                strand = "-"
            else:
                strand = "+"
            i = Interval(bam.getrname(al.rname), al.pos, al.aend, strand)

            for hit in rmsk.all_hits(i, same_strand=True, ovlp_pct=0.75):
                # The parenthesised single-argument form prints identically
                # on Python 2 and Python 3.
                print("\t".join(str(x) for x in [i, hit]))
    finally:
        # Always close the BAM file, including when the loop raises.
        bam.close()
Esempio n. 4
0
class IntervalFileTest(unittest.TestCase):
    """Tests for IntervalFile.all_hits() using the chr21 rmsk BED file."""

    # Relative location of the BED fixture; setUp() joins it onto PATH and
    # stores the result on the instance.
    file = "data/rmsk.hg18.chr21.bed"

    def setUp(self):
        """Open the BED fixture as an IntervalFile for each test."""
        self.file = os.path.join(PATH, self.file)
        self.bed = IntervalFile(self.file)

    def testOverlaps(self):
        """The query window yields 8 hits, all intersecting the window."""
        i = Interval("chr21", 9719768, 9739768)
        hits = self.bed.all_hits(i)
        # assertEqual reports both values on failure, so no debug print of
        # the hit count is needed here.
        self.assertEqual(len(hits), 8)
        for hit in hits:
            # assertTrue is the supported spelling of the deprecated
            # assert_ alias (removed from unittest in Python 3.12).
            self.assertTrue(hit.start <= 9739768 and hit.end >= 9719768)

    def testStrands(self):
        """same_strand=True only returns hits on the query's strand."""
        i = Interval("chr21", 9719768, 9739768, "+")
        hits = self.bed.all_hits(i, same_strand=True)
        for hit in hits:
            self.assertEqual(hit.strand, '+')

        i = Interval("chr21", 9719768, 9739768, "-")
        hits = self.bed.all_hits(i, same_strand=True)
        for hit in hits:
            self.assertEqual(hit.strand, '-')
def main(args):
    """
    Report the coordinates of overlap between genes and peaks.

    For every interval in ``args.genefile``, each hit returned by
    ``IntervalFile.all_hits()`` on ``args.peakfile`` is printed to stdout as
    a tab-separated line: the gene's chromosome, then the hit's ``o_start``
    and ``o_end``.

    - Works with BED, GTF and VCF files.
    - Can be uncompressed or GZIP compressed.
    """

    ##########################################################
    # ex1. Report the coordinates of overlap b/w genes and peaks
    #
    # Equivalent to: intersectBed -a genes -b peaks
    # Uses:           IntervalFile.all_hits()
    ##########################################################
    genes = IntervalFile(args.genefile)
    peaks = IntervalFile(args.peakfile)

    for gene in genes:
        for peak_hit in peaks.all_hits(gene):
            fields = [gene.chrom, peak_hit.o_start, peak_hit.o_end]
            # Single-argument print(...) is valid on both Python 2 and 3.
            print("\t".join(str(f) for f in fields))
Esempio n. 6
0
def main():
    """
    Print each exon alongside every rmsk feature within 1kb of it.

    Each exon is widened by ``window`` bases on both sides and searched
    against the rmsk IntervalFile; the original exon and each hit are
    written to stdout as one tab-separated line.
    """

    ##########################################################
    # ex1. Report rmsk features within a window around each exon
    #
    # Roughly comparable to: windowBed -a exons -b rmsk -w 1000
    # Uses:           IntervalFile.all_hits()
    ##########################################################
    exons = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")

    # allow 1kb of "slop" on each side of the exon
    # when looking for hits
    window = 1000
    for exon in exons:
        # add the slop and search; clamp the start at 0 so an exon closer
        # than `window` to the chromosome start never gets a negative
        # coordinate.
        slop_start = max(0, exon.start - window)
        exon_slop = Interval(exon.chrom, slop_start, exon.end + window,
                             exon.strand)
        for rmsk_hit in rmsk.all_hits(exon_slop):
            # Single-argument print(...) is valid on both Python 2 and 3.
            print("\t".join(str(f) for f in [exon, rmsk_hit]))
Esempio n. 7
0
def main():
    """
    Report, for every exon, the rmsk features found within 1kb of it.

    The search interval is the exon extended by ``window`` bases in both
    directions.  Output is one tab-separated line per (exon, hit) pair,
    showing the unextended exon followed by the rmsk feature.
    """

    ##########################################################
    # ex1. Report rmsk features within a window around each exon
    #
    # Roughly comparable to: windowBed -a exons -b rmsk -w 1000
    # Uses:           IntervalFile.all_hits()
    ##########################################################
    exons = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")

    # allow 1kb of "slop" on each side of the exon
    # when looking for hits
    window = 1000
    for exon in exons:
        # A start closer than `window` to the chromosome origin would go
        # negative after subtraction, so it is floored at 0.
        padded_start = exon.start - window
        if padded_start < 0:
            padded_start = 0
        padded_end = exon.end + window

        exon_slop = Interval(exon.chrom, padded_start, padded_end,
                             exon.strand)
        for rmsk_hit in rmsk.all_hits(exon_slop):
            # The parenthesised form prints the same on Python 2 and 3.
            print("\t".join(str(f) for f in [exon, rmsk_hit]))
Esempio n. 8
0
def _print_fields(*fields):
    """Write *fields* to stdout as a single tab-separated line."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("\t".join(str(f) for f in fields))


def main():
    """
    Run eight intersectBed-style examples on the exon and rmsk BED files.

    Each example opens fresh IntervalFile objects for both files, loops over
    the exons, queries the rmsk file (all_hits / count_hits / any_hits) and
    prints the selected fields as tab-separated lines on stdout.
    """
    exons_path = "bedtools/tests/data/exons.hg18.chr21.bed"
    rmsk_path = "bedtools/tests/data/rmsk.hg18.chr21.bed"

    ##########################################################
    # ex1. Report the coordinates of overlap b/w exons and rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk
    # Uses:           IntervalFile.all_hits()
    ##########################################################
    exons = IntervalFile(exons_path)
    rmsk = IntervalFile(rmsk_path)
    for exon in exons:
        for rmsk_hit in rmsk.all_hits(exon):
            _print_fields(exon.chrom, rmsk_hit.o_start, rmsk_hit.o_end)

    ##########################################################
    # ex2. Report the original features for overlapping
    #    exons and rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk -wa -wb
    # Uses:           IntervalFile.all_hits()
    ##########################################################
    exons = IntervalFile(exons_path)
    rmsk = IntervalFile(rmsk_path)
    for exon in exons:
        for rmsk_hit in rmsk.all_hits(exon):
            _print_fields(
                exon.chrom, exon.start, exon.end,
                exon.name, exon.score, exon.strand,
                rmsk_hit.chrom, rmsk_hit.start, rmsk_hit.end,
                rmsk_hit.name, rmsk_hit.score, rmsk_hit.strand,
            )

    ##########################################################
    # ex3. Report the count of rmsk overlapping each exon
    #
    # Equivalent to: intersectBed -a exons -b rmsk -c
    # Uses:           IntervalFile.count_hits()
    ##########################################################
    exons = IntervalFile(exons_path)
    rmsk = IntervalFile(rmsk_path)
    for exon in exons:
        # get the number of hits in rmsk
        num_hits = rmsk.count_hits(exon)
        _print_fields(
            exon.chrom, exon.start, exon.end,
            exon.name, exon.score, exon.strand,
            num_hits,
        )

    ##########################################################
    # ex4. Report exons that overlap at least one rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk -u
    # Uses:           IntervalFile.any_hits()
    ##########################################################
    exons = IntervalFile(exons_path)
    rmsk = IntervalFile(rmsk_path)
    for exon in exons:
        # does this exon overlap any rmsk?
        if rmsk.any_hits(exon):
            _print_fields(
                exon.chrom, exon.start, exon.end,
                exon.name, exon.score, exon.strand,
            )

    ##########################################################
    # ex5. Report exons that DO NOT overlap at least one rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk -v
    # Uses:           IntervalFile.any_hits()
    ##########################################################
    exons = IntervalFile(exons_path)
    rmsk = IntervalFile(rmsk_path)
    for exon in exons:
        # does this exon overlap any rmsk?
        if not rmsk.any_hits(exon):
            _print_fields(
                exon.chrom, exon.start, exon.end,
                exon.name, exon.score, exon.strand,
            )

    ##########################################################
    # ex6. Report overlap b/w exons and rmsk on the same strand
    #
    # Equivalent to: intersectBed -a exons -b rmsk -s
    # Uses:           IntervalFile.all_hits(same_strand=True)
    ##########################################################
    exons = IntervalFile(exons_path)
    rmsk = IntervalFile(rmsk_path)
    for exon in exons:
        # use "same_strand" to enforce, well, same strand.
        for rmsk_hit in rmsk.all_hits(exon, same_strand=True):
            _print_fields(
                exon.chrom, exon.start, exon.end, exon.strand,
                rmsk_hit.chrom, rmsk_hit.start, rmsk_hit.end,
                rmsk_hit.strand,
            )

    ##########################################################
    # ex7. Report overlap b/w exons and rmsk where the rmsk
    #    feature covers at least 50% of the exon.
    #
    # Equivalent to: intersectBed -a exons -b rmsk -f 0.50
    # Uses:           IntervalFile.all_hits(ovlp_pct=0.50)
    ##########################################################
    exons = IntervalFile(exons_path)
    rmsk = IntervalFile(rmsk_path)
    for exon in exons:
        # use "ovlp_pct" to enforce the fraction of overlap w.r.t. the exon
        for rmsk_hit in rmsk.all_hits(exon, ovlp_pct=0.50):
            _print_fields(
                exon.chrom, exon.start, exon.end, exon.strand,
                rmsk_hit.chrom, rmsk_hit.start, rmsk_hit.end,
                rmsk_hit.strand,
            )

    ##########################################################
    # ex8. Report overlap b/w exons and rmsk on the same strand
    #    where the rmsk feature covers at least 50% of the exon.
    #
    # Equivalent to: intersectBed -a exons -b rmsk -s -f 0.50
    # Uses:           IntervalFile.all_hits(same_strand=True, ovlp_pct=0.50)
    ##########################################################
    exons = IntervalFile(exons_path)
    rmsk = IntervalFile(rmsk_path)
    for exon in exons:
        # combine "same_strand" and "ovlp_pct" to enforce both constraints.
        for rmsk_hit in rmsk.all_hits(exon, same_strand=True, ovlp_pct=0.50):
            _print_fields(
                exon.chrom, exon.start, exon.end, exon.strand,
                rmsk_hit.chrom, rmsk_hit.start, rmsk_hit.end,
                rmsk_hit.strand,
            )