class IntervalFileTest(unittest.TestCase):
    """Checks for ``IntervalFile.all_hits`` against the rmsk chr21 BED fixture."""

    # Fixture location relative to PATH; setUp() shadows this class attribute
    # on each instance with the fully joined path.
    file = "data/rmsk.hg18.chr21.bed"

    def setUp(self):
        """Resolve the fixture path and load it as an IntervalFile."""
        self.file = os.path.join(PATH, self.file)
        self.bed = IntervalFile(self.file)

    def testOverlaps(self):
        """The query window yields 8 hits, each of which overlaps the window."""
        start, end = 9719768, 9739768
        hits = self.bed.all_hits(Interval("chr21", start, end))
        self.assertEqual(len(hits), 8)
        for hit in hits:
            # assertTrue replaces the deprecated assert_ alias, which newer
            # unittest releases no longer provide.
            self.assertTrue(hit.start <= end and hit.end >= start)

    def testStrands(self):
        """With same_strand=True every hit carries the strand of the query."""
        for strand in ("+", "-"):
            query = Interval("chr21", 9719768, 9739768, strand)
            for hit in self.bed.all_hits(query, same_strand=True):
                self.assertEqual(hit.strand, strand)
def main():
    """Print every rmsk feature that overlaps each alignment in the BAM file.

    Example 1 -- IntervalFile.all_hits():
    each mapped alignment is turned into a stranded Interval and searched
    against the rmsk intervals with ``same_strand=True`` and
    ``ovlp_pct=0.75``. One tab-separated line (alignment interval, hit) is
    printed per hit.
    """
    bam = Samfile("bedtools/tests/data/NA18152.bam", "rb")
    try:
        rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")

        for al in bam:
            # Unmapped reads have no reference sequence or aligned end, so
            # no interval can be built for them (see the pysam API docs).
            if al.is_unmapped:
                continue

            strand = "-" if al.is_reverse else "+"
            i = Interval(bam.getrname(al.rname), al.pos, al.aend, strand)

            for hit in rmsk.all_hits(i, same_strand=True, ovlp_pct=0.75):
                # Single-argument print(...) emits the same text whether it
                # is parsed as a statement or as a function call.
                print("\t".join(str(x) for x in [i, hit]))
    finally:
        # Release the BAM handle even if a lookup above raises.
        bam.close()
def main():
    """Print every rmsk feature that overlaps each alignment in the BAM file.

    Example 1 -- IntervalFile.all_hits():
    each mapped alignment is turned into a stranded Interval and searched
    against the rmsk intervals with ``same_strand=True`` and
    ``ovlp_pct=0.75``. One tab-separated line (alignment interval, hit) is
    printed per hit.
    """
    bam = Samfile("bedtools/tests/data/NA18152.bam", "rb")
    try:
        rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")

        for al in bam:
            # Unmapped reads have no reference sequence or aligned end, so
            # no interval can be built for them (see the pysam API docs).
            if al.is_unmapped:
                continue

            strand = "-" if al.is_reverse else "+"
            i = Interval(bam.getrname(al.rname), al.pos, al.aend, strand)

            for hit in rmsk.all_hits(i, same_strand=True, ovlp_pct=0.75):
                # Single-argument print(...) emits the same text whether it
                # is parsed as a statement or as a function call.
                print("\t".join(str(x) for x in [i, hit]))
    finally:
        # Release the BAM handle even if a lookup above raises.
        bam.close()
class IntervalFileTest(unittest.TestCase):
    """Checks for ``IntervalFile.all_hits`` against the rmsk chr21 BED fixture."""

    # Fixture location relative to PATH; setUp() shadows this class attribute
    # on each instance with the fully joined path.
    file = "data/rmsk.hg18.chr21.bed"

    def setUp(self):
        """Resolve the fixture path and load it as an IntervalFile."""
        self.file = os.path.join(PATH, self.file)
        self.bed = IntervalFile(self.file)

    def testOverlaps(self):
        """The query window yields 8 hits, each of which overlaps the window."""
        start, end = 9719768, 9739768
        hits = self.bed.all_hits(Interval("chr21", start, end))
        # No debug print of len(hits): assertEqual already reports the
        # actual count when the expectation fails.
        self.assertEqual(len(hits), 8)
        for hit in hits:
            # assertTrue replaces the deprecated assert_ alias, which newer
            # unittest releases no longer provide.
            self.assertTrue(hit.start <= end and hit.end >= start)

    def testStrands(self):
        """With same_strand=True every hit carries the strand of the query."""
        for strand in ("+", "-"):
            query = Interval("chr21", 9719768, 9739768, strand)
            for hit in self.bed.all_hits(query, same_strand=True):
                self.assertEqual(hit.strand, strand)
def main(args):
    """Print the overlap coordinates between genes and the peaks they hit.

    ``args`` must expose ``genefile`` and ``peakfile`` attributes, each of
    which is passed to ``IntervalFile``. For every gene, each hit returned
    by ``peaks.all_hits(gene)`` produces one tab-separated line holding the
    gene's chromosome and the hit's ``o_start`` / ``o_end`` values.
    """
    ##########################################################
    # ex1. Report the coordinates of overlap b/w genes and peaks
    #
    # Equivalent to: intersectBed -a genes -b peaks
    # Uses: IntervalFile.all_hits()
    ##########################################################
    gene_intervals = IntervalFile(args.genefile)
    peak_intervals = IntervalFile(args.peakfile)

    for gene in gene_intervals:
        for overlap in peak_intervals.all_hits(gene):
            columns = [gene.chrom, overlap.o_start, overlap.o_end]
            # Single-argument print(...) writes the same line under the
            # print statement and the print function.
            print("\t".join(str(column) for column in columns))
def main():
    """Print each exon alongside every rmsk feature within 1 kb of it.

    Each exon is widened by ``window`` bases on both sides before searching
    the rmsk intervals. One tab-separated line (original exon, rmsk hit) is
    printed per hit.
    """
    ##########################################################
    # ex1. Report rmsk features within a 1kb window of each exon
    #
    # Roughly equivalent to: windowBed -a exons -b rmsk -w 1000
    # Uses: IntervalFile.all_hits()
    ##########################################################
    exons = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")

    # allow 1kb of "slop" on each side of the exon
    # when looking for hits
    window = 1000

    for exon in exons:
        # Add the slop and search. The start is clamped at 0 so an exon
        # closer than `window` to the chromosome start never produces a
        # negative coordinate.
        exon_slop = Interval(exon.chrom,
                             max(0, exon.start - window),
                             exon.end + window,
                             exon.strand)
        for rmsk_hit in rmsk.all_hits(exon_slop):
            # Report the original exon, not the widened search interval.
            print("\t".join(str(f) for f in [exon, rmsk_hit]))
def main():
    """Print each exon alongside every rmsk feature within 1 kb of it.

    Each exon is widened by ``window`` bases on both sides before searching
    the rmsk intervals. One tab-separated line (original exon, rmsk hit) is
    printed per hit.
    """
    ##########################################################
    # ex1. Report rmsk features within a 1kb window of each exon
    #
    # Roughly equivalent to: windowBed -a exons -b rmsk -w 1000
    # Uses: IntervalFile.all_hits()
    ##########################################################
    exons = IntervalFile("bedtools/tests/data/exons.hg18.chr21.bed")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")

    # allow 1kb of "slop" on each side of the exon
    # when looking for hits
    window = 1000

    for exon in exons:
        # Add the slop and search. The start is clamped at 0 so an exon
        # closer than `window` to the chromosome start never produces a
        # negative coordinate.
        exon_slop = Interval(exon.chrom,
                             max(0, exon.start - window),
                             exon.end + window,
                             exon.strand)
        for rmsk_hit in rmsk.all_hits(exon_slop):
            # Report the original exon, not the widened search interval.
            print("\t".join(str(f) for f in [exon, rmsk_hit]))
def main():
    """Run eight IntervalFile examples comparing exons against rmsk features.

    Every example re-opens both BED files, walks the exons, queries the rmsk
    intervals with ``all_hits``, ``count_hits`` or ``any_hits``, and prints
    one tab-separated line per reported record.
    """
    exons_path = "bedtools/tests/data/exons.hg18.chr21.bed"
    rmsk_path = "bedtools/tests/data/rmsk.hg18.chr21.bed"

    def load():
        # Fresh handles for each example: exons first, then rmsk.
        return IntervalFile(exons_path), IntervalFile(rmsk_path)

    def emit(fields):
        # Single-argument print(...) writes the same line under the print
        # statement and the print function.
        print("\t".join(str(f) for f in fields))

    def bed6(feature):
        # chrom, start, end, name, score, strand
        return [feature.chrom, feature.start, feature.end,
                feature.name, feature.score, feature.strand]

    def stranded(feature):
        # chrom, start, end, strand
        return [feature.chrom, feature.start, feature.end, feature.strand]

    ##########################################################
    # ex1. Report the coordinates of overlap b/w exons and rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk
    # Uses: IntervalFile.all_hits()
    ##########################################################
    exons, rmsk = load()
    for exon in exons:
        for rmsk_hit in rmsk.all_hits(exon):
            emit([exon.chrom, rmsk_hit.o_start, rmsk_hit.o_end])

    ##########################################################
    # ex2. Report the original features for overlapping
    #      exons and rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk -wa -wb
    # Uses: IntervalFile.all_hits()
    ##########################################################
    exons, rmsk = load()
    for exon in exons:
        for rmsk_hit in rmsk.all_hits(exon):
            emit(bed6(exon) + bed6(rmsk_hit))

    ##########################################################
    # ex3. Report the count of rmsk overlapping each exon
    #
    # Equivalent to: intersectBed -a exons -b rmsk -c
    # Uses: IntervalFile.count_hits()
    ##########################################################
    exons, rmsk = load()
    for exon in exons:
        # get the number of hits in rmsk
        num_hits = rmsk.count_hits(exon)
        emit(bed6(exon) + [num_hits])

    ##########################################################
    # ex4. Report exons that overlap at least one rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk -u
    # Uses: IntervalFile.any_hits()
    ##########################################################
    exons, rmsk = load()
    for exon in exons:
        # does this exon overlap any rmsk?
        if rmsk.any_hits(exon):
            emit(bed6(exon))

    ##########################################################
    # ex5. Report exons that DO NOT overlap any rmsk
    #
    # Equivalent to: intersectBed -a exons -b rmsk -v
    # Uses: IntervalFile.any_hits()
    ##########################################################
    exons, rmsk = load()
    for exon in exons:
        # keep only the exons without a single rmsk overlap
        if not rmsk.any_hits(exon):
            emit(bed6(exon))

    ##########################################################
    # ex6. Report overlap b/w exons and rmsk on the same strand
    #
    # Equivalent to: intersectBed -a exons -b rmsk -s
    # Uses: IntervalFile.all_hits(same_strand=True)
    ##########################################################
    exons, rmsk = load()
    for exon in exons:
        # use "same_strand" to enforce, well, same strand.
        for rmsk_hit in rmsk.all_hits(exon, same_strand=True):
            emit(stranded(exon) + stranded(rmsk_hit))

    ##########################################################
    # ex7. Report overlap b/w exons and rmsk where the rmsk
    #      feature covers at least 50% of the exon.
    #
    # Equivalent to: intersectBed -a exons -b rmsk -f 0.50
    # Uses: IntervalFile.all_hits()
    ##########################################################
    exons, rmsk = load()
    for exon in exons:
        # use "ovlp_pct" to enforce the fraction of overlap w.r.t. the exon
        for rmsk_hit in rmsk.all_hits(exon, ovlp_pct=0.50):
            emit(stranded(exon) + stranded(rmsk_hit))

    ##########################################################
    # ex8. Report overlap b/w exons and rmsk on the same strand
    #      where the rmsk feature covers at least 50% of the exon.
    #
    # Equivalent to: intersectBed -a exons -b rmsk -s -f 0.50
    # Uses: IntervalFile.all_hits()
    ##########################################################
    exons, rmsk = load()
    for exon in exons:
        # combine "same_strand" with "ovlp_pct" to apply both filters
        for rmsk_hit in rmsk.all_hits(exon, same_strand=True, ovlp_pct=0.50):
            emit(stranded(exon) + stranded(rmsk_hit))