def __init__(self, fname=None, fileobj=None):
    """Attach a BedFile built from *fileobj* or, failing that, from *fname*.

    A truthy *fileobj* wins: the BED data is read from it and ``self.fname``
    is set to the placeholder ``'*fileobj*'``.  Otherwise ``BedFile(fname)``
    is used and ``self.fname`` records *fname* as given.  The base ``Model``
    initialiser runs last.
    """
    from_stream = bool(fileobj)
    self.bed = BedFile(fileobj=fileobj) if from_stream else BedFile(fname)
    self.fname = '*fileobj*' if from_stream else fname
    Model.__init__(self)
def __init__(self, fname, nostrand=None):
    """Remember *fname*, decode the strand option and open the BED file.

    Strand matching is disabled only when *nostrand* equals the literal
    string ``'nostrand'``; any other value leaves it enabled.
    """
    self.regions = {}  # store BED regions as keyed bins (chrom, bin)
    self.fname = fname
    self.nostrand = bool(nostrand == 'nostrand')
    self.bed = BedFile(fname)
def bam_extract(inbam, outbam, bedfile, nostrand=False, quiet=False):
    """Write to *outbam* every read returned for each region of *bedfile*.

    Regions whose chromosome is not listed in ``inbam.references`` are
    skipped.  The region's strand is passed to ``bam_extract_reads`` unless
    *nostrand* is set, in which case ``None`` is passed.  Unless *quiet* is
    set, progress is reported through an ``ETA`` object sized from the BED
    file on disk and a final count is written to stderr.
    """
    bed = BedFile(bedfile)
    eta = None if quiet else ETA(os.stat(bedfile).st_size, fileobj=bed)

    passed = 0
    for region in bed:
        if eta:
            eta.print_status(extra="extracted:%s" % (passed))

        if region.chrom not in inbam.references:
            continue

        strand = None if nostrand else region.strand
        for read in bam_extract_reads(inbam, region.chrom, region.start, region.end, strand):
            outbam.write(read)
            passed += 1

    # NOTE(review): the summary line is assumed to be part of the non-quiet
    # branch together with eta.done() -- confirm against the original layout.
    if not quiet:
        eta.done()
        sys.stderr.write("%s extracted\n" % (passed, ))
def testBaseCallRegionStrand(self):
    # confirm that BED strand *isn't* used
    # Four mock reads on 'test2': three forward, one (foo3) reverse.
    bam = MockBam(['test2'])
    bam.add_read('foo1', 'atcgatcg', '........', 0, 0, cigar='8M')
    bam.add_read('foo2', 'atcgatcg', 'AAAAAAAA', 0, 4, cigar='8M')
    bam.add_read('foo3', 'accgatcg', '########', 0, 4, cigar='8M', is_reverse=True)
    bam.add_read('foo4', 'atcgactgatcg', '############', 0, 0, cigar='12M')

    # The single region is on the '-' strand; '|' stands in for tabs.
    bed = BedFile(fileobj=StringIO.StringIO('''\
test2|4|7|foo|1|-
'''.replace('|', '\t')))

    # remember: bed is 0-based, basecall is 1-based, so 4->7, corresponds to 5->8

    out = StringIO.StringIO('')
    ngsutils.bam.basecall.bam_basecall(bam, os.path.join(
        os.path.dirname(__file__), 'test.fa'), showstrand=True, regions=bed, out=out)

    # Every expected row reports count 4 and a "+ strand %" of 0.75, i.e. the
    # forward and reverse reads are all counted despite the '-' region.
    valid = '''chrom|pos|ref|count|consensus call|minor call|ave mappings|entropy|A|C|G|T|N|Deletions|Gaps|Insertions|Inserts|+ strand %|A minor %|C minor %|G minor %|T minor %|N minor %|Deletion minor %|Insertion minor %
test2|5|A|4|A||1.0|1.87433193885|4|0|0|0|0|0|0|0||0.75|0.25|0.0|0.0|0.0|0.0|0.0|0.0
test2|6|T|4|T/C||1.0|2.94335833285|0|2|0|2|0|0|0|0||0.75|0.0|0.5|0.0|0.0|0.0|0.0|0.0
test2|7|C|4|C|T|1.0|3.45990045591|0|3|0|1|0|0|0|0||0.75|0.0|0.333333333333|0.0|0.0|0.0|0.0|0.0
test2|8|G|4|G||1.0|1.87433193885|0|0|4|0|0|0|0|0||0.75|0.0|0.0|0.25|0.0|0.0|0.0|0.0
'''.replace('|', '\t')

    self.assertEqual(valid, out.getvalue())
# Command-line handling: the first two existing paths on the command line are
# taken as the BED file and the reference, in that order.
bed = None
ref = None
stranded = True
include_name = False

last = None
for arg in sys.argv[1:]:
    if last == '-min':
        # The previous token was -min, so this token is its value.
        # NOTE(review): min_size is not assigned anywhere else in the visible
        # code -- presumably it gets a default before this point; confirm.
        min_size = int(arg)
        last = None
    elif arg == '-min':
        last = arg
    elif arg == '-name':
        include_name = True
    elif arg == '-ns':
        stranded = False
    elif os.path.exists(arg):
        if not bed:
            bed = arg
        elif not ref:
            ref = arg

if not (bed and ref):
    usage()
    sys.exit(1)

bed_tofasta(BedFile(bed), ref, min_size=min_size, stranded=stranded, include_name=include_name)
#!/usr/bin/env python ''' Tests for bedutils reduce ''' import unittest import StringIO import ngsutils.bed.reduce from ngsutils.bed import BedFile from ngsutils.bam.t import _matches bedtest = BedFile(fileobj=StringIO.StringIO('''\ chr1|100|150|foo1|10|+ chr1|140|200|foo2|10|+ chr1|500|550|foo3|10|+ chr1|560|600|foo4|10|- chr1|600|700|foo5|10|+ '''.replace('|', '\t'))) class ReduceTest(unittest.TestCase): def testReduce(self): valids = '''\ chr1|100|200|foo1,foo2|20|+ chr1|500|550|foo3|10|+ chr1|560|700|foo4,foo5|20|+ '''.replace('|', '\t').split('\n') out = StringIO.StringIO('') ngsutils.bed.reduce.bed_reduce(bedtest,
if __name__ == '__main__':
    # Defaults; overridden by the command line below.
    qbed_fname = None
    refbed_fname = None
    maxdist = 100000
    match = False
    nostrand = False

    last = None
    for arg in sys.argv[1:]:
        if last == '-max':
            # Value belonging to a preceding -max flag.
            maxdist = int(arg)
            last = None
        elif arg == '-max':
            last = arg
        elif arg == '-match':
            match = True
        elif arg == '-nostrand':
            nostrand = True
        elif not qbed_fname and (arg == '-' or os.path.exists(arg)):
            # Only the query BED may be given as '-'.
            qbed_fname = arg
        elif not refbed_fname and os.path.exists(arg):
            refbed_fname = arg

    if not (qbed_fname and refbed_fname):
        usage()

    qbed = BedFile(qbed_fname)
    refbed = BedFile(refbed_fname)
    find_nearest(qbed, refbed, maxdist, match, nostrand)
if last == '-qual': min_qual = int(arg) last = None elif last == '-ref': if os.path.exists(arg) and os.path.exists('%s.fai' % arg): ref = arg else: print "Missing FASTA file or index: %s" % arg usage() last = None elif last == '-count': min_count = int(arg) last = None elif last == '-bed': if os.path.exists(arg): regions = BedFile(arg) else: print "BED file: %s not found!" % arg usage() last = None elif last == '-mask': mask = int(arg) last = None elif last == '-minorpct': minorpct = float(arg) last = None elif last == '-profile': profile = arg last = None elif arg == '-h': usage()
def usage():
    """Print the module docstring and the usage line, then exit with status 1."""
    print(__doc__)
    print("""\
Usage: bedutils sizes bedfile
""")
    sys.exit(1)


def bed_size(bed, out=sys.stdout):
    """Write ``end - start`` for every region in *bed* to *out*, one per line."""
    for region in bed:
        length = region.end - region.start
        out.write('%s\n' % length)


if __name__ == '__main__':
    fname = None

    for arg in sys.argv[1:]:
        if arg == '-h':
            usage()

        # Exactly one input is accepted: an existing path or '-'.
        acceptable = os.path.exists(arg) or arg == '-'
        if fname or not acceptable:
            print("Unknown option: %s" % arg)
            usage()
        else:
            fname = arg

    if not fname:
        usage()

    bed_size(BedFile(fname))
# The next statement is the tail of a definition that starts above this view;
# it is reproduced unchanged.
out.write('%s\n' % '\t'.join(cols))


if __name__ == '__main__':
    fname1 = None
    fname2 = None
    stranded = True

    # The first two arguments that are '-' or an existing path become the two
    # inputs, in order; anything else that is not a known flag is an error.
    for arg in sys.argv[1:]:
        if arg == '-h':
            usage()
        elif arg == '-nostrand':
            stranded = False
        elif not fname1 and (arg == '-' or os.path.exists(arg)):
            fname1 = arg
        elif not fname2 and (arg == '-' or os.path.exists(arg)):
            fname2 = arg
        else:
            print("Unknown option: %s" % arg)
            usage()

    # Both inputs are required.  The previous test (`not fname1 or fname2`)
    # was inverted for the second file: it rejected every valid two-file
    # invocation and let a missing second file through to BedFile(None).
    if not fname1 or not fname2:
        usage()

    if fname1 == '-' and fname2 == '-':
        usage("Both input files can't be from stdin!")

    bed2 = BedFile(fname2)
    try:
        bed_subtract(fname1, bed2, stranded)
    finally:
        # Release the second BED file even if the subtraction fails.
        bed2.close()
if __name__ == '__main__':
    fname1 = None
    fname2 = None
    stranded = True

    for arg in sys.argv[1:]:
        # An input is either stdin ('-') or a path that exists.
        readable = arg == '-' or os.path.exists(arg)
        if arg == '-h':
            usage()
        elif arg == '-nostrand':
            stranded = False
        elif readable and not fname1:
            fname1 = arg
        elif readable and not fname2:
            fname2 = arg
        else:
            print("Unknown option: %s" % arg)
            usage()

    if not (fname1 and fname2):
        usage()

    if fname1 == '-' and fname2 == '-':
        usage("Both input files can't be from stdin!")

    # The first input is streamed; the second is opened as a BedFile and
    # closed once the subtraction is done.
    bed1 = BedStreamer(fname1)
    bed2 = BedFile(fname2)
    bed_subtract(bed1, bed2, stranded)
    bed2.close()
if arg == '-h': usage() if last == '-name': name = arg last = None elif last == '-score': score = arg last = None elif last == '-strand': strand = arg last = None elif last == '-rgb': if not rgb_name: rgb_name = arg else: rgb[rgb_name] = arg rgb_name = None last = None elif arg in ['-name', '-score', '-strand', '-rgb']: last = arg elif not fname and (os.path.exists(arg) or arg == '-'): fname = arg else: print "Unknown option: %s" % arg usage() if not fname: usage() bed_annotate(BedFile(fname), name=name, score=score, strand=strand, rgb=rgb)
#!/usr/bin/env python ''' Tests for bedutils extend ''' import unittest import StringIO from ngsutils.bed import BedFile import ngsutils.bed.extend testbed = BedFile(fileobj=StringIO.StringIO(''' chr1|10|90|foo|1|+ chr1|10|90|foo|1|- chr1|100|150|foo|1|+ chr1|200|250|foo|1|- '''.replace('|', '\t'))) class ExtendTest(unittest.TestCase): def testAbsolute(self): valid = '''chr1|10|110|foo|1|+ chr1|0|90|foo|1|- chr1|100|200|foo|1|+ chr1|150|250|foo|1|- '''.replace('|', '\t') out = StringIO.StringIO('') ngsutils.bed.extend.bed_extend(testbed, 100, relative=False, out=out) self.assertEqual(out.getvalue(), valid)
#!/usr/bin/env python ''' Tests for bedutils tobedgraph ''' import unittest import ngsutils.bed.tobedgraph import StringIO from ngsutils.bed import BedFile bedtest = BedFile(fileobj=StringIO.StringIO('''\ test1|10|20|foo1|10|+ test1|10|20|foo2|10|- test1|15|25|foo3|10|+ test1|100|150|foo4|1|+ '''.replace('|', '\t'))) class BedGraphTest(unittest.TestCase): def testBedGraph(self): valid = '''\ test1|10|15|2 test1|15|20|3 test1|20|25|1 test1|100|150|1 '''.replace('|', '\t') sio = StringIO.StringIO("") ngsutils.bed.tobedgraph.bed_tobedgraph(bedtest, out=sio) self.assertEqual(valid, sio.getvalue()) sio.close()
# The next two statements are the tail of a definition that starts above this
# view; they are reproduced unchanged.
present += 1
out.write('\t%s\n' % present)


if __name__ == '__main__':
    refname = None
    bedfiles = []
    stranded = True

    last = None
    for arg in sys.argv[1:]:
        if arg == '-h':
            usage()
        elif arg == '-ns':
            stranded = False
        elif os.path.exists(arg):
            # First existing path is the reference; every later one is an
            # additional BED file to count against it.
            if not refname:
                refname = arg
            else:
                bedfiles.append(BedFile(arg))
        else:
            print("Bad argument: %s" % arg)
            usage()
            sys.exit(1)

    if not (refname and bedfiles):
        usage()
        sys.exit(1)

    bed_refcount(BedFile(refname), bedfiles, stranded=stranded)
if __name__ == "__main__":
    bed = None
    strand = None
    norm = None

    # Flags that select a single strand.
    strand_flags = {'-plus': '+', '-minus': '-'}

    last = None
    for arg in sys.argv[1:]:
        if arg == '-h':
            usage()

        if last == '-norm':
            # Value belonging to a preceding -norm flag.
            norm = float(arg)
            last = None
        elif arg == '-norm':
            last = arg
        elif arg in strand_flags:
            strand = strand_flags[arg]
        elif not bed and os.path.exists(arg):
            bed = arg
        else:
            print("Unknown option or missing index: %s" % arg)
            usage()

    if not bed:
        usage()

    bed_tobedgraph(BedFile(bed), strand, norm)
#!/usr/bin/env python ''' Tests for bedutils refcount ''' import unittest import StringIO import ngsutils.bed.refcount from ngsutils.bed import BedFile bedtest1 = BedFile('test1', fileobj=StringIO.StringIO('''\ chr1|100|150|foo1|10|+ chr1|140|200|foo2|10|+ chr1|500|550|foo3|10|+ chr1|560|600|foo4|10|- chr1|600|700|foo5|10|+ '''.replace('|', '\t'))) bedtest2 = BedFile('test2', fileobj=StringIO.StringIO('''\ chr1|90|120|bar1|10|+ chr1|140|200|bar2|10|+ chr1|510|520|bar3|10|+ chr1|500|660|bar4|10|- chr1|520|570|bar5|10|+ '''.replace('|', '\t'))) bedtest3 = BedFile('test3', fileobj=StringIO.StringIO('''\
def bed_reduce(target_bed, query_bed, stranded=True, exact=False, out=sys.stdout):
    """Write each query region that has a matching region in *target_bed*.

    For every region of *query_bed*, ``target_bed.fetch`` is asked for the
    regions at the same chrom/start/end (and strand, when *stranded*).  The
    query region is written to *out* once if any fetched region qualifies:
    with ``exact=False`` any fetched region does; with ``exact=True`` the
    start and end must both be equal.
    """
    for qregion in query_bed:
        strand = qregion.strand if stranded else None
        candidates = target_bed.fetch(qregion.chrom, qregion.start, qregion.end, strand)

        # any() stops at the first qualifying target, like the original break.
        if any(not exact or (qregion.start == hit.start and qregion.end == hit.end)
               for hit in candidates):
            qregion.write(out)


if __name__ == '__main__':
    fnames = []
    stranded = True
    exact = False

    for arg in sys.argv[1:]:
        if arg == '-h':
            usage()

        if arg == '-nostrand':
            stranded = False
        elif arg == '-exact':
            exact = True
        elif os.path.exists(arg):
            fnames.append(arg)
        else:
            print("Unknown option: %s" % arg)
            usage()

    # Two input files are required: target first, then query.
    if len(fnames) < 2:
        usage()

    bed_reduce(BedFile(fnames[0]), BedStreamer(fnames[1]), stranded, exact)
class ExcludeBED(object):
    """Read filter that rejects mapped reads overlapping a region of a BED file."""

    def __init__(self, fname, nostrand=None):
        """Open *fname*; strand is ignored only if *nostrand* == ``'nostrand'``."""
        self.regions = {}  # store BED regions as keyed bins (chrom, bin)
        self.fname = fname
        self.nostrand = bool(nostrand == 'nostrand')
        self.bed = BedFile(fname)

    def filter(self, bam, read):
        """Return False if *read* overlaps any BED region, True otherwise.

        Unmapped reads always pass.  Unless strand is being ignored, only
        regions on the read's own strand ('-' for reverse reads, '+'
        otherwise) are requested from the BED file.
        """
        if read.is_unmapped:
            return True

        if self.nostrand:
            strand = None
        else:
            strand = '-' if read.is_reverse else '+'

        overlapping = self.bed.fetch(bam.getrname(read.tid), read.pos, read.aend, strand)
        for _region in overlapping:
            # region found, exclude read
            return False
        return True

    def __repr__(self):
        suffix = ' nostrand' if self.nostrand else ''
        return 'Excluding from BED: %s%s' % (self.fname, suffix)

    def close(self):
        """Nothing to release."""
        pass
''') sys.exit(1) if __name__ == '__main__': bam_fname = None bed_fname = None maxdist = 100000 last = None for arg in sys.argv[1:]: if last == '-max': maxdist = int(arg) last = None elif arg in ['-max']: last = arg elif not bam_fname and os.path.exists(arg): bam_fname = arg elif not bed_fname and os.path.exists(arg): bed_fname = arg if not bam_fname or not bed_fname: usage() bed = BedFile(bed_fname) bam = ngsutils.bam.bam_open(bam_fname) find_nearest(bam, bed, maxdist)
#!/usr/bin/env python ''' Tests for bedutils tofasta ''' import os import unittest import ngsutils.bed.tofasta import StringIO from ngsutils.bed import BedFile bedtest = BedFile(fileobj=StringIO.StringIO('''\ test1|0|10|foo1|10|+ test1|10|20|foo2|10|- test1|0|5|foo1|10|+ test3|0|50|foo1|10|+ '''.replace('|', '\t'))) fasta = os.path.join(os.path.dirname(__file__), 'test.fa') class FASTATest(unittest.TestCase): def testBedFASTA(self): valid = '''\ >test1:0-10 aaaaaaaaaa >test1:10-20 cccccccccc '''
def testBedFile(self):
    # Read test.bed from this directory and compare its regions, rendered as
    # pipe-joined fields (with the integer score), against the expected list.
    here = os.path.dirname(__file__)
    fname = os.path.join(here, 'test.bed')

    valid = [
        'chr1|100|150|foo|1|+',
        'chr1|100|150|foo|1|-',
        'chr1|200|250|foo|1|+',
        'chr1|300|350|foo|1|-',
    ]

    regions = []
    for x in BedFile(fname):
        fields = (x.chrom, x.start, x.end, x.name, x.score_int, x.strand)
        regions.append('%s|%s|%s|%s|%s|%s' % fields)

    self.assertTrue(_matches(valid, regions))
def bed_stats(infile, gtf_file=None, out=sys.stdout, quiet=False, names=False):
    """Build a BedStats report for the BED file *infile* and write it to *out*.

    *gtf_file* and *names* are passed straight through to ``BedStats``.  A
    progress line goes to stderr unless *quiet* is set.
    """
    if not quiet:
        sys.stderr.write('Calculating BED region stats...\n')

    bed = BedFile(infile)
    BedStats(bed, gtf_file, names=names).write(out)
def testBedFileObj(self):
    # Same check as reading from a path, but the BED text comes from an
    # in-memory file object ('|' stands in for tabs).
    valid = [
        'chr1|100|150|foo|1|+',
        'chr1|100|150|foo|1|-',
    ]

    instr = StringIO.StringIO('''
chr1|100|150|foo|1|+
chr1|100|150|foo|1|-
'''.replace('|', '\t'))

    regions = []
    for x in BedFile(fileobj=instr):
        fields = (x.chrom, x.start, x.end, x.name, x.score, x.strand)
        regions.append('%s|%s|%s|%s|%s|%s' % fields)

    self.assertTrue(_matches(valid, regions))
def testBedRegion(self):
    # A BedFile built from the region string "chr1:101-150" is expected to
    # yield the single region chr1 100-150.
    valid = ['chr1|100|150']

    regions = []
    for x in BedFile(region="chr1:101-150"):
        regions.append('%s|%s|%s' % (x.chrom, x.start, x.end))

    self.assertTrue(_matches(valid, regions))
Converts the "score" field to be an integer """ sys.exit(1) def bed_clean(bed, out=sys.stdout): for region in bed: region.score = int(region.score) if region.score > 1000: region.score = 1000 out.write('%s\n' % region) if __name__ == '__main__': fname = None for arg in sys.argv[1:]: if arg == '-h': usage() if not fname and (os.path.exists(arg) or arg == '-'): fname = arg else: print "Unknown option: %s" % arg usage() if not fname: usage() bed_clean(BedFile(fname))