def test_emptyfile():
    """An empty BED source must produce no features.

    Exercised twice: first with an empty, open file-like object, then with
    the path of an empty file on disk.
    """
    empty_sources = (StringIO(''), 'inputfiles/empty.bed')
    for source in empty_sources:
        features = list(bedparser.bedfile(source))
        assert len(features) == 0
def test_tracknames():
    """Parse a BED file that contains track-line definitions.

    The same fixture is read once by filename and once through an open file
    handle; each time the first and last parsed items are handed to the
    multi-track check helpers.
    """
    path = 'inputfiles/multi.tracks.3.fields.bed'

    features = list(bedparser.bedfile(path))
    check_multitracks_first(features[0])
    check_multitracks_last(features[-1])

    # Now check with an open file.  The context manager guarantees the
    # handle is closed once parsing is done, even if parsing raises.
    with open(path) as handle:
        features = list(bedparser.bedfile(handle))
    check_multitracks_first(features[0])
    check_multitracks_last(features[-1])
def test_singlefeature_filename():
    """Single-feature BED file with an extra newline on the end.

    Exactly one feature must be parsed; its contents are verified by
    ``check_singlefeature``.
    """
    features = list(
        bedparser.bedfile('inputfiles/single.track.unnamed.single.feature.bed'))
    assert len(features) == 1
    check_singlefeature(features[0])
def test_9fields():
    """Parse single.track.9.fields.bed from an open file and from a filename.

    Both ways of supplying the input must yield four features, whose first
    and last items are verified by the 9-field check helpers.
    """
    path = 'inputfiles/single.track.9.fields.bed'

    # Open-file variant; ``with`` closes the handle instead of leaking it.
    with open(path) as handle:
        features = list(bedparser.bedfile(handle))
    assert len(features) == 4
    check_9fields_first(features[0])
    check_9fields_last(features[-1])

    # Check by passing a filename instead of an open file.
    features = list(bedparser.bedfile(path))
    assert len(features) == 4
    check_9fields_first(features[0])
    check_9fields_last(features[-1])
def bedSplitter(infile, outfile):
    """Bin BED features with common names into individual tracks.

    The input is first sorted with the external ``sort`` program
    (``sort -k 4``), then rewritten so that each run of consecutive features
    sharing a ``name`` is preceded by its own ``track`` line.

    *infile*
        Path of the BED file to read; it is passed to ``sort`` as a filename.

    *outfile*
        Path of the file to create; it is opened for writing here and closed
        before returning.

    Raises ``subprocess.CalledProcessError`` if ``sort`` exits with a
    non-zero status.
    """
    import subprocess

    # mkstemp creates the file atomically, unlike the race-prone mktemp.
    fd, tmp = tempfile.mkstemp()
    try:
        # First, make sure the features are sorted by name.
        print('sort -k 4 %s > %s' % (infile, tmp))
        with os.fdopen(fd, 'w') as sorted_out:
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact.
            subprocess.check_call(['sort', '-k', '4', infile],
                                  stdout=sorted_out)

        lastname = None
        with open(outfile, 'w') as out:
            for feature in bedparser.bedfile(tmp):
                # A change of name starts a new track.
                if feature.name != lastname:
                    out.write('track name=%s description=%s itemRgb=1\n'
                              % (feature.name, feature.name))
                out.write(feature.tostring())
                lastname = feature.name
    finally:
        # The sorted intermediate is only needed while this function runs.
        os.remove(tmp)
def bedSplitter(infile, outfile):
    """Bin BED features with common names into individual tracks.

    The input is first sorted with the external ``sort`` program
    (``sort -k 4``), then rewritten so that each run of consecutive features
    sharing a ``name`` is preceded by its own ``track`` line.

    *infile*
        Path of the BED file to read; it is passed to ``sort`` as a filename.

    *outfile*
        Path of the file to create; it is opened for writing here and closed
        before returning.

    Raises ``subprocess.CalledProcessError`` if ``sort`` exits with a
    non-zero status.
    """
    import subprocess

    # mkstemp creates the file atomically, unlike the race-prone mktemp.
    fd, tmp = tempfile.mkstemp()
    try:
        # First, make sure the features are sorted by name.
        print('sort -k 4 %s > %s' % (infile, tmp))
        with os.fdopen(fd, 'w') as sorted_out:
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact.
            subprocess.check_call(['sort', '-k', '4', infile],
                                  stdout=sorted_out)

        lastname = None
        with open(outfile, 'w') as out:
            for feature in bedparser.bedfile(tmp):
                # A change of name starts a new track.
                if feature.name != lastname:
                    out.write('track name=%s description=%s itemRgb=1\n'
                              % (feature.name, feature.name))
                out.write(feature.tostring())
                lastname = feature.name
    finally:
        # The sorted intermediate is only needed while this function runs.
        os.remove(tmp)
def bedSizeFilter(infile, outfile, minlen=None, maxlen=None):
    """Write the features whose length lies strictly between two bounds.

    A feature's length is ``abs(start - stop)``.  Every feature with
    ``minlen < length < maxlen`` is written to *outfile* (an object with a
    ``write`` method) as a tab-separated ``chr``, ``start``, ``stop`` line.
    A bound given as None is replaced by -1e30 / 1e30 respectively.
    """
    lower = -1e30 if minlen is None else minlen
    upper = 1e30 if maxlen is None else maxlen

    for feature in bedparser.bedfile(infile):
        span = abs(feature.start - feature.stop)
        if not (lower < span < upper):
            continue
        outfile.write(
            '%s\t%s\t%s\n' % (feature.chr, feature.start, feature.stop))
def bedSizeFilter(infile, outfile, minlen=None, maxlen=None):
    """Copy features of an acceptable size from *infile* to *outfile*.

    Size is measured as ``abs(start - stop)`` and must fall strictly
    between *minlen* and *maxlen*.  Passing None for a bound substitutes
    -1e30 (lower) or 1e30 (upper).  Accepted features are written as
    ``chr<TAB>start<TAB>stop`` lines through ``outfile.write``.
    """
    if minlen is None:
        minlen = -1e30
    if maxlen is None:
        maxlen = 1e30

    line_format = '%s\t%s\t%s\n'
    for bed in bedparser.bedfile(infile):
        size = abs(bed.start - bed.stop)
        if minlen < size < maxlen:
            fields = (bed.chr, bed.start, bed.stop)
            outfile.write(line_format % fields)
def test_checkfields():
    """A one-line BED source yields one feature with the expected fields.

    The line is ``chrX<TAB>10<TAB>100``; the parsed feature must expose the
    chromosome as a string and start/stop as the integers 10 and 100.
    """
    source = StringIO("""chrX\t10\t100\n""")
    features = list(bedparser.bedfile(source))

    first = features[0]
    assert first.chr == 'chrX'
    assert first.start == 10
    assert first.stop == 100
    assert len(features) == 1
def _keep_first_six_fields(src, dst):
    """Write the first six tab-separated fields of file *src* to file *dst* using awk."""
    # NOTE: paths are interpolated into a shell command unquoted, so they
    # must not contain whitespace or shell metacharacters.
    cmds = """awk -F "\\t" 'BEGIN {OFS="\\t"}{print $1, $2, $3, $4, $5, $6}' %s > %s""" % (
        src, dst)
    log.info(cmds)
    os.system(cmds)


def _recolor_features(src, dst, color):
    """Write each feature of *src* to *dst* with thickStart/thickStop set to start/stop and itemRGB set to *color*."""
    with open(dst, 'w') as out:
        for feature in bedparser.bedfile(src):
            feature.thickStart = feature.start
            feature.thickStop = feature.stop
            feature.itemRGB = color
            out.write(feature.tostring())


def bedColorUniques(a, b, oa, ob, multicolor, uniquecolor):
    """Recolor two BED files and write the results to *oa* and *ob*.

    *a*, *b*
        Paths of the input BED files.

    *oa*
        Output path.  Receives the features that ``intersectBed -a A -b B
        -v -s`` reports, where A and B are *a* and *b* cut down to their
        first six fields (in bedtools, ``-v`` keeps A entries with no
        overlap in B and ``-s`` requires the same strand).  These are
        colored with *multicolor*.

    *ob*
        Output path.  Receives every feature of *b*, colored with
        *uniquecolor*.

    In both outputs each feature's thickStart/thickStop are set to its
    start/stop.  Requires ``awk`` and ``intersectBed`` on the PATH; their
    exit statuses are not checked.
    """
    tmp_paths = []
    try:
        # mkstemp creates each file atomically (mktemp is race-prone); only
        # the path is needed because the shell commands reopen it.
        for _ in range(3):
            fd, path = tempfile.mkstemp()
            os.close(fd)
            tmp_paths.append(path)
        tmp1, tmp2, tmp3 = tmp_paths

        _keep_first_six_fields(a, tmp1)
        _keep_first_six_fields(b, tmp2)

        log.info('intersecting')
        os.system('intersectBed -a %s -b %s -v -s > %s' % (tmp1, tmp2, tmp3))

        log.info('using bedparser.py to recolor multireads')
        _recolor_features(tmp3, oa, multicolor)

        log.info('using bedparser.py to recolor unique reads')
        _recolor_features(b, ob, uniquecolor)
    finally:
        # Intermediate files are only needed while this function runs.
        for path in tmp_paths:
            try:
                os.remove(path)
            except OSError:
                pass
def get_min_dist(fn):
    """Return the minimum distance between consecutive BED features.

    Features are read from *fn* with ``bedfile``.  For each feature on the
    same chromosome as the one read just before it, the distance is its
    start minus the previous feature's stop; the smallest such value is
    returned.  The first feature seen on a chromosome contributes no
    distance.  If no distance is ever measured, the initial value 1e35 is
    returned.

    NOTE(review): this only gives neighbour gaps if the input is ordered by
    chromosome and position -- confirm against callers.
    """
    smallest = 1e35
    current_chrom = None
    previous_stop = 0

    for feature in bedfile(fn):
        if feature.chr != current_chrom:
            # New chromosome: nothing to measure against yet.
            current_chrom = feature.chr
        else:
            smallest = min(smallest, feature.start - previous_stop)
        previous_stop = feature.stop

    return smallest
def get_min_dist(fn):
    """Return the smallest start-to-previous-stop gap found in a BED file.

    Iterates over ``bedfile(fn)``.  Whenever a feature shares its chromosome
    with the feature read immediately before, ``start - previous stop`` is
    computed and the minimum over all such values is kept.  When the
    chromosome changes, only the bookkeeping is updated.  Returns 1e35 when
    no gap was computed.

    NOTE(review): presumably the file is sorted by chromosome and start;
    verify, since unsorted input yields gaps between non-neighbours.
    """
    best = 1e35
    chrom_seen = None
    last_stop = 0

    for entry in bedfile(fn):
        same_chrom = entry.chr == chrom_seen
        if same_chrom:
            gap = entry.start - last_stop
            if gap < best:
                best = gap
        if not same_chrom:
            chrom_seen = entry.chr
        last_stop = entry.stop

    return best
def bedValueFilter(fn, minval, maxval, outfn, bedgraph=False):
    """Filter a BED or bedGraph file by feature value.

    Features with ``minval < value < maxval`` are written as tab-separated
    ``chr``, ``start``, ``stop``, ``value`` lines.

    *fn*
        Input filename, parsed with ``bedparser.bedgraph`` when *bedgraph*
        is true and with ``bedparser.bedfile`` otherwise.

    *minval*, *maxval*
        Exclusive bounds; None is replaced by -1e30 / 1e30 respectively.

    *outfn*
        Destination: a file-like object with a ``write`` method, a path to
        open for writing, or None to write to ``sys.stdout``.  Any
        destination other than None is closed before returning.

    If a feature has no value, a message is printed and the process exits
    with status 1 via ``sys.exit``.
    """
    if maxval is None:
        maxval = 1e30
    if minval is None:
        minval = -1e30

    parser = bedparser.bedgraph if bedgraph else bedparser.bedfile

    # Resolve the output destination up front so that every write below has
    # a defined target.
    if outfn is None:
        fout = sys.stdout
    elif hasattr(outfn, 'write'):
        fout = outfn
    else:
        fout = open(outfn, 'w')

    try:
        for feature in parser(fn):
            if feature.value is None:
                print("No value for this feature (%s)" % feature)
                sys.exit(1)
            if minval < feature.value < maxval:
                fout.write('%s\t%s\t%s\t%s\n' % (
                    feature.chr, feature.start, feature.stop, feature.value))
    finally:
        # sys.stdout is left open; everything else is closed.
        if outfn is not None:
            fout.close()
def bedValueFilter(fn, minval, maxval, outfn, bedgraph=False):
    """Filter a BED or bedGraph file by feature value.

    Features with ``minval < value < maxval`` are written as tab-separated
    ``chr``, ``start``, ``stop``, ``value`` lines.

    *fn*
        Input filename, parsed with ``bedparser.bedgraph`` when *bedgraph*
        is true and with ``bedparser.bedfile`` otherwise.

    *minval*, *maxval*
        Exclusive bounds; None is replaced by -1e30 / 1e30 respectively.

    *outfn*
        Destination: a file-like object with a ``write`` method, a path to
        open for writing, or None to write to ``sys.stdout``.  Any
        destination other than None is closed before returning.

    If a feature has no value, a message is printed and the process exits
    with status 1 via ``sys.exit``.
    """
    if maxval is None:
        maxval = 1e30
    if minval is None:
        minval = -1e30

    parser = bedparser.bedgraph if bedgraph else bedparser.bedfile

    # Resolve the output destination up front so that every write below has
    # a defined target.
    if outfn is None:
        fout = sys.stdout
    elif hasattr(outfn, 'write'):
        fout = outfn
    else:
        fout = open(outfn, 'w')

    try:
        for feature in parser(fn):
            if feature.value is None:
                print("No value for this feature (%s)" % feature)
                sys.exit(1)
            if minval < feature.value < maxval:
                fout.write('%s\t%s\t%s\t%s\n' % (
                    feature.chr, feature.start, feature.stop, feature.value))
    finally:
        # sys.stdout is left open; everything else is closed.
        if outfn is not None:
            fout.close()
def bed2db(bedfn, dbfn):
    """Load a BED file into a new ``features`` table of an SQLite database.

    *bedfn*
        BED input handed to ``bedparser.bedfile``.

    *dbfn*
        Database location handed to ``sqlite3.connect``.

    One row is inserted per feature, then indexes are created on ``start``,
    ``stop``, ``chrom`` and ``value`` and the transaction is committed.

    Returns the open connection; the caller is responsible for closing it.
    The table is created without ``IF NOT EXISTS``, so an existing
    ``features`` table makes the CREATE statement fail.  On any failure the
    connection is closed before the exception propagates.
    """
    conn = sqlite3.connect(dbfn)
    try:
        c = conn.cursor()
        c.execute('''
            CREATE TABLE features (chrom text, start int, stop int, name text,
                                   value float, strand text, thickStart int,
                                   thickStop int, itemRGB text, blockCount int,
                                   blockSizes text, blockStarts text)
            ''')

        # Stream the rows straight from the parser; the column order matches
        # the CREATE TABLE statement above.
        rows = (
            (feature.chr, feature.start, feature.stop, feature.name,
             feature.value, feature.strand, feature.thickStart,
             feature.thickStop, feature.itemRGB, feature.blockCount,
             feature.blockSizes, feature.blockStarts)
            for feature in bedparser.bedfile(bedfn)
        )
        c.executemany('''
            INSERT INTO features VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
            ''', rows)

        # Indexes are built after the bulk insert rather than maintained
        # row by row.
        c.execute('CREATE INDEX idx_starts ON features(start)')
        c.execute('CREATE INDEX idx_stops ON features(stop)')
        c.execute('CREATE INDEX idx_chroms ON features(chrom)')
        c.execute('CREATE INDEX idx_values ON features(value)')
        conn.commit()
    except Exception:
        # Do not leak the connection when the caller never receives it.
        conn.close()
        raise
    return conn
def main(options):
    """Print the left and right flanking intervals of every BED feature.

    *options* is an object exposing the attributes ``left``, ``right``,
    ``flank``, ``input`` and ``buffer``:

    * ``flank`` sets both ``left`` and ``right`` and may not be combined
      with either of them.
    * ``input`` is handed to ``bedparser.bedfile``; the string ``'stdin'``
      is replaced by ``sys.stdin``.
    * ``buffer`` is the gap left between the feature and each flank.

    For each feature two tab-separated ``chr``, ``start``, ``stop`` lines
    are printed: the left flank
    ``[start - left - buffer, start - buffer]`` and the right flank
    ``[stop + buffer, stop + right + buffer]``.

    Note that *options* is modified in place (``left``, ``right`` and
    ``input`` may be reassigned).

    Raises ValueError when ``flank`` is combined with ``left``/``right``,
    when no input is given, or when either flank size is missing (a size of
    0 counts as missing).
    """
    if (options.left or options.right) and options.flank:
        raise ValueError('-f cannot be specified if -r or -l is used.')

    if options.flank:
        options.left = options.flank
        options.right = options.flank

    if not options.input:
        raise ValueError('Need input file.')
    if options.input == 'stdin':
        options.input = sys.stdin

    if not options.left or not options.right:
        raise ValueError('Flanking region not specified')

    s = '%s\t%s\t%s'
    for feature in bedparser.bedfile(options.input):
        # Left flank, separated from the feature by the buffer.
        print(s % (feature.chr,
                   feature.start - options.left - options.buffer,
                   feature.start - options.buffer))
        # Right flank, separated from the feature by the buffer.
        print(s % (feature.chr,
                   feature.stop + options.buffer,
                   feature.stop + options.right + options.buffer))