Ejemplo n.º 1
0
def test_emptyfile():
    """An empty input must yield zero features, however it is supplied."""

    # Case 1: an open file-like object with no content.
    features = list(bedparser.bedfile(StringIO('')))
    assert len(features) == 0

    # Case 2: the same check, going through a filename instead.
    features = list(bedparser.bedfile('inputfiles/empty.bed'))
    assert len(features) == 0
Ejemplo n.º 2
0
def test_tracknames():
    """Bed file with a trackline definition.

    The file is parsed twice -- once given as a filename and once as an
    open file object -- and in both cases the first and last parsed
    features are handed to the ``check_multitracks_*`` helpers.
    """
    path = 'inputfiles/multi.tracks.3.fields.bed'

    # First pass: let bedfile() open the path itself.
    features = list(bedparser.bedfile(path))
    check_multitracks_first(features[0])
    check_multitracks_last(features[-1])

    # Second pass: hand over an already-open file.  The ``with`` block
    # guarantees the handle is closed even if parsing raises.
    with open(path) as handle:
        features = list(bedparser.bedfile(handle))
    check_multitracks_first(features[0])
    check_multitracks_last(features[-1])
Ejemplo n.º 3
0
def test_singlefeature_filename():
    '''Single-feature bed file with an extra newline on the end.

    Parsing by filename must produce exactly one feature, which is then
    validated by ``check_singlefeature``.
    '''
    features = list(
        bedparser.bedfile('inputfiles/single.track.unnamed.single.feature.bed'))
    assert len(features) == 1
    check_singlefeature(features[0])
Ejemplo n.º 4
0
def test_9fields():
    """Parse single.track.9.fields.bed from an open file and from a filename.

    Both ways of supplying the input must yield four features; the first
    and last are validated by the ``check_9fields_*`` helpers.
    """
    path = 'inputfiles/single.track.9.fields.bed'

    # Pass an open file; ``with`` closes the handle even if parsing fails.
    with open(path) as handle:
        features = list(bedparser.bedfile(handle))
    assert len(features) == 4
    check_9fields_first(features[0])
    check_9fields_last(features[-1])

    # Check by passing a filename instead of an open file.
    features = list(bedparser.bedfile(path))
    assert len(features) == 4
    check_9fields_first(features[0])
    check_9fields_last(features[-1])
Ejemplo n.º 5
0
def bedSplitter(infile, outfile):
    """
    Bins BED features with common names into individual tracks.

    The input is sorted on field 4 onwards with the external ``sort``
    program (``sort -k 4``).  The sorted features are then written to
    *outfile*, with a ``track`` header line emitted every time the feature
    name changes.

    *infile*

        Path of the BED file to read.

    *outfile*

        Path of the file to write.  It is created or truncated.

    Raises ``subprocess.CalledProcessError`` if ``sort`` exits with a
    non-zero status.
    """
    import subprocess

    # mkstemp creates the file atomically; mktemp only returns a name and
    # is open to a race with other processes.
    fd, tmp = tempfile.mkstemp()
    try:
        # Echo the equivalent shell command, as the function always has.
        print('sort -k 4 %s > %s' % (infile, tmp))

        # An argument list (no shell) keeps filenames containing spaces or
        # shell metacharacters from being misinterpreted.
        with os.fdopen(fd, 'w') as sorted_out:
            subprocess.check_call(['sort', '-k', '4', infile],
                                  stdout=sorted_out)

        lastname = None
        with open(outfile, 'w') as out:
            for feature in bedparser.bedfile(tmp):
                # A change of name starts a new track.
                if feature.name != lastname:
                    out.write('track name=%s description=%s itemRgb=1\n'
                              % (feature.name, feature.name))
                out.write(feature.tostring())
                lastname = feature.name
    finally:
        # The sorted copy is only an intermediate; never leave it behind.
        os.remove(tmp)
Ejemplo n.º 6
0
def bedSplitter(infile, outfile):
    """
    Bins BED features with common names into individual tracks.

    The input is sorted on field 4 onwards with the external ``sort``
    program (``sort -k 4``).  The sorted features are then written to
    *outfile*, with a ``track`` header line emitted every time the feature
    name changes.

    *infile*

        Path of the BED file to read.

    *outfile*

        Path of the file to write.  It is created or truncated.

    Raises ``subprocess.CalledProcessError`` if ``sort`` exits with a
    non-zero status.
    """
    import subprocess

    # mkstemp creates the file atomically; mktemp only returns a name and
    # is open to a race with other processes.
    fd, tmp = tempfile.mkstemp()
    try:
        # Echo the equivalent shell command, as the function always has.
        print('sort -k 4 %s > %s' % (infile, tmp))

        # An argument list (no shell) keeps filenames containing spaces or
        # shell metacharacters from being misinterpreted.
        with os.fdopen(fd, 'w') as sorted_out:
            subprocess.check_call(['sort', '-k', '4', infile],
                                  stdout=sorted_out)

        lastname = None
        with open(outfile, 'w') as out:
            for feature in bedparser.bedfile(tmp):
                # A change of name starts a new track.
                if feature.name != lastname:
                    out.write('track name=%s description=%s itemRgb=1\n'
                              % (feature.name, feature.name))
                out.write(feature.tostring())
                lastname = feature.name
    finally:
        # The sorted copy is only an intermediate; never leave it behind.
        os.remove(tmp)
Ejemplo n.º 7
0
def bedSizeFilter(infile, outfile, minlen=None, maxlen=None):
    """Write the features whose length lies strictly between the bounds.

    *infile* is handed to ``bedparser.bedfile``; *outfile* must provide a
    ``write`` method.  A bound left as ``None`` is replaced by -1e30
    (*minlen*) or 1e30 (*maxlen*).  Only chromosome, start and stop are
    written, tab-separated, one feature per line.
    """
    lower = -1e30 if minlen is None else minlen
    upper = 1e30 if maxlen is None else maxlen

    for feature in bedparser.bedfile(infile):
        span = abs(feature.start - feature.stop)
        # Both bounds are exclusive.
        if not (lower < span < upper):
            continue
        row = (feature.chr, feature.start, feature.stop)
        outfile.write('%s\t%s\t%s\n' % row)
Ejemplo n.º 8
0
def bedSizeFilter(infile, outfile, minlen=None, maxlen=None):
    """Write the features whose length lies strictly between the bounds.

    *infile* is handed to ``bedparser.bedfile``; *outfile* must provide a
    ``write`` method.  A bound left as ``None`` is replaced by -1e30
    (*minlen*) or 1e30 (*maxlen*).  Only chromosome, start and stop are
    written, tab-separated, one feature per line.
    """
    lower = -1e30 if minlen is None else minlen
    upper = 1e30 if maxlen is None else maxlen

    for feature in bedparser.bedfile(infile):
        span = abs(feature.start - feature.stop)
        # Both bounds are exclusive.
        if not (lower < span < upper):
            continue
        row = (feature.chr, feature.start, feature.stop)
        outfile.write('%s\t%s\t%s\n' % row)
Ejemplo n.º 9
0
def test_checkfields():
    """A one-line BED input parses into one feature with the right fields."""
    features = list(bedparser.bedfile(StringIO("chrX\t10\t100\n")))
    first = features[0]
    assert first.chr == 'chrX'
    assert first.start == 10
    assert first.stop == 100
    assert len(features) == 1
Ejemplo n.º 10
0
def bedColorUniques(a, b, oa, ob, multicolor, uniquecolor):
    """Recolor the features of two BED files and write them to *oa* / *ob*.

    Steps:

    1. *a* and *b* are each trimmed to their first six tab-separated
       columns with ``awk``.
    2. ``intersectBed -v -s`` is run with the trimmed *a* as ``-a`` and the
       trimmed *b* as ``-b``.
    3. Every feature output by step 2 is written to *oa* with ``itemRGB``
       set to *multicolor*.
    4. Every feature of the original (untrimmed) *b* is written to *ob*
       with ``itemRGB`` set to *uniquecolor*.

    In both outputs ``thickStart`` / ``thickStop`` are set to the feature's
    ``start`` / ``stop``.

    *a*, *b*, *oa* and *ob* are file paths; *oa* and *ob* are created or
    truncated.  ``awk`` and ``intersectBed`` must be on the PATH.

    Raises ``subprocess.CalledProcessError`` if an external command exits
    with a non-zero status.
    """
    import subprocess

    awk_prog = 'BEGIN {OFS="\\t"}{print $1, $2, $3, $4, $5, $6}'
    # Shell-style rendering of the awk call, used only for the log message.
    awk_logfmt = """awk -F "\\t"  'BEGIN {OFS="\\t"}{print $1, $2, $3, $4, $5, $6}' %s > %s"""

    tmpfiles = []

    def make_tmp():
        # mkstemp creates the file atomically (mktemp is race-prone).
        fd, path = tempfile.mkstemp()
        os.close(fd)
        tmpfiles.append(path)
        return path

    def run(argv, dest):
        # Argument lists (no shell) keep odd filenames from being
        # reinterpreted; check_call surfaces failures of the tool.
        with open(dest, 'w') as out:
            subprocess.check_call(argv, stdout=out)

    try:
        tmp1 = make_tmp()
        tmp2 = make_tmp()
        for src, dest in ((a, tmp1), (b, tmp2)):
            log.info(awk_logfmt % (src, dest))
            run(['awk', '-F', '\\t', awk_prog, src], dest)

        log.info('intersecting')
        tmp3 = make_tmp()
        run(['intersectBed', '-a', tmp1, '-b', tmp2, '-v', '-s'], tmp3)

        log.info('using bedparser.py to recolor multireads')
        with open(oa, 'w') as outa:
            for i in bedparser.bedfile(tmp3):
                i.thickStart = i.start
                i.thickStop = i.stop
                i.itemRGB = multicolor
                outa.write(i.tostring())

        log.info('using bedparser.py to recolor unique reads')
        with open(ob, 'w') as outb:
            # Note: this reads the original *b*, not its six-column copy.
            for i in bedparser.bedfile(b):
                i.thickStart = i.start
                i.thickStop = i.stop
                i.itemRGB = uniquecolor
                outb.write(i.tostring())
    finally:
        # Intermediate files are never needed by the caller.
        for path in tmpfiles:
            os.remove(path)
Ejemplo n.º 11
0
def get_min_dist(fn):
    '''Returns the minimum distance between bed features.

    The distance is measured from the stop of one feature to the start of
    the next feature in file order, and only between consecutive features
    on the same chromosome.  If no such pair exists the initial value
    1e35 is returned.
    '''
    smallest = 1e35
    current_chrom = None
    prev_stop = 0
    for feature in bedfile(fn):
        if feature.chr == current_chrom:
            smallest = min(smallest, feature.start - prev_stop)
        else:
            # First feature on a new chromosome: nothing to compare with.
            current_chrom = feature.chr
        prev_stop = feature.stop
    return smallest
def get_min_dist(fn):
    '''Returns the minimum distance between bed features.

    The distance is measured from the stop of one feature to the start of
    the next feature in file order, and only between consecutive features
    on the same chromosome.  If no such pair exists the initial value
    1e35 is returned.
    '''
    smallest = 1e35
    current_chrom = None
    prev_stop = 0
    for feature in bedfile(fn):
        if feature.chr == current_chrom:
            smallest = min(smallest, feature.start - prev_stop)
        else:
            # First feature on a new chromosome: nothing to compare with.
            current_chrom = feature.chr
        prev_stop = feature.stop
    return smallest
Ejemplo n.º 13
0
def bedValueFilter(fn, minval, maxval, outfn, bedgraph=False):
    """Filters a bed file by value.

    Features whose ``value`` lies strictly between *minval* and *maxval*
    are written as tab-separated ``chr, start, stop, value`` lines.

    *fn* is passed to ``bedparser.bedgraph`` when *bedgraph* is true and to
    ``bedparser.bedfile`` otherwise.

    *minval* / *maxval* may be ``None``, meaning -1e30 / 1e30.

    *outfn* selects the destination: ``None`` writes to ``sys.stdout``; an
    object with a ``write`` method is used as is; anything else is treated
    as a path and opened for writing.  Unless *outfn* is ``None`` the
    destination is closed before returning.

    If a feature has no value, a message is printed and the process exits
    with status 1 (``SystemExit``).
    """
    if maxval is None:
        maxval = 1e30
    if minval is None:
        minval = -1e30

    if bedgraph:
        iterator = bedparser.bedgraph(fn)
    else:
        iterator = bedparser.bedfile(fn)

    # The original body wrote to an undefined name ``fout``; bind it here.
    if outfn is None:
        fout = sys.stdout
    elif hasattr(outfn, 'write'):
        fout = outfn
    else:
        fout = open(outfn, 'w')

    try:
        for i in iterator:
            if i.value is None:
                print("No value for this feature (%s)" % i)
                sys.exit(1)
            if minval < i.value < maxval:
                fout.write('%s\t%s\t%s\t%s\n'
                           % (i.chr, i.start, i.stop, i.value))
    finally:
        # Never close stdout; everything else is closed even on error.
        if outfn is not None:
            fout.close()
Ejemplo n.º 14
0
def bedValueFilter(fn, minval, maxval, outfn, bedgraph=False):
    """Filters a bed file by value.

    Features whose ``value`` lies strictly between *minval* and *maxval*
    are written as tab-separated ``chr, start, stop, value`` lines.

    *fn* is passed to ``bedparser.bedgraph`` when *bedgraph* is true and to
    ``bedparser.bedfile`` otherwise.

    *minval* / *maxval* may be ``None``, meaning -1e30 / 1e30.

    *outfn* selects the destination: ``None`` writes to ``sys.stdout``; an
    object with a ``write`` method is used as is; anything else is treated
    as a path and opened for writing.  Unless *outfn* is ``None`` the
    destination is closed before returning.

    If a feature has no value, a message is printed and the process exits
    with status 1 (``SystemExit``).
    """
    if maxval is None:
        maxval = 1e30
    if minval is None:
        minval = -1e30

    if bedgraph:
        iterator = bedparser.bedgraph(fn)
    else:
        iterator = bedparser.bedfile(fn)

    # The original body wrote to an undefined name ``fout``; bind it here.
    if outfn is None:
        fout = sys.stdout
    elif hasattr(outfn, 'write'):
        fout = outfn
    else:
        fout = open(outfn, 'w')

    try:
        for i in iterator:
            if i.value is None:
                print("No value for this feature (%s)" % i)
                sys.exit(1)
            if minval < i.value < maxval:
                fout.write('%s\t%s\t%s\t%s\n'
                           % (i.chr, i.start, i.stop, i.value))
    finally:
        # Never close stdout; everything else is closed even on error.
        if outfn is not None:
            fout.close()
Ejemplo n.º 15
0
def bed2db(bedfn, dbfn):
    """Load the features of a BED file into a new SQLite ``features`` table.

    *bedfn* is handed to ``bedparser.bedfile``; *dbfn* is handed to
    ``sqlite3.connect``.  One row is inserted per feature, indexes are
    created on start, stop, chrom and value, and the transaction is
    committed.  Returns the open connection.
    """
    create_sql = '''
    CREATE TABLE features (chrom text,
                           start int,
                           stop int, 
                           name text,
                           value float,
                           strand text,
                           thickStart int,
                           thickStop int, 
                           itemRGB text,
                           blockCount int,
                           blockSizes text,
                           blockStarts text)
    '''
    insert_sql = '''
        INSERT INTO features VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
        '''
    # Feature attributes, in the same order as the table's columns.
    attributes = ('chr', 'start', 'stop', 'name', 'value', 'strand',
                  'thickStart', 'thickStop', 'itemRGB', 'blockCount',
                  'blockSizes', 'blockStarts')
    indexes = (('idx_starts', 'start'),
               ('idx_stops', 'stop'),
               ('idx_chroms', 'chrom'),
               ('idx_values', 'value'))

    conn = sqlite3.connect(dbfn)
    cursor = conn.cursor()
    cursor.execute(create_sql)

    for feature in bedparser.bedfile(bedfn):
        row = [getattr(feature, attr) for attr in attributes]
        cursor.execute(insert_sql, row)

    for index_name, column in indexes:
        cursor.execute('CREATE INDEX %s ON features(%s)'
                       % (index_name, column))

    conn.commit()
    return conn
Ejemplo n.º 16
0
def main(options):
    """Print the left and right flanking regions of every BED feature.

    *options* is an object with the attributes ``left``, ``right``,
    ``flank``, ``buffer`` and ``input``:

    * ``flank`` sets both ``left`` and ``right``; it cannot be combined
      with either of them.
    * ``input`` is passed to ``bedparser.bedfile``; the string ``'stdin'``
      is replaced by ``sys.stdin``.

    For each feature two tab-separated lines are printed:

    * ``chr, start - left - buffer, start - buffer``
    * ``chr, stop + buffer, stop + right + buffer``

    Coordinates are not clamped, so the left flank can be negative.

    Note that *options* is modified in place (``left``, ``right`` and
    ``input`` may be reassigned).

    Raises ``ValueError`` on conflicting or missing options.  A flank
    width of 0 counts as "not specified".
    """
    if (options.left or options.right) and options.flank:
        raise ValueError('-f cannot be specified if -r or -l is used.')

    if options.flank:
        options.left = options.flank
        options.right = options.flank

    if not options.input:
        raise ValueError('Need input file.')

    if options.input == 'stdin':
        options.input = sys.stdin

    if not options.left or not options.right:
        raise ValueError('Flanking region not specified')

    s = '%s\t%s\t%s'
    for i in bedparser.bedfile(options.input):
        # Left flank, separated from the feature by ``buffer`` bases.
        print(s % (i.chr,
                   i.start - options.left - options.buffer,
                   i.start - options.buffer))
        # Right flank, likewise offset by ``buffer``.
        print(s % (i.chr,
                   i.stop + options.buffer,
                   i.stop + options.right + options.buffer))