def describe_fastq(filename):
    """Report the line layout and quality offset of a fastq file.

    The first character of the file selects the layout: '@' means 4 lines
    per read, anything else means 1 line per read.  Reads are then pulled
    with preprocess_radtag_lane.next_read_from_fh until
    preprocess_radtag_lane.get_baseQ returns something other than None
    for a read's quality string.

    Arguments:
        filename -- path handed to preprocess_radtag_lane.smartopen

    Returns:
        (lnum, baseQ) -- lnum is 4 or 1; baseQ is whatever get_baseQ
        returned for the first quality string it could classify.

    Raises:
        ValueError -- a read with an empty header came back before baseQ
        was determined (taken here to mean the input was exhausted).
    """
    # Sniff the layout on a throwaway handle and close it explicitly
    # instead of leaving it for the garbage collector.
    sniff_fh = preprocess_radtag_lane.smartopen(filename)
    try:
        first_char = sniff_fh.read(1)
    finally:
        sniff_fh.close()

    if first_char == '@':
        lnum = 4
    else:
        lnum = 1

    baseQ = None
    fh = preprocess_radtag_lane.smartopen(filename)
    try:
        while baseQ is None:
            # Hand over the layout detected above so the reader parses
            # the file the same way it was sniffed.
            n,s,q = preprocess_radtag_lane.next_read_from_fh(fh,lnum)
            if not n:
                # Without this guard the loop would never terminate once
                # the reads run out with baseQ still undetermined.
                raise ValueError('could not determine baseQ from %s' % filename)
            baseQ = preprocess_radtag_lane.get_baseQ(q)
    finally:
        fh.close()
    return lnum,baseQ
def get_fastq_properties(fq):
    """Detect line layout, quality offset and read length of a fastq file.

    The first character of the file selects the layout: '@' means 4 lines
    per read, anything else means 1 line per read.  Reads are then pulled
    with preprocess_radtag_lane.next_read_from_fh until
    preprocess_radtag_lane.get_baseQ returns something other than None
    for a read's quality string.  The read length is the length of the
    sequence of the last read examined.  Each detected value is reported
    on stderr.

    Arguments:
        fq -- path handed to smartopen

    Returns:
        (lnum, baseQ, readlen)

    Raises:
        ValueError -- a read with an empty header came back before baseQ
        was determined (taken here to mean the input was exhausted).
    """
    # Sniff the layout on a throwaway handle and close it explicitly
    # instead of leaving it for the garbage collector.
    sniff_fh = smartopen(fq)
    try:
        first_char = sniff_fh.read(1)
    finally:
        sniff_fh.close()

    if first_char == '@':
        lnum = 4
    else:
        lnum = 1
    print >> sys.stderr, 'fastq format lnum: %s' % lnum

    baseQ = None
    qfh = smartopen(fq)
    try:
        while baseQ is None:
            t,r,q = preprocess_radtag_lane.next_read_from_fh(qfh,lnum)
            if not t:
                # Without this guard the loop would never terminate once
                # the reads run out with baseQ still undetermined.
                raise ValueError('could not determine baseQ from %s' % fq)
            baseQ = preprocess_radtag_lane.get_baseQ(q)
    finally:
        qfh.close()
    print >> sys.stderr, 'fastq format baseQ: %s' % baseQ

    # r is the sequence of the read that settled baseQ.
    readlen = len(r)
    print >> sys.stderr, 'fastq format readlen: %s' % readlen

    return lnum,baseQ,readlen
Example #3
0
#!/usr/bin/env python

import Seq, os,sys
from radtag_denovo import preprocess_radtag_lane
from Util import smartopen

def join_pair(r1,r2,num_n=10,qual_n='#'):
    """Merge a read pair into one [name, sequence, quality] record.

    The result keeps the name of r1.  Its sequence is r1's sequence, then
    num_n 'N' characters, then the string form of Seq.Sequence(r2[1]).rc().
    Its quality is r1's quality, then num_n copies of qual_n, then r2's
    quality in reverse order.

    Arguments:
        r1, r2 -- indexable records; items 0, 1 and 2 are used as name,
                  sequence and quality
        num_n  -- number of filler positions placed between the two reads
        qual_n -- quality character used for each filler position
    """
    spacer_seq = 'N' * num_n
    spacer_qual = qual_n * num_n

    mate_seq = str(Seq.Sequence(r2[1]).rc())
    # Reverse the mate's qualities so they line up with its rc() sequence.
    mate_qual = ''.join(r2[2][::-1])

    joined_seq = r1[1] + spacer_seq + mate_seq
    joined_qual = r1[2] + spacer_qual + mate_qual
    return [r1[0], joined_seq, joined_qual]

if __name__ == "__main__":
    f1,f2 = sys.argv[1:]
    fh1 = smartopen(f1)
    fh2 = smartopen(f2)

    rc = preprocess_radtag_lane.get_read_count(f1)
    
    for i in xrange(rc):
        if i % 1000 == 0:
            print >> sys.stderr, '\r%s / %s' % (i,rc),
        r1 = preprocess_radtag_lane.next_read_from_fh(fh1,4)
        r2 = preprocess_radtag_lane.next_read_from_fh(fh2,4)
        print preprocess_radtag_lane.as_fq4_lines(*join_pair(r1,r2))
    print >> sys.stderr, '\ndone'

    
    
    # Summarise the quality scores of the reads in fq: accumulate a
    # per-position total (for the per-base mean) and one mean per read
    # over the slice [start:end], then write both lists next to fq.
    # NOTE(review): end, start, readlen, fq, lnum and baseQ are bound
    # before this excerpt begins; their origin is not visible here.
    if end == '':
        end = readlen

    readcount = preprocess_radtag_lane.get_read_count(fq)

    qsc_n = 0
    qsc_tot = numpy.zeros(readlen)
    qsc_by_read = []

    # Report progress about every 1/1000th of the reads.  Clamp to 1 so
    # that fewer than 1000 reads does not produce a modulo by zero.
    tickon = max(1, readcount // 1000)

    fh = smartopen(fq)
    try:
        for i in range(readcount):
            if i % tickon == 0:
                print >> sys.stderr, '\r%0.1f' % ((i/float(readcount)) * 100),
            t,r,q = preprocess_radtag_lane.next_read_from_fh(fh,lnum)
            # Convert each quality character to a score relative to baseQ.
            qsc = [ord(c)-baseQ for c in q]
            qsc_n += 1
            qsc_tot += qsc
            qsc_by_read.append(numpy.mean(qsc[start:end]))
    finally:
        fh.close()

    qsc_by_base = list(qsc_tot/qsc_n)

    print >> sys.stderr, 'write per-base mean qual ...',
    per_base_fh = open(fq+'-per_base_qual.list','w')
    try:
        per_base_fh.write(repr(qsc_by_base))
    finally:
        per_base_fh.close()
    print >> sys.stderr, 'done'
    print >> sys.stderr, 'write per-read qual ..',
    per_read_fh = open(fq+'-per_read_qual.list','w')
    try:
        per_read_fh.write(repr(qsc_by_read))
    finally:
        per_read_fh.close()
    print >> sys.stderr, 'done'
#!/usr/bin/env python

import os, sys
from radtag_denovo import preprocess_radtag_lane

# Usage: <script> <infile> <outfile>
# Copies every read from infile to outfile as 4-line fastq, rewriting each
# header by splitting it on its last two ':' and rejoining the three parts
# as "<first> <second>:<third>".
# NOTE(review): a header with fewer than two ':' yields fewer than three
# parts and makes the "%" formatting below raise TypeError.
infile, outfile = sys.argv[1:]

# dirname is '' when outfile has no directory component, and
# os.makedirs('') raises, so only create a directory when one is named.
outdir = os.path.dirname(outfile)
if outdir and not os.path.exists(outdir):
    os.makedirs(outdir)

ifh = preprocess_radtag_lane.smartopen(infile)
ofh = preprocess_radtag_lane.smartopen(outfile, "w")

try:
    r = preprocess_radtag_lane.next_read_from_fh(ifh)
    # A read whose header is empty ends the loop.
    while r[0]:
        r[0] = "%s %s:%s" % (tuple(r[0].rsplit(":", 2)))
        ofh.write(preprocess_radtag_lane.as_fq4_lines(*r))
        r = preprocess_radtag_lane.next_read_from_fh(ifh)
finally:
    # Close both handles even if a read fails part-way, so that output
    # already written is flushed.
    ifh.close()
    ofh.close()