def describe_fastq(filename):
    """Detect the record layout and quality offset of a fastq file.

    The first character of the file selects the per-record line count:
    ``'@'`` gives 4, anything else gives 1.  Reads are then pulled with
    ``preprocess_radtag_lane.next_read_from_fh`` until
    ``preprocess_radtag_lane.get_baseQ`` returns something other than
    ``None`` for a read's third field.

    Args:
        filename: path handed to ``preprocess_radtag_lane.smartopen``.

    Returns:
        The tuple ``(lnum, baseQ)``.

    Raises:
        ValueError: if a read with all three fields empty comes back
            before ``baseQ`` could be determined.
    """
    # Probe the first character on its own handle and close it immediately;
    # previously this handle was opened inline and never closed.
    probe = preprocess_radtag_lane.smartopen(filename)
    try:
        first_char = probe.read(1)
    finally:
        probe.close()

    if first_char == '@':
        lnum = 4
    else:
        lnum = 1

    baseQ = None
    fh = preprocess_radtag_lane.smartopen(filename)
    try:
        while baseQ is None:
            # Pass the detected layout so 1-line records are not parsed with
            # whatever default next_read_from_fh applies.
            n, s, q = preprocess_radtag_lane.next_read_from_fh(fh, lnum)
            # NOTE(review): assumes next_read_from_fh yields all-empty fields
            # at end of input -- TODO confirm.  Without this exit the loop
            # never terminates once the reads run out.
            if not (n or s or q):
                raise ValueError(
                    'could not determine baseQ from %s' % filename)
            baseQ = preprocess_radtag_lane.get_baseQ(q)
    finally:
        # Close the handle even when parsing raises.
        fh.close()

    return lnum, baseQ
def get_fastq_properties(fq):
    """Report the record layout, quality offset and read length of a fastq.

    The first character of the file selects the per-record line count:
    ``'@'`` gives 4, anything else gives 1.  Reads are then pulled with
    ``preprocess_radtag_lane.next_read_from_fh`` until
    ``preprocess_radtag_lane.get_baseQ`` returns something other than
    ``None`` for a read's third field.  Each detected property is echoed
    to stderr as it becomes known.

    Args:
        fq: path handed to ``smartopen``.

    Returns:
        The tuple ``(lnum, baseQ, readlen)``, where ``readlen`` is the
        length of the second field of the last read examined (presumably
        the sequence -- confirm against ``next_read_from_fh``).

    Raises:
        ValueError: if a read with all three fields empty comes back
            before ``baseQ`` could be determined.
    """
    # Probe the first character on its own handle and close it immediately;
    # previously this handle was opened inline and never closed.
    probe = smartopen(fq)
    try:
        first_char = probe.read(1)
    finally:
        probe.close()

    if first_char == '@':
        lnum = 4
    else:
        lnum = 1
    print >> sys.stderr, 'fastq format lnum: %s' % lnum

    baseQ = None
    qfh = smartopen(fq)
    try:
        while baseQ is None:
            t, r, q = preprocess_radtag_lane.next_read_from_fh(qfh, lnum)
            # NOTE(review): assumes next_read_from_fh yields all-empty fields
            # at end of input -- TODO confirm.  Without this exit the loop
            # never terminates once the reads run out.
            if not (t or r or q):
                raise ValueError('could not determine baseQ from %s' % fq)
            baseQ = preprocess_radtag_lane.get_baseQ(q)
    finally:
        # Close the handle even when parsing raises.
        qfh.close()
    print >> sys.stderr, 'fastq format baseQ: %s' % baseQ

    # Length is taken from the read that settled baseQ, not the first read.
    readlen = len(r)
    print >> sys.stderr, 'fastq format readlen: %s' % readlen

    return lnum, baseQ, readlen
#!/usr/bin/env python
# Join two paired fastq files read-by-read and print the joined records
# to stdout.  Takes exactly two command-line arguments: the first-read
# file and the second-read file.

import os
import sys

import Seq
from radtag_denovo import preprocess_radtag_lane
from Util import smartopen


def join_pair(r1,r2,num_n=10,qual_n='#'):
    """Fuse two reads into a single three-item read.

    Args:
        r1: indexable read; item 0 is kept as the joined read's first
            field, items 1 and 2 start the joined second and third fields.
        r2: indexable read; ``str(Seq.Sequence(r2[1]).rc())`` is appended
            to the second field (presumably the reverse complement --
            confirm against ``Seq``) and item 2, reversed, is appended to
            the third field.
        num_n: number of spacer characters placed between the two reads.
        qual_n: character repeated ``num_n`` times as the spacer in the
            third field; the second field's spacer is always ``'N'``.

    Returns:
        A list ``[r1[0], joined second field, joined third field]``.
    """
    spacer_seq = 'N' * num_n
    spacer_qual = qual_n * num_n
    mate_seq = str(Seq.Sequence(r2[1]).rc())
    # Reverse r2's third field so it stays aligned with the rc()'d second.
    mate_qual = ''.join(reversed(r2[2]))
    return [r1[0], r1[1] + spacer_seq + mate_seq, r1[2] + spacer_qual + mate_qual]


if __name__ == "__main__":
    f1,f2 = sys.argv[1:]

    fh1 = smartopen(f1)
    try:
        fh2 = smartopen(f2)
        try:
            # The read count of the first file drives the loop; the second
            # file is consumed in lockstep.
            rc = preprocess_radtag_lane.get_read_count(f1)
            for i in xrange(rc):
                if i % 1000 == 0:
                    print >> sys.stderr, '\r%s / %s' % (i,rc),
                r1 = preprocess_radtag_lane.next_read_from_fh(fh1,4)
                r2 = preprocess_radtag_lane.next_read_from_fh(fh2,4)
                record = preprocess_radtag_lane.as_fq4_lines(*join_pair(r1,r2))
                # Same output as a bare print statement (record + newline).
                # NOTE(review): if as_fq4_lines already ends with a newline
                # this emits a blank line after every record -- confirm.
                print >> sys.stdout, record
        finally:
            # Both input handles were previously left open.
            fh2.close()
    finally:
        fh1.close()

    print >> sys.stderr, '\ndone'
# Accumulate per-base and per-read mean quality scores for `fq` and dump
# both as repr()'d lists next to the input file.
# NOTE(review): fq, lnum, baseQ, readlen, start and end are set earlier in
# the script, outside this section -- confirm their types there.
if end == '':
    end = readlen

readcount = preprocess_radtag_lane.get_read_count(fq)

qsc_n = 0
qsc_tot = numpy.zeros(readlen)
qsc_by_read = []

# Progress is reported roughly every 0.1% of reads.  Clamp the interval to
# at least 1: with fewer than 1000 reads the integer division gives 0 and
# `i % tickon` would raise ZeroDivisionError.
tickon = max(1, readcount // 1000)

fh = smartopen(fq)
try:
    for i in range(readcount):
        if i % tickon == 0:
            print >> sys.stderr, '\r%0.1f' % ((i/float(readcount)) * 100),
        t,r,q = preprocess_radtag_lane.next_read_from_fh(fh,lnum)
        # Convert each quality character to a score relative to baseQ.
        qsc = [ord(c)-baseQ for c in q]
        qsc_n += 1
        # Element-wise add; requires len(q) == readlen for every read.
        qsc_tot += qsc
        qsc_by_read.append(numpy.mean(qsc[start:end]))
finally:
    # The input handle was previously never closed.
    fh.close()

qsc_by_base = list(qsc_tot/qsc_n)

print >> sys.stderr, 'write per-base mean qual ...',
with open(fq+'-per_base_qual.list','w') as per_base_out:
    per_base_out.write(repr(qsc_by_base))
print >> sys.stderr, 'done'

print >> sys.stderr, 'write per-read qual ..',
with open(fq+'-per_read_qual.list','w') as per_read_out:
    per_read_out.write(repr(qsc_by_read))
print >> sys.stderr, 'done'
#!/usr/bin/env python
# Copy a fastq file, rewriting every read name so that its second-to-last
# ':' becomes a space ("a:b:c" -> "a b:c").  Takes exactly two
# command-line arguments: the input path and the output path.

import os
import sys

from radtag_denovo import preprocess_radtag_lane

infile, outfile = sys.argv[1:]

# dirname() is '' when outfile is a bare filename; os.makedirs('') raises
# OSError, so only create the directory when there is one to create.
outdir = os.path.dirname(outfile)
if outdir and not os.path.exists(outdir):
    os.makedirs(outdir)

ifh = preprocess_radtag_lane.smartopen(infile)
try:
    ofh = preprocess_radtag_lane.smartopen(outfile, "w")
    try:
        r = preprocess_radtag_lane.next_read_from_fh(ifh)
        # An empty read name ends the loop.
        while r[0]:
            # rsplit(":", 2) must produce exactly three parts; a name with
            # fewer than two ':' makes the format below raise TypeError.
            r[0] = "%s %s:%s" % tuple(r[0].rsplit(":", 2))
            ofh.write(preprocess_radtag_lane.as_fq4_lines(*r))
            r = preprocess_radtag_lane.next_read_from_fh(ifh)
    finally:
        # Close (and flush) the output even if a read fails to parse.
        ofh.close()
finally:
    ifh.close()