Example #1
0
def convert_fastq(fq,ofq,out_lnum=4,out_baseQ=33,tickon = 10000):
    '''Copy every read of fastq file `fq` into `ofq`, re-encoding each record.

    The read count and the input (lnum, baseQ) pair are obtained from
    preprocess_radtag_lane.  Each record is read with next_read_from_fh,
    its qualities are passed through qs_to_q(qs, baseQ), and the record is
    written as the string returned by as_fq_line(..., out_baseQ, out_lnum).

    fq        -- input file, opened with preprocess_radtag_lane.smartopen
    ofq       -- output file, opened with smartopen in 'w' mode
    out_lnum  -- forwarded to as_fq_line (presumably lines per output record)
    out_baseQ -- forwarded to as_fq_line (presumably output quality offset)
    tickon    -- progress is printed to stderr every `tickon` reads

    Returns None.  Both file handles are closed before returning, including
    when an error interrupts the conversion.
    '''
    nreads = preprocess_radtag_lane.get_read_count(fq)
    lnum,baseQ = preprocess_radtag_lane.get_fastq_properties(fq)
    fh = preprocess_radtag_lane.smartopen(fq)
    try:
        ofh = preprocess_radtag_lane.smartopen(ofq,'w')
        try:
            for i in xrange(nreads):
                if i%tickon == 0:
                    # '\r' plus the trailing comma (no newline) keeps the
                    # progress counter on a single, repeatedly redrawn line
                    print >> sys.stderr, '\r%s / %s (%0.1f%%)' % (i,nreads,(float(i)/nreads)*100),
                n,s,qs = preprocess_radtag_lane.next_read_from_fh(fh, lnum)
                ofh.write(preprocess_radtag_lane.as_fq_line(n,s,qs_to_q(qs,baseQ),out_baseQ,out_lnum))
        finally:
            # close explicitly rather than relying on garbage collection to
            # flush and release the output file
            ofh.close()
    finally:
        fh.close()
    # terminate the progress line
    print >> sys.stderr,'\n'
Example #2
0
def convert_fastq(fq, ofq, out_lnum=4, out_baseQ=33, tickon=10000):
    '''Rewrite the reads of fastq file `fq` into `ofq`.

    preprocess_radtag_lane supplies the number of reads and the input
    file's (lnum, baseQ) properties.  Every record is fetched with
    next_read_from_fh, its qualities go through qs_to_q(qs, baseQ), and
    the string produced by as_fq_line(..., out_baseQ, out_lnum) is written
    to the output.

    fq        -- input file, opened with preprocess_radtag_lane.smartopen
    ofq       -- output file, opened with smartopen in 'w' mode
    out_lnum  -- passed to as_fq_line (presumably lines per output record)
    out_baseQ -- passed to as_fq_line (presumably output quality offset)
    tickon    -- a progress line is printed to stderr every `tickon` reads

    Returns None.  The input and output handles are always closed, even
    if reading or writing raises.
    '''
    nreads = preprocess_radtag_lane.get_read_count(fq)
    lnum, baseQ = preprocess_radtag_lane.get_fastq_properties(fq)
    fh = preprocess_radtag_lane.smartopen(fq)
    try:
        ofh = preprocess_radtag_lane.smartopen(ofq, 'w')
        try:
            for i in xrange(nreads):
                if i % tickon == 0:
                    # trailing comma suppresses the newline so that '\r'
                    # redraws the counter in place
                    print >> sys.stderr, '\r%s / %s (%0.1f%%)' % (
                        i, nreads, (float(i) / nreads) * 100),
                n, s, qs = preprocess_radtag_lane.next_read_from_fh(fh, lnum)
                ofh.write(
                    preprocess_radtag_lane.as_fq_line(n, s,
                                                      qs_to_q(qs, baseQ),
                                                      out_baseQ, out_lnum))
        finally:
            # do not leave flushing/closing of the output to the
            # garbage collector
            ofh.close()
    finally:
        fh.close()
    # finish the progress line
    print >> sys.stderr, '\n'
Example #3
0
for paired end, argv:
cutsite,fq1,fq2,outfile1,outfile2

'''

import preprocess_radtag_lane
import os, sys

# Number of leading bases skipped in each read before the cutsite comparison
# (presumably the length of the barcode that precedes the cutsite).
barcode_len = 5
tick = 10000  # print a progress update to stderr every `tick` reads

if __name__ == "__main__":
    # Three arguments: cutsite, input fastq, output file.
    if len(sys.argv) == 4:
        cutsite, fq, outfile = sys.argv[1:]
        rc = preprocess_radtag_lane.get_read_count(fq)
        lnum, baseQ = preprocess_radtag_lane.get_fastq_properties(fq)

        # Window of each read sequence that must equal the cutsite.
        site_start = barcode_len
        site_end = barcode_len + len(cutsite)

        found = 0
        fh = preprocess_radtag_lane.smartopen(fq)
        try:
            ofh = preprocess_radtag_lane.smartopen(outfile, 'w')
            try:
                # xrange: avoid building a list with one entry per read
                for i in xrange(rc):
                    # i > 0 also keeps the found/i percentage well defined
                    if i > 0 and i % tick == 0:
                        print >> sys.stderr, '\r%s / %s (%0.1f%%) found %s (%0.1f%%)' % \
                              (i,rc,(float(i)/rc)*100,found,(float(found)/i)*100),
                    n, s, q = preprocess_radtag_lane.next_read_from_fh(fh, lnum)
                    if s[site_start:site_end] == cutsite:
                        # matching reads are written out with the input's lnum
                        line = preprocess_radtag_lane.as_fq_line(n, s, q, None, lnum)
                        ofh.write(line)
                        found += 1
            finally:
                # close the output even if a read or write fails part-way
                ofh.close()
        finally:
            fh.close()
Example #4
0
    parser.add_argument('infiles',nargs='+',help='2 fastq files corresponding to reads from a single lane/index, and optionally read 2 sequences for that lane/index')

    opts = parser.parse_args()

    if opts.flowcell is None or opts.lane is None:
        raise ValueError, '--flowcell and --lane (and --index as appropriate) must be specified'

    if len(opts.infiles) != 2: #PE
        errstr = '2 input files must be specified; got %s ' % len(opts.infiles)
        raise ValueError, errstr

    #check fq4-33
    for fq in opts.infiles:
        print >> sys.stderr, '\nfile: %s' % fq
        lnum,baseQ = preprocess_radtag_lane.get_fastq_properties(fq)
        print >> sys.stderr, 'lnum: %s\nbaseQ: %s' % (lnum,baseQ)
        if not (lnum == 4 and baseQ == 33):
            fqbase,fqext = fq_splitext(fq)
            prev_fq = '%s.fq%s-%s%s' % (fqbase,lnum,baseQ,fqext)
            print >> sys.stderr, 'must be 4-line, base 33 fastq to proceed; convert\nnew file will be %s\noriginal kept as %s\n' % (fq,prev_fq)
            save_previous_and_covert(prev_fq,fq)

    adapterstype = get_adapterstype(opts.flowcell,opts.lane,opts.index)
    adaptseq = get_adaptseq()
    adaptA,adaptB = adaptseq[adapterstype]['r1'],adaptseq[adapterstype]['r2']
    print >> sys.stderr, 'use adapterstype: %s\nadaptA: %s\nadaptB: %s' % (adapterstype,adaptA,adaptB)

    #run seqprep
    if opts.seqprep_base:
        sp_base = opts.seqprep_base