コード例 #1
0
ファイル: overlap_preprocess.py プロジェクト: alexagrf/rtd
def convert_fastq(fq,ofq,out_lnum=4,out_baseQ=33,tickon = 10000):
    '''Copy every read of fastq file `fq` into `ofq`, re-encoding as it goes.

    The input lnum and baseQ come from
    preprocess_radtag_lane.get_fastq_properties(fq).  Each read's quality
    string is converted with qs_to_q(qs, baseQ) and the record is formatted
    by preprocess_radtag_lane.as_fq_line using out_baseQ and out_lnum.

    Arguments:
    fq        -- input path, opened with preprocess_radtag_lane.smartopen
    ofq       -- output path, opened with smartopen in 'w' mode
    out_lnum  -- lnum value passed through to as_fq_line (default 4)
    out_baseQ -- baseQ value passed through to as_fq_line (default 33)
    tickon    -- progress is printed to stderr every `tickon` reads;
                 0 or None turns the progress output off

    Returns None.  Both file handles are closed on exit, even when reading
    or writing a record raises.
    '''
    nreads = preprocess_radtag_lane.get_read_count(fq)
    lnum,baseQ = preprocess_radtag_lane.get_fastq_properties(fq)
    fh = preprocess_radtag_lane.smartopen(fq)
    try:
        ofh = preprocess_radtag_lane.smartopen(ofq,'w')
        try:
            for i in xrange(nreads):
                # `tickon and` avoids a modulo-by-zero when progress is disabled
                if tickon and i%tickon == 0:
                    print >> sys.stderr, '\r%s / %s (%0.1f%%)' % (i,nreads,(float(i)/nreads)*100),
                n,s,qs = preprocess_radtag_lane.next_read_from_fh(fh, lnum)
                ofh.write(preprocess_radtag_lane.as_fq_line(n,s,qs_to_q(qs,baseQ),out_baseQ,out_lnum))
        finally:
            ofh.close()
    finally:
        fh.close()
    # terminate the '\r'-overwritten progress line
    print >> sys.stderr,'\n'
コード例 #2
0
ファイル: overlap_preprocess.py プロジェクト: xguse/rtd
def convert_fastq(fq, ofq, out_lnum=4, out_baseQ=33, tickon=10000):
    '''Re-write the reads of `fq` into `ofq` using the requested output format.

    get_fastq_properties(fq) supplies the lnum and baseQ of the input.  For
    each of the get_read_count(fq) reads, the quality string is run through
    qs_to_q(qs, baseQ) and the read is serialised with as_fq_line, which
    receives `out_baseQ` and `out_lnum`.

    Arguments:
    fq        -- path of the input file (opened via smartopen)
    ofq       -- path of the output file (opened via smartopen, mode 'w')
    out_lnum  -- lnum handed to as_fq_line
    out_baseQ -- baseQ handed to as_fq_line
    tickon    -- emit a progress line on stderr every `tickon` reads; a
                 false value (0, None) suppresses progress reporting

    Returns None.  The input and output handles are always closed before
    the function exits, including when an exception propagates.
    '''
    nreads = preprocess_radtag_lane.get_read_count(fq)
    lnum, baseQ = preprocess_radtag_lane.get_fastq_properties(fq)

    fh = preprocess_radtag_lane.smartopen(fq)
    try:
        ofh = preprocess_radtag_lane.smartopen(ofq, 'w')
        try:
            for i in xrange(nreads):
                # guard on tickon first so tickon=0 cannot divide by zero
                if tickon and i % tickon == 0:
                    percent = (float(i) / nreads) * 100
                    print >> sys.stderr, '\r%s / %s (%0.1f%%)' % (i, nreads,
                                                                  percent),
                n, s, qs = preprocess_radtag_lane.next_read_from_fh(fh, lnum)
                converted = qs_to_q(qs, baseQ)
                ofh.write(
                    preprocess_radtag_lane.as_fq_line(n, s, converted,
                                                      out_baseQ, out_lnum))
        finally:
            ofh.close()
    finally:
        fh.close()

    # move past the progress line that was being overwritten with '\r'
    print >> sys.stderr, '\n'
コード例 #3
0
ファイル: read_quality_statistics.py プロジェクト: xguse/rtd
def get_fastq_properties(fq):
    '''Inspect the file `fq` and report its lnum, baseQ and read length.

    lnum is 4 when the first character of the file is '@', otherwise 1.
    baseQ is found by pulling reads with
    preprocess_radtag_lane.next_read_from_fh and handing the third field of
    each to preprocess_radtag_lane.get_baseQ until it returns something
    other than None.  readlen is the length of the second field of the last
    read examined.

    Each of the three values is also reported on stderr.

    Returns the tuple (lnum, baseQ, readlen).
    '''
    # Sniff the first character, closing the probe handle afterwards.
    probe = smartopen(fq)
    try:
        first_char = probe.read(1)
    finally:
        probe.close()

    if first_char == '@':
        lnum = 4
    else:
        lnum = 1
    print >> sys.stderr, 'fastq format lnum: %s' % lnum

    baseQ = None
    qfh = smartopen(fq)
    try:
        # keep reading until a read lets get_baseQ decide
        while baseQ is None:
            t, r, q = preprocess_radtag_lane.next_read_from_fh(qfh, lnum)
            baseQ = preprocess_radtag_lane.get_baseQ(q)
    finally:
        qfh.close()
    print >> sys.stderr, 'fastq format baseQ: %s' % baseQ

    # r is the second field of the read that settled baseQ
    readlen = len(r)
    print >> sys.stderr, 'fastq format readlen: %s' % readlen

    return lnum, baseQ, readlen
コード例 #4
0
def get_fastq_properties(fq):
    '''Return (lnum, baseQ, readlen) determined from the file `fq`.

    lnum    -- 4 if the file starts with '@', else 1
    baseQ   -- first non-None result of preprocess_radtag_lane.get_baseQ,
               applied to the third field of successive reads
    readlen -- len() of the second field of the read that produced baseQ

    The three values are also printed to stderr as they are determined.
    Every handle opened here is closed again, including on error.
    '''
    head_fh = smartopen(fq)
    try:
        starts_with_at = head_fh.read(1) == '@'
    finally:
        # this handle is only needed for the one-character peek
        head_fh.close()
    lnum = 4 if starts_with_at else 1
    print >> sys.stderr, 'fastq format lnum: %s' % lnum

    baseQ = None
    qfh = smartopen(fq)
    try:
        while baseQ is None:
            # loop until get_baseQ returns something other than None
            t,r,q = preprocess_radtag_lane.next_read_from_fh(qfh,lnum)
            baseQ = preprocess_radtag_lane.get_baseQ(q)
    finally:
        qfh.close()
    print >> sys.stderr, 'fastq format baseQ: %s' % baseQ

    readlen = len(r)
    print >> sys.stderr, 'fastq format readlen: %s' % readlen

    return lnum,baseQ,readlen
コード例 #5
0
if __name__ == "__main__":
    # Command-line entry point.
    #   3 arguments: cutsite fq outfile
    #   5 arguments: cutsite fq1 fq2 outfile1 outfile2
    # `tick` and `barcode_len` are not set in this block; they are expected
    # to be defined at module level.
    if len(sys.argv) == 4:
        cutsite, fq, outfile = sys.argv[1:]
        rc = preprocess_radtag_lane.get_read_count(fq)
        lnum, baseQ = preprocess_radtag_lane.get_fastq_properties(fq)

        fh = preprocess_radtag_lane.smartopen(fq)
        ofh = preprocess_radtag_lane.smartopen(outfile, 'w')

        found = 0
        for i in range(rc):
            # i > 0 keeps the found/i percentage from dividing by zero
            if i > 0 and i % tick == 0:
                print >> sys.stderr, '\r%s / %s (%0.1f%%) found %s (%0.1f%%)' % \
                      (i,rc,(float(i)/rc)*100,found,(float(found)/i)*100),
            n, s, q = preprocess_radtag_lane.next_read_from_fh(fh, lnum)
            # keep the read only if cutsite sits right after the first
            # barcode_len characters of its sequence field
            if s[barcode_len:barcode_len + len(cutsite)] == cutsite:
                line = preprocess_radtag_lane.as_fq_line(n, s, q, None, lnum)
                ofh.write(line)
                found += 1
        fh.close()
        ofh.close()
    elif len(sys.argv) == 6:
        cutsite, fq1, fq2, outfile1, outfile2 = sys.argv[1:]
        rc1 = preprocess_radtag_lane.get_read_count(fq1)
        rc2 = preprocess_radtag_lane.get_read_count(fq2)
        if rc1 != rc2:
            # the message has four %s slots: both paths and both counts
            errstr = 'read count for %s = %s; %s = %s. counts must match' % (
                fq1, rc1, fq2, rc2)
            raise ValueError(errstr)
        lnum, baseQ = preprocess_radtag_lane.get_fastq_properties(fq1)
コード例 #6
0
ファイル: find_perfect_match_reads.py プロジェクト: xguse/rtd
'''

import os, sys
import numpy
from editdist import distance
from preprocess_radtag_lane import next_read_from_fh, smartopen, get_read_count

# Each uniqued sequence is sliced as s[cut_bp:readlen - idx_bp] below.
idx_bp = 5
cut_bp = 5

# lnum passed to next_read_from_fh / get_read_count
lnum = 4
# minimum mean count for a uniqued sequence to be kept
min_seqs = 7

# usage: <script> uniqued fastq
uniqued, fastq = sys.argv[1:]

# The read length is taken from the first read of the fastq file; the handle
# is only needed for that one read, so close it straight away.
fastq_fh = smartopen(fastq)
try:
    readlen = len(next_read_from_fh(fastq_fh, lnum)[1])
finally:
    fastq_fh.close()

print >> sys.stderr, 'readlen: %s' % readlen

num_reads = get_read_count(fastq, lnum)
tickon = num_reads / 200

useqs = []
with open(uniqued) as uniqued_fh:
    for l in uniqued_fh:
        # column 0 is the sequence, column 4 a comma-separated list of counts
        fields = l.strip().split()
        s, cntstr = fields[0], fields[4]
        cnt = numpy.mean([int(i) for i in cntstr.split(',')])
        if cnt >= min_seqs:
            useqs.append(s[cut_bp:readlen - idx_bp])

# drop duplicate trimmed sequences
useqs = list(set(useqs))
print >> sys.stderr, '%s unique %sbp sequences in uniqued file' % (
コード例 #7
0
'''

import os,sys
import numpy
from editdist import distance
from preprocess_radtag_lane import next_read_from_fh, smartopen, get_read_count

# Uniqued sequences are trimmed to s[cut_bp:readlen-idx_bp] further down.
idx_bp = 5
cut_bp = 5

# lnum handed to next_read_from_fh and get_read_count
lnum = 4
# a uniqued sequence is kept only if its mean count reaches this value
min_seqs = 7

# expects exactly two command-line arguments: uniqued file, fastq file
uniqued, fastq = sys.argv[1:]

# Only the first read is needed to learn the read length, so the fastq
# handle is closed as soon as that read has been taken.
first_read_fh = smartopen(fastq)
try:
    readlen = len(next_read_from_fh(first_read_fh,lnum)[1])
finally:
    first_read_fh.close()

print >> sys.stderr, 'readlen: %s' % readlen

num_reads = get_read_count(fastq,lnum)
tickon = num_reads/200

useqs = []
# s is read again after the loop; bind it so an empty uniqued file reports
# 0bp instead of failing with a NameError
s = ''
with open(uniqued) as uniqued_fh:
    for l in uniqued_fh:
        # field 0: sequence; field 4: comma-separated counts
        cols = l.strip().split()
        s,cntstr = cols[0], cols[4]
        cnt = numpy.mean([int(i) for i in cntstr.split(',')])
        if cnt >= min_seqs:
            useqs.append(s[cut_bp:readlen-idx_bp])

# collapse duplicate trimmed sequences
useqs = list(set(useqs))
# the reported length is that of the last uniqued line's trimmed sequence
print >> sys.stderr, '%s unique %sbp sequences in uniqued file' % (len(useqs),len(s[cut_bp:readlen-idx_bp]))
コード例 #8
0
ファイル: read_quality_statistics.py プロジェクト: xguse/rtd
    if end == '':
        end = readlen

    readcount = preprocess_radtag_lane.get_read_count(fq)

    qsc_n = 0
    qsc_tot = numpy.zeros(readlen)
    qsc_by_read = []

    fh = smartopen(fq)

    tickon = readcount / 1000
    for i in range(readcount):
        if i % tickon == 0:
            print >> sys.stderr, '\r%0.1f' % ((i / float(readcount)) * 100),
        t, r, q = preprocess_radtag_lane.next_read_from_fh(fh, lnum)
        qsc = [ord(c) - baseQ for c in q]
        qsc_n += 1
        qsc_tot += qsc
        qsc_by_read.append(numpy.mean(qsc[start:end]))

    qsc_by_base = list(qsc_tot / qsc_n)

    print >> sys.stderr, 'write per-base mean qual ...',
    open(fq + '-per_base_qual.list', 'w').write(qsc_by_base.__repr__())
    print >> sys.stderr, 'done'
    print >> sys.stderr, 'write per-read qual ..',
    open(fq + '-per_read_qual.list', 'w').write(qsc_by_read.__repr__())
    print >> sys.stderr, 'done'
コード例 #9
0
    if end == '':
        end = readlen

    readcount = preprocess_radtag_lane.get_read_count(fq)

    qsc_n = 0
    qsc_tot = numpy.zeros(readlen)
    qsc_by_read = []

    fh = smartopen(fq)

    tickon = readcount/1000
    for i in range(readcount):
        if i % tickon == 0:
            print >> sys.stderr, '\r%0.1f' % ((i/float(readcount)) * 100),
        t,r,q = preprocess_radtag_lane.next_read_from_fh(fh,lnum)
        qsc = [ord(c)-baseQ for c in q]
        qsc_n += 1
        qsc_tot += qsc
        qsc_by_read.append(numpy.mean(qsc[start:end]))

    qsc_by_base = list(qsc_tot/qsc_n)

    print >> sys.stderr, 'write per-base mean qual ...',
    open(fq+'-per_base_qual.list','w').write(qsc_by_base.__repr__())
    print >> sys.stderr, 'done'
    print >> sys.stderr, 'write per-read qual ..',
    open(fq+'-per_read_qual.list','w').write(qsc_by_read.__repr__())
    print >> sys.stderr, 'done'
コード例 #10
0
if __name__ == "__main__":
    # Script entry point; dispatches on the number of command-line arguments.
    #   cutsite fq outfile                      -> single-file branch
    #   cutsite fq1 fq2 outfile1 outfile2       -> two-file branch
    # `tick` and `barcode_len` are referenced but not assigned here; they
    # are expected to exist at module level.
    if len(sys.argv) == 4:
        cutsite,fq,outfile = sys.argv[1:]
        rc = preprocess_radtag_lane.get_read_count(fq)
        lnum,baseQ = preprocess_radtag_lane.get_fastq_properties(fq)

        fh = preprocess_radtag_lane.smartopen(fq)
        ofh = preprocess_radtag_lane.smartopen(outfile,'w')

        found = 0
        for i in range(rc):
            # skip i == 0 so found/i below cannot divide by zero
            if i>0 and i % tick == 0:
                print >> sys.stderr, '\r%s / %s (%0.1f%%) found %s (%0.1f%%)' % \
                      (i,rc,(float(i)/rc)*100,found,(float(found)/i)*100),
            n,s,q = preprocess_radtag_lane.next_read_from_fh(fh,lnum)
            # write the read out only when cutsite immediately follows the
            # first barcode_len characters of s
            if s[barcode_len:barcode_len+len(cutsite)] == cutsite:
                line = preprocess_radtag_lane.as_fq_line(n,s,q,None,lnum)
                ofh.write(line)
                found += 1
        fh.close()
        ofh.close()
    elif len(sys.argv) == 6:
        cutsite,fq1,fq2,outfile1,outfile2 = sys.argv[1:]
        rc1 = preprocess_radtag_lane.get_read_count(fq1)
        rc2 = preprocess_radtag_lane.get_read_count(fq2)
        if rc1 != rc2:
            # four values for four %s: path and count of each input file
            errstr = 'read count for %s = %s; %s = %s. counts must match' % (fq1,rc1,fq2,rc2)
            raise ValueError(errstr)
        lnum,baseQ = preprocess_radtag_lane.get_fastq_properties(fq1)

        fh1 = preprocess_radtag_lane.smartopen(fq1)