Code Example #1
def trim_file(infile, adapter, outfile, threads=1, phred=33):
    read_queue = Queue()
    result_queue = Queue()
    trimmed_queue = Queue()
    workers = []

    def start_workers():
        for i in xrange(threads):
            worker = Worker(queue=read_queue,
                            results=result_queue,
                            phred64=phred == 64,
                            adapter=adapter)
            workers.append(worker)
            worker.start()

    writer = Writer(queue=result_queue, trimmed=trimmed_queue, outfile=outfile)
    writer.start()
    batch = []
    for index, read in enumerate(FastqReader(infile)):
        batch.append(read)
        # sniff the quality encoding on the first 1000 reads: quality
        # characters above ASCII 74 ('J') indicate Phred+64 data
        if index < 1000 and phred == 33:
            if any([i for i in read.qualities if ord(i) > 74]):
                phred = 64
        # hand reads to the workers in batches of 10,000
        if index % 10000 == 0:
            if not workers:
                start_workers()
            read_queue.put(batch)
            batch = []
    # flush the final (possibly partial) batch
    if not workers:
        start_workers()
    read_queue.put(batch)
    processed = index + 1
    # poison pill to stop workers
    for i in xrange(threads):
        read_queue.put(None)

    for i in workers:
        i.join()

    # poison pill for writers
    result_queue.put(None)

    # wait for writing to finish
    writer.join()
    #print "Output done"

    # sentinel so the count summation below terminates
    trimmed_queue.put(None)

    kept_reads = sum([i for i in iter(trimmed_queue.get, None)])

    return (phred, processed, kept_reads)
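
The Worker and Writer classes used above are project-local and not shown here. A minimal sketch of the queue/poison-pill consumer pattern a Worker like this appears to follow, with a hypothetical trim_read standing in for the real adapter-trimming logic (an illustration under those assumptions, not the miRge implementation):

from multiprocessing import Process

def trim_read(read, adapter):
    # hypothetical placeholder for the real adapter-trimming logic
    return read

class Worker(Process):
    def __init__(self, queue, results, adapter, phred64=False):
        super(Worker, self).__init__()
        self.queue = queue
        self.results = results
        self.adapter = adapter
        self.phred64 = phred64

    def run(self):
        # consume batches until the None sentinel ("poison pill") arrives
        for batch in iter(self.queue.get, None):
            self.results.put([trim_read(r, self.adapter) for r in batch])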
Code Example #2
    def test_context_manager(self):
        filename = "tests/data/simple.fastq"
        with open(filename) as f:
            assert not f.closed
            reads = list(openseq(f))
            assert not f.closed
        assert f.closed

        with FastqReader(filename) as sr:
            tmp_sr = sr
            assert not sr._file.closed
            reads = list(sr)
            assert not sr._file.closed
        assert tmp_sr._file is None
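
The final assertion implies that the reader releases its file handle on exit. A minimal sketch of a reader with that context-manager behaviour (an illustration of what the test checks, not the actual FastqReader source):

class SimpleFileReader(object):
    def __init__(self, filename):
        self._file = open(filename)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __iter__(self):
        return iter(self._file)

    def close(self):
        if self._file is not None:
            self._file.close()
            self._file = None  # why `tmp_sr._file is None` holds after the with-block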
Code Example #3
    def test_fastq_incomplete(self):
        fastq = StringIO("@name\nACGT+\n")
        with FastqReader(fastq) as fq:
            list(fq)
Code Example #4
    def test_fastq_wrongformat(self):
        with FastqReader("tests/data/withplus.fastq") as f:
            reads = list(f)
Code Example #5
    def test_fastqreader_dos(self):
        with FastqReader("tests/data/dos.fastq") as f:
            dos_reads = list(f)
        with FastqReader("tests/data/small.fastq") as f:
            unix_reads = list(f)
        assert dos_reads == unix_reads
Code Example #6
    def test_fastqreader(self):
        with FastqReader("tests/data/simple.fastq") as f:
            reads = list(f)
        assert reads == simple_fastq
Code Example #7
File: trim_file.py  Project: mhalushka/miRge-1
def main():
    args = parser.parse_args()
    dest = args.outfile
    phred = args.phred64 or 33  # 64 if the phred64 option was set, otherwise default to 33
    threads = args.threads
    logfile = args.logfile

    adapter = args.adapter

    read_queue = Queue()
    result_queue = Queue()
    trimmed_queue = Queue()

    workers = []

    def start_workers():
        for i in xrange(threads):
            worker = Worker(queue=read_queue,
                            results=result_queue,
                            phred64=phred == 64,
                            adapter=adapter)
            workers.append(worker)
            worker.start()

    writer = Writer(queue=result_queue, trimmed=trimmed_queue, outfile=dest)
    writer.start()

    batch = []
    for index, read in enumerate(FastqReader(args.infile.name)):
        batch.append(read)
        if index < 1000 and phred == 33:
            if any([i for i in read.qualities if ord(i) > 74]):
                phred = 64
        if index % 10000 == 0:
            if not workers:
                start_workers()
            read_queue.put(batch)
            batch = []
    if not workers:
        start_workers()
    read_queue.put(batch)
    processed = index + 1

    # poison pill to stop workers
    for i in xrange(threads):
        read_queue.put(None)

    for i in workers:
        i.join()

    # poison pill for writers
    result_queue.put(None)

    # wait for writing to finish
    writer.join()

    trimmed_queue.put(None)

    kept_reads = sum([i for i in iter(trimmed_queue.get, None)])

    with logfile as o:
        o.write('Starting reads: {0}\n'.format(processed))
        o.write('Processed reads: {0}\n'.format(kept_reads))

    sys.stdout.write('{0}\n'.format(phred))
    return phred
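
main() only shows how the Writer is driven: it drains result_queue until a None sentinel, writes reads to the output file, and reports the number of kept reads on trimmed_queue, which main() then sums. A minimal sketch consistent with that usage, assuming outfile is a path (the interface is inferred, not taken from the miRge source):

from multiprocessing import Process

class Writer(Process):
    def __init__(self, queue, trimmed, outfile):
        super(Writer, self).__init__()
        self.queue = queue
        self.trimmed = trimmed
        self.outfile = outfile

    def run(self):
        kept = 0
        with open(self.outfile, 'w') as out:
            # drain result batches until the None sentinel arrives
            for batch in iter(self.queue.get, None):
                for read in batch:
                    out.write(str(read))
                    kept += 1
        # main() collects these counts via iter(trimmed_queue.get, None)
        self.trimmed.put(kept)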