def trim_file(infile, adapter, outfile, threads=1, phred=33):
    read_queue = Queue()
    result_queue = Queue()
    trimmed_queue = Queue()
    workers = []

    def start_workers():
        for i in xrange(threads):
            worker = Worker(queue=read_queue, results=result_queue,
                            phred64=phred == 64, adapter=adapter)
            workers.append(worker)
            worker.start()

    writer = Writer(queue=result_queue, trimmed=trimmed_queue, outfile=outfile)
    writer.start()

    batch = []
    for index, read in enumerate(FastqReader(infile)):
        batch.append(read)
        # sniff the quality encoding over the first 1000 reads: any quality
        # character above ASCII 74 ('J', the phred+33 maximum) implies phred+64
        if index < 1000 and phred == 33:
            if any(ord(i) > 74 for i in read.qualities):
                phred = 64
        # flush every 10000 reads; delaying the first flush until after the
        # sniff window keeps workers from starting with the wrong encoding
        if (index + 1) % 10000 == 0:
            if not workers:
                start_workers()
            read_queue.put(batch)
            batch = []
    if not workers:
        start_workers()
    read_queue.put(batch)
    processed = index + 1

    # poison pill to stop workers
    for i in xrange(threads):
        read_queue.put(None)
    for i in workers:
        i.join()

    # poison pill for the writer
    result_queue.put(None)
    # wait for writing to finish
    writer.join()

    trimmed_queue.put(None)
    kept_reads = sum(i for i in iter(trimmed_queue.get, None))
    return (phred, processed, kept_reads)
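# The Worker and Writer classes that trim_file drives are defined elsewhere
# in the codebase. As a rough sketch of the queue protocol they are assumed
# to follow -- batches of reads in, None as the poison pill, kept-read counts
# reported on the trimmed queue -- illustrative stand-ins might look like the
# following. The multiprocessing.Process base class and the trim() helper are
# assumptions for illustration, not the project's actual implementation.
from multiprocessing import Process, Queue

def trim(read, adapter, phred64=False):
    # hypothetical placeholder: return the trimmed read, or None to discard it
    return read

class Worker(Process):
    def __init__(self, queue=None, results=None, phred64=False, adapter=None):
        super(Worker, self).__init__()
        self.queue = queue        # batches of reads in
        self.results = results    # trimmed batches out
        self.phred64 = phred64
        self.adapter = adapter

    def run(self):
        # consume batches until the None poison pill arrives
        for batch in iter(self.queue.get, None):
            trimmed = [trim(r, self.adapter, self.phred64) for r in batch]
            self.results.put([r for r in trimmed if r is not None])

class Writer(Process):
    def __init__(self, queue=None, trimmed=None, outfile=None):
        super(Writer, self).__init__()
        self.queue = queue        # trimmed batches in
        self.trimmed = trimmed    # kept-read count out
        self.outfile = outfile

    def run(self):
        kept = 0
        with open(self.outfile, 'w') as out:
            for batch in iter(self.queue.get, None):
                for read in batch:
                    out.write(str(read))
                    kept += 1
        # report the count so the caller can sum it after its own poison pill
        self.trimmed.put(kept)

# With stand-ins like these, a call such as
#     phred, processed, kept = trim_file('in.fastq', 'TGGAATTCTCGG',
#                                        'out.fastq', threads=4)
# would round-trip the file (the adapter sequence here is a placeholder).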
def test_context_manager(self):
    filename = "tests/data/simple.fastq"
    with open(filename) as f:
        assert not f.closed
        reads = list(openseq(f))
        assert not f.closed
    assert f.closed

    with FastqReader(filename) as sr:
        tmp_sr = sr
        assert not sr._file.closed
        reads = list(sr)
        assert not sr._file.closed
    assert tmp_sr._file is None
def test_fastq_incomplete(self):
    # an incomplete record: the quality line is missing
    fastq = StringIO("@name\nACGT+\n")
    with FastqReader(fastq) as fq:
        list(fq)
def test_fastq_wrongformat(self):
    with FastqReader("tests/data/withplus.fastq") as f:
        reads = list(f)
def test_fastqreader_dos(self):
    with FastqReader("tests/data/dos.fastq") as f:
        dos_reads = list(f)
    with FastqReader("tests/data/small.fastq") as f:
        unix_reads = list(f)
    assert dos_reads == unix_reads
def test_fastqreader(self):
    with FastqReader("tests/data/simple.fastq") as f:
        reads = list(f)
    assert reads == simple_fastq
def main():
    args = parser.parse_args()
    dest = args.outfile
    phred = args.phred64 or 33
    threads = args.threads
    logfile = args.logfile
    adapter = args.adapter

    read_queue = Queue()
    result_queue = Queue()
    trimmed_queue = Queue()
    workers = []

    def start_workers():
        for i in xrange(threads):
            worker = Worker(queue=read_queue, results=result_queue,
                            phred64=phred == 64, adapter=adapter)
            workers.append(worker)
            worker.start()

    writer = Writer(queue=result_queue, trimmed=trimmed_queue, outfile=dest)
    writer.start()

    batch = []
    for index, read in enumerate(FastqReader(args.infile.name)):
        batch.append(read)
        # sniff the quality encoding over the first 1000 reads: any quality
        # character above ASCII 74 ('J', the phred+33 maximum) implies phred+64
        if index < 1000 and phred == 33:
            if any(ord(i) > 74 for i in read.qualities):
                phred = 64
        # flush every 10000 reads; delaying the first flush until after the
        # sniff window keeps workers from starting with the wrong encoding
        if (index + 1) % 10000 == 0:
            if not workers:
                start_workers()
            read_queue.put(batch)
            batch = []
    if not workers:
        start_workers()
    read_queue.put(batch)
    processed = index + 1

    # poison pill to stop workers
    for i in xrange(threads):
        read_queue.put(None)
    for i in workers:
        i.join()

    # poison pill for the writer
    result_queue.put(None)
    # wait for writing to finish
    writer.join()

    trimmed_queue.put(None)
    kept_reads = sum(i for i in iter(trimmed_queue.get, None))

    with logfile as o:
        o.write('Starting reads: {0}\n'.format(processed))
        o.write('Processed reads: {0}\n'.format(kept_reads))
    sys.stdout.write('{0}\n'.format(phred))
    return phred
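# main() references a module-level parser built elsewhere. A compatible
# argparse definition, inferred from the attributes main() reads
# (args.infile.name, args.outfile, args.phred64, args.threads, args.logfile,
# args.adapter), might look like the following; the flag spellings, defaults,
# and help text are assumptions.
import argparse

parser = argparse.ArgumentParser(
    description='Trim an adapter from reads in a FASTQ file.')
parser.add_argument('infile', type=argparse.FileType('r'),
                    help='input FASTQ file')
parser.add_argument('-o', '--outfile', default='trimmed.fastq',
                    help='path for the trimmed FASTQ output')
parser.add_argument('-a', '--adapter', required=True,
                    help='adapter sequence to trim')
parser.add_argument('-t', '--threads', type=int, default=1,
                    help='number of worker processes')
parser.add_argument('--phred64', action='store_const', const=64, default=False,
                    help='force phred+64 quality encoding')
parser.add_argument('-l', '--logfile', type=argparse.FileType('w'),
                    default='trim.log',
                    help='file receiving the read-count summary')

# A hypothetical invocation (script name and adapter are placeholders):
#     python trim.py reads.fastq -a TGGAATTCTCGG -t 4 -o out.fastq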