def test_paired(self):
    """Check FASTQ parsing matches FASTA+QUAL parsing"""
    # Read the paired FASTA + QUAL files; a single with-statement keeps
    # both handles open only as long as the iterator needs them.
    with open("Quality/example.fasta") as fasta_handle, \
            open("Quality/example.qual") as qual_handle:
        paired_records = list(
            QualityIO.PairedFastaQualIterator(fasta_handle, qual_handle))
    # Parse the equivalent FASTQ file and confirm both routes agree.
    fastq_records = list(SeqIO.parse("Quality/example.fastq", "fastq"))
    self.assertTrue(compare_records(paired_records, fastq_records))
def test_paired(self):
    """Check FASTQ parsing matches FASTA+QUAL parsing"""
    # Use context managers so all three file handles are closed even if
    # parsing raises (the previous version opened them inline and leaked
    # them, which triggers ResourceWarnings and can exhaust descriptors
    # across a large test run).
    with open("Quality/example.fasta") as fasta_h:
        with open("Quality/example.qual") as qual_h:
            records1 = list(QualityIO.PairedFastaQualIterator(fasta_h, qual_h))
    with open("Quality/example.fastq") as fastq_h:
        records2 = list(SeqIO.parse(fastq_h, "fastq"))
    # assertTrue replaces the deprecated assert_ alias (removed in
    # modern unittest).
    self.assertTrue(compare_records(records1, records2))
def not_trimmed(cur, conf, options, sequence, qual):
    """Write untrimmed FASTA and QUAL records for every sequence whose name
    belongs to the requested cluster.

    cur      -- open DB cursor used to look up sequence names by cluster
    conf     -- ConfigParser with 'Input' section ('sequence', 'qual' paths)
    options  -- parsed options; options.species selects the cluster
    sequence -- writable handle for FASTA output (closed on exit)
    qual     -- writable handle for QUAL output (closed on exit)
    """
    # BUG FIX: parameters must be a sequence.  '(options.species)' is just a
    # parenthesized string, which the DB driver iterates character-by-
    # character; a 1-tuple binds the whole value to the single %s.
    cur.execute('SELECT name FROM sequence WHERE cluster = %s',
                (options.species,))
    # Set membership gives O(1) lookups per record below.
    dataset = set(row[0] for row in cur.fetchall())
    seq_handle = open(conf.get('Input', 'sequence'), "rU")
    qual_handle = open(conf.get('Input', 'qual'), "rU")
    try:
        # Iterate the paired records directly instead of the old
        # while/next()/StopIteration dance (also Py2/Py3 portable).
        for record in QualityIO.PairedFastaQualIterator(seq_handle,
                                                        qual_handle):
            if record.name in dataset:
                sequence.write('%s' % record.format('fasta'))
                qual.write('%s' % record.format('qual'))
    finally:
        # Close inputs as well as outputs (the originals were leaked).
        seq_handle.close()
        qual_handle.close()
        qual.close()
        sequence.close()
def main():
    '''Main loop'''
    # Wall-clock start, reported again at the end for total runtime.
    start_time = time.time()
    options, arg = interface()
    motd()
    print 'Started: ', time.strftime("%a %b %d, %Y %H:%M:%S",
                                     time.localtime(start_time))
    conf = ConfigParser.ConfigParser()
    conf.read(options.conf)
    # build our configuration
    params = Parameters(conf)
    conn = MySQLdb.connect(user=params.user, passwd=params.pwd, db=params.db)
    cur = conn.cursor()
    # crank out a new table for the data
    createSeqTable(cur)
    conn.commit()
    # Total record count is only used to size/finish the progress bar.
    seqcount = sequenceCount(conf.get('Input', 'sequence'))
    # Lazy iterator over paired FASTA/QUAL records; both handles stay open
    # for the lifetime of this function (NOTE(review): they are never
    # explicitly closed).
    sequence = QualityIO.PairedFastaQualIterator(
        open(conf.get('Input', 'sequence'), "rU"),
        open(conf.get('Input', 'qual'), "rU"))
    #pdb.set_trace()
    if conf.getboolean('Multiprocessing', 'MULTIPROCESSING'):
        # get num processors
        n_procs = conf.get('Multiprocessing', 'processors')
        if n_procs == 'Auto':
            # we'll use x-1 cores (where x = avail. cores)
            n_procs = multiprocessing.cpu_count() - 1
        else:
            n_procs = int(n_procs)
        print 'Multiprocessing. Number of processors = ', n_procs
        # to test with fewer sequences
        #count = 0
        try:
            threads = []
            pb = progress.bar(0, seqcount, 60)
            pb_inc = 0
            # 'while sequence:' is always truthy for an iterator object;
            # the loop actually terminates via the StopIteration raised by
            # sequence.next() once the input is exhausted (caught below).
            while sequence:
                if len(threads) < n_procs:
                    # Spawn one worker process per record, up to n_procs
                    # concurrent children.
                    p = multiprocessing.Process(target=linkerWorker,
                                                args=(sequence.next(),
                                                      params,))
                    p.start()
                    threads.append(p)
                    # Update the progress bar every 1000 records and on the
                    # final record.
                    if (pb_inc + 1) % 1000 == 0:
                        pb.__call__(pb_inc + 1)
                    elif pb_inc + 1 == seqcount:
                        pb.__call__(pb_inc + 1)
                    pb_inc += 1
                else:
                    # Pool is full: reap any children that have finished so
                    # a new worker can be started on the next pass.
                    for t in threads:
                        if not t.is_alive():
                            threads.remove(t)
        except StopIteration:
            # Input exhausted -- normal loop exit.
            pass
    else:
        print 'Not using multiprocessing'
        count = 0
        try:
            pb = progress.bar(0, seqcount, 60)
            pb_inc = 0
            #while count < 1000:
            # Same StopIteration-driven loop as above, single process.
            while sequence:
                #count +=1
                linkerWorker(sequence.next(), params)
                if (pb_inc + 1) % 1000 == 0:
                    pb.__call__(pb_inc + 1)
                elif pb_inc + 1 == seqcount:
                    pb.__call__(pb_inc + 1)
                pb_inc += 1
        except StopIteration:
            pass
    print '\n'
    cur.close()
    conn.close()
    end_time = time.time()
    print 'Ended: ', time.strftime("%a %b %d, %Y %H:%M:%S",
                                   time.localtime(end_time))
    print '\nTime for execution: ', (end_time - start_time) / 60, 'minutes'
def action(arguments):
    """
    Given parsed arguments, filter input files.

    Builds a pipeline of sequence filters from the parsed command-line
    arguments, lazily streams records from the input file through every
    filter, writes the surviving records to the output file, then emits a
    tab-delimited per-filter report.
    """
    # --quality-window-mean-qual is meaningless without --quality-window;
    # reject the inconsistent combination up front.
    if arguments.quality_window_mean_qual and not arguments.quality_window:
        raise ValueError("--quality-window-mean-qual specified without "
                         "--quality-window")
    # Barcode/primer matching below depends on Biopython's optional C
    # trie modules.
    if trie is None or triefind is None:
        raise ValueError(
            'Missing Bio.trie and/or Bio.triefind modules. Cannot continue')
    filters = []
    input_type = fileformat.from_handle(arguments.sequence_file)
    output_type = fileformat.from_handle(arguments.output_file)
    with arguments.sequence_file as fp:
        if arguments.input_qual:
            # Pair the FASTA sequence file with a separate QUAL file.
            sequences = QualityIO.PairedFastaQualIterator(
                fp, arguments.input_qual)
        else:
            sequences = SeqIO.parse(fp, input_type)
        # Event listener lets report handlers observe records as they
        # pass the 'read' and 'write' stages.
        listener = RecordEventListener()
        if arguments.details_out:
            rh = RecordReportHandler(arguments.details_out, arguments.argv,
                                     arguments.details_comment)
            rh.register_with(listener)
        # Track read sequences
        sequences = listener.iterable_hook('read', sequences)
        # Add filters
        # Mean-quality filtering only applies when per-base qualities are
        # available, i.e. FASTQ input.
        if arguments.min_mean_quality and input_type == 'fastq':
            qfilter = QualityScoreFilter(arguments.min_mean_quality)
            filters.append(qfilter)
        if arguments.max_length:
            max_length_filter = MaxLengthFilter(arguments.max_length)
            filters.append(max_length_filter)
        if arguments.min_length:
            min_length_filter = MinLengthFilter(arguments.min_length)
            filters.append(min_length_filter)
        # 'is not None' (rather than truthiness) so an explicit 0 still
        # enables the ambiguity filters.
        if arguments.max_ambiguous is not None:
            max_ambig_filter = MaxAmbiguousFilter(arguments.max_ambiguous)
            filters.append(max_ambig_filter)
        if arguments.pct_ambiguous is not None:
            pct_ambig_filter = PctAmbiguousFilter(arguments.pct_ambiguous)
            filters.append(pct_ambig_filter)
        if arguments.ambiguous_action:
            ambiguous_filter = AmbiguousBaseFilter(arguments.ambiguous_action)
            filters.append(ambiguous_filter)
        if arguments.quality_window:
            # Fall back to the global minimum mean quality when no
            # window-specific threshold was supplied.
            min_qual = (arguments.quality_window_mean_qual or
                        arguments.min_mean_quality)
            window_filter = WindowQualityScoreFilter(arguments.quality_window,
                                                     min_qual)
            # Window trimming runs before all other filters.
            filters.insert(0, window_filter)
        if arguments.barcode_file:
            with arguments.barcode_file:
                tr = parse_barcode_file(arguments.barcode_file,
                                        arguments.primer,
                                        arguments.barcode_header)
            f = PrimerBarcodeFilter(tr)
            filters.append(f)
            if arguments.map_out:
                # Optional read-id -> sample map, written as barcodes are
                # recognized.
                barcode_writer = csv.writer(
                    arguments.map_out,
                    quoting=getattr(csv, arguments.quoting),
                    lineterminator='\n')

                def barcode_handler(record, sample, barcode=None):
                    barcode_writer.writerow((record.id, sample))

                listener.register_handler('found_barcode', barcode_handler)
        # Chain the filters: each wraps the previous iterator, so records
        # stream lazily through the whole pipeline.
        for f in filters:
            f.listener = listener
            sequences = f.filter_records(sequences)
        # Track sequences which passed all filters
        sequences = listener.iterable_hook('write', sequences)
        with arguments.output_file:
            # Writing here drives the entire lazy pipeline.
            SeqIO.write(sequences, arguments.output_file, output_type)
    # Per-filter statistics; generator is consumed by writerows below.
    rpt_rows = (f.report_dict() for f in filters)
    # Write report
    with arguments.report_out as fp:
        writer = csv.DictWriter(
            fp, BaseFilter.report_fields, lineterminator='\n',
            delimiter='\t')
        writer.writeheader()
        writer.writerows(rpt_rows)
seq, tag = str(record.seq), str(record.seq) seq_match, tag_match, score, start, end = pairwise2.align.localms( seq, tag, 5.0, -4.0, -9.0, -0.5, one_alignment_only=True)[0] #name = multiprocessing.current_process().name #print 'Worker', name, str(record.seq) print "Parent: ", os.getppid(), "Child: ", os.getpid(), "Count: ", count return if __name__ == '__main__': start_time = time.time() conf = ConfigParser.ConfigParser() conf.read('mc454.conf') #jobs = [] record = QualityIO.PairedFastaQualIterator( open(conf.get('Input', 'sequence'), "rU"), open(conf.get('Input', 'qual'), "rU")) mproc = True if mproc == True: count = 0 try: while count < 500: #pdb.set_trace() jobs = [] for i in range(multiprocessing.cpu_count()): count += 1 p = multiprocessing.Process(target=worker, args=(record.next(), count)) jobs.append(p) p.start() #p.join()