def main(argv):
    """Align paired reads in batches and print each read/mate name pair.

    Reads and mates are parsed in lockstep from two 'fastq-illumina'
    files, grouped into batches of ``seq_list_len`` pairs, and each batch
    is run through ``BWAIterator.analyze``.  After every batch the
    counters returned by ``get_counters()`` are written to stderr.

    Args:
        argv: command-line style sequence; ``argv[1]`` is the reference
            sequence file name, ``argv[2]`` the reads file name and
            ``argv[3]`` the mates file name.

    Raises:
        SystemExit: with a usage message when fewer than three file
            names are supplied.
    """
    try:
        refseq_fname = argv[1]
        read_fname = argv[2]
        mate_fname = argv[3]
    except IndexError:
        sys.exit("Usage: %s REFSEQ_FN READ_FN MATE_FN" % sys.argv[0])

    seq_list_len = 5000  # read/mate pairs consumed per batch
    max_isize = pairing_batch_size = 1000

    gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()

    # The logger does not depend on the batch, so configure it once.
    logger = logging.getLogger("test")
    logger.setLevel(logging.DEBUG)

    # Nested ``with`` blocks guarantee both input files are closed, even if
    # parsing or alignment raises part-way through.
    with open(read_fname) as read_file:
        with open(mate_fname) as mate_file:
            read_flow = Bio.SeqIO.parse(read_file, 'fastq-illumina')
            mate_flow = Bio.SeqIO.parse(mate_file, 'fastq-illumina')
            pairs_flow = it.izip(read_flow, mate_flow)

            # NOTE(review): per-batch cleanup and counter reporting are
            # assumed to belong inside this loop -- confirm.
            while True:
                pairs = list(it.islice(pairs_flow, 0, seq_list_len))
                if not pairs:
                    break

                # NOTE(review): these handles are never used or released
                # here; the calls are kept in case BWAIterator relies on
                # their side effects -- confirm and drop if not.
                bwts = bwa.restore_index(refseq_fname)
                bnsp, pacseq = bwa.restore_reference(refseq_fname)

                n_pairs = len(pairs)
                bwsa = bwa.build_bws_array(pairs)

                counters = get_counters()
                ctx = ContextStub()
                visitor = MRVisitor(logger, ctx, counters)
                bwa_iterator = BWAIterator(refseq_fname, gopt, popt,
                                           max_isize, pairing_batch_size,
                                           visitor)

                for read, mate in bwa_iterator.analyze(bwsa, n_pairs):
                    # Single-string form: same output as the Python 2
                    # ``print a, b`` statement, and parses on Python 3.
                    print("%s %s" % (read.get_name(), mate.get_name()))

                # Release the sequence arrays built for this batch.
                for j in 0, 1:
                    bwa.free_seq(n_pairs, bwsa[j])
                bwa.bns_destroy(bwa_iterator.bnsp)

                for cn, c in counters.iteritems():
                    sys.stderr.write("%s = %d\n" % (cn, c.value))
def main(argv):
    """Align the first batch of read pairs and print each read's multi hits.

    Up to ``seq_list_len`` read/mate pairs are parsed in lockstep from two
    'fastq-illumina' files and run through ``BWAIterator.analyze``.  For
    every read with ``n_multi > 0`` a blank line is printed, followed by
    one line per item yielded by ``read.itermulti()``.

    Args:
        argv: command-line style sequence; ``argv[1]`` is the reference
            sequence file name, ``argv[2]`` the reads file name and
            ``argv[3]`` the mates file name.

    Raises:
        SystemExit: with a usage message when fewer than three file
            names are supplied.
    """
    try:
        refseq_fname = argv[1]
        read_fname = argv[2]
        mate_fname = argv[3]
    except IndexError:
        sys.exit("Usage: %s REFSEQ_FN READ_FN MATE_FN" % sys.argv[0])

    seq_list_len = 10000  # only this many pairs are read and aligned
    max_isize = pairing_batch_size = 10000

    gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()

    # The batch is fully materialised inside the ``with`` blocks, so both
    # input files are closed before alignment starts.
    with open(read_fname) as read_file:
        with open(mate_fname) as mate_file:
            read_flow = Bio.SeqIO.parse(read_file, 'fastq-illumina')
            mate_flow = Bio.SeqIO.parse(mate_file, 'fastq-illumina')
            pairs_flow = it.izip(read_flow, mate_flow)
            pairs = list(it.islice(pairs_flow, 0, seq_list_len))

    # NOTE(review): these handles are never used or released here; the
    # calls are kept in case BWAIterator relies on their side effects --
    # confirm and drop if not.
    bwts = bwa.restore_index(refseq_fname)
    bnsp, pacseq = bwa.restore_reference(refseq_fname)

    n_pairs = len(pairs)
    bwsa = bwa.build_bws_array(pairs)
    bwa_iterator = BWAIterator(refseq_fname, gopt, popt, max_isize,
                               pairing_batch_size)
    aligned_pairs = list(bwa_iterator.analyze(bwsa, n_pairs))

    # Single-string print calls give the same output as the Python 2 print
    # statement and also parse on Python 3.
    print("READ POS GAPO GAPE MM STRAND SCORE CIGAR")
    for read, _mate in aligned_pairs:
        if read.n_multi > 0:
            print("")  # blank separator line before each read's hits
            for m in read.itermulti():
                print("%s %s %s %s %s %s %s %s" % (
                    read.get_name(), m.pos, m.n_gapo, m.n_gape, m.n_mm,
                    m.strand, m.score, m.get_cigar(read.len)))

    # Release the sequence arrays built for this batch.
    for j in 0, 1:
        bwa.free_seq(n_pairs, bwsa[j])
    bwa.bns_destroy(bwa_iterator.bnsp)