def run_bwa_py_sampe_alloc_only(refseq_fname, read_fname, mate_fname):
    """Report memory use while allocating and releasing BWA data structures.

    No alignment is performed: the function loads all read/mate pairs, builds
    the BWA sequence arrays, restores the index and the reference, creates the
    option and insert-size objects, and then releases everything again.
    ``print_meminfo`` is called with a descriptive label after every step.

    :param refseq_fname: reference name handed to ``bwa.restore_index`` and
        ``bwa.restore_reference``.
    :param read_fname: path of the 'fastq-illumina' file with the reads.
    :param mate_fname: path of the 'fastq-illumina' file with the mates.
    """
    # Both input files are closed as soon as the records are in memory; the
    # parsed records do not need the handles afterwards.
    with open(read_fname) as read_file:
        with open(mate_fname) as mate_file:
            pairs = list(it.izip(
                Bio.SeqIO.parse(read_file, 'fastq-illumina'),
                Bio.SeqIO.parse(mate_file, 'fastq-illumina')))
    print_meminfo("AFTER READING PAIRS")

    bwsa = bwa.build_bws_array(pairs)
    print_meminfo("AFTER BUILDING BWSA")
    bwts = bwa.restore_index(refseq_fname)
    print_meminfo("AFTER RESTORING INDEX")
    bnsp, pacseq = bwa.restore_reference(refseq_fname)
    print_meminfo("AFTER RESTORING REFERENCE")

    # These objects are never used below: they are created only so that their
    # footprint shows up in the "AFTER INIT OPT & II" reading.
    gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()
    ii, last_ii = bwa.isize_info_t(), bwa.isize_info_t()
    last_ii.avg = -1.0
    n_pairs = len(pairs)
    print_meminfo("AFTER INIT OPT & II")

    # deallocate seq & ref data
    for i in 0, 1:
        bwa.free_seq(n_pairs, bwsa[i])
        bwa.bwt_destroy(bwts[i])
    bwa.bns_destroy(bnsp)
    print_meminfo("AFTER DEALLOC")

    del pacseq
    n_unreachable = gc.collect()
    logging.debug("n_unreachable = %d", n_unreachable)
    print_meminfo("AFTER DEL PACSEQ")

    del pairs
    n_unreachable = gc.collect()
    logging.debug("n_unreachable = %d", n_unreachable)
    print_meminfo("AFTER DEL PAIRS")
def main(argv):
    """Align read/mate pairs in batches and print the names of each pair.

    The pairs are read from two 'fastq-illumina' files in batches of at most
    5000. Each batch is analyzed through a ``BWAIterator`` driven by an
    ``MRVisitor``; the names of every (read, mate) produced are printed to
    standard output and the batch counters are written to standard error.

    :param argv: argument list; ``argv[1]`` is the reference name,
        ``argv[2]`` the reads file and ``argv[3]`` the mates file.
    :raises SystemExit: with a usage message when fewer than three
        arguments are supplied.
    """
    try:
        refseq_fname = argv[1]
        read_fname = argv[2]
        mate_fname = argv[3]
    except IndexError:
        sys.exit("Usage: %s REFSEQ_FN READ_FN MATE_FN" % sys.argv[0])

    seq_list_len = 5000
    max_isize = pairing_batch_size = 1000
    gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()

    # The parsers are consumed lazily, one batch at a time, so the files must
    # stay open for the whole loop; the with blocks close them on any exit.
    with open(read_fname) as read_file:
        with open(mate_fname) as mate_file:
            pairs_flow = it.izip(
                Bio.SeqIO.parse(read_file, 'fastq-illumina'),
                Bio.SeqIO.parse(mate_file, 'fastq-illumina'))
            while True:
                pairs = list(it.islice(pairs_flow, 0, seq_list_len))
                if not pairs:
                    break
                # NOTE(review): the index and reference restored here are
                # neither used nor destroyed in this function (only the
                # iterator's own bnsp is released below). Confirm whether
                # these calls are still needed; if so they probably need
                # matching destroy calls per batch.
                bwts = bwa.restore_index(refseq_fname)
                bnsp, pacseq = bwa.restore_reference(refseq_fname)
                n_pairs = len(pairs)
                bwsa = bwa.build_bws_array(pairs)

                logger = logging.getLogger("test")
                logger.setLevel(logging.DEBUG)
                counters = get_counters()
                ctx = ContextStub()
                visitor = MRVisitor(logger, ctx, counters)
                bwa_iterator = BWAIterator(refseq_fname, gopt, popt,
                                           max_isize, pairing_batch_size,
                                           visitor)
                for read, mate in bwa_iterator.analyze(bwsa, n_pairs):
                    # Single-argument form prints the same text as
                    # "print a, b" and also parses under Python 3.
                    print("%s %s" % (read.get_name(), mate.get_name()))

                # Release the per-batch sequence arrays and the reference
                # held by the iterator.
                for j in 0, 1:
                    bwa.free_seq(n_pairs, bwsa[j])
                bwa.bns_destroy(bwa_iterator.bnsp)

                for cn, c in counters.iteritems():
                    sys.stderr.write("%s = %d\n" % (cn, c.value))
def run_bwa_py_sampe(refseq_fname, read_fname, mate_fname):
    """Run the paired-end BWA steps on all pairs, reporting memory use.

    All read/mate pairs are loaded at once, then the function goes through
    the BWA calls in order (``cal_sa_reg_gap``, ``cal_pac_pos_pe``,
    ``paired_sw``, ``refine_gapped``, ``analyze_hit``) and finally releases
    the sequence, index and reference data. ``print_meminfo`` is called with
    a descriptive label after every step. Alignment results are discarded.

    :param refseq_fname: reference name handed to ``bwa.restore_index`` and
        ``bwa.restore_reference``.
    :param read_fname: path of the 'fastq-illumina' file with the reads.
    :param mate_fname: path of the 'fastq-illumina' file with the mates.
    """
    # Both input files are closed as soon as the records are in memory.
    with open(read_fname) as read_file:
        with open(mate_fname) as mate_file:
            pairs = list(it.izip(
                Bio.SeqIO.parse(read_file, 'fastq-illumina'),
                Bio.SeqIO.parse(mate_file, 'fastq-illumina')))
    print_meminfo("AFTER READING PAIRS")

    bwsa = bwa.build_bws_array(pairs)
    print_meminfo("AFTER BUILDING BWSA")
    bwts = bwa.restore_index(refseq_fname)
    print_meminfo("AFTER RESTORING INDEX")
    bnsp, pacseq = bwa.restore_reference(refseq_fname)
    print_meminfo("AFTER RESTORING REFERENCE")

    gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()
    ii, last_ii = bwa.isize_info_t(), bwa.isize_info_t()
    last_ii.avg = -1.0
    n_pairs = len(pairs)
    print_meminfo("AFTER INIT OPT & II")

    bwa.cal_sa_reg_gap(0, bwts, n_pairs, bwsa[0], gopt)
    bwa.cal_sa_reg_gap(0, bwts, n_pairs, bwsa[1], gopt)
    print_meminfo("AFTER CAL_SA_REG_GAP")

    # The return value is not needed here; only the memory effect matters.
    bwa.cal_pac_pos_pe(bwts, n_pairs, bwsa, ii, popt, gopt, last_ii)
    print_meminfo("AFTER CAL_PAC_POS_PE")

    bwa.paired_sw(bnsp, pacseq, n_pairs, bwsa, popt, ii)
    print_meminfo("AFTER PAIRED_SW")

    bwa.refine_gapped(bnsp, n_pairs, bwsa[0], pacseq)
    bwa.refine_gapped(bnsp, n_pairs, bwsa[1], pacseq)
    print_meminfo("AFTER REFINE_GAPPED")

    # Each pair is analyzed in both orientations; results are discarded.
    for k in xrange(n_pairs):
        bwa.analyze_hit(gopt[0], bnsp, bwsa[0][k], bwsa[1][k])
        bwa.analyze_hit(gopt[0], bnsp, bwsa[1][k], bwsa[0][k])
    print_meminfo("AFTER ANALYZE_HIT")

    # deallocate seq & ref data
    for i in 0, 1:
        bwa.free_seq(n_pairs, bwsa[i])
        bwa.bwt_destroy(bwts[i])
    bwa.bns_destroy(bnsp)
    print_meminfo("AFTER DEALLOC")

    del pacseq
    n_unreachable = gc.collect()
    logging.debug("n_unreachable = %d", n_unreachable)
    print_meminfo("AFTER DEL PACSEQ")

    del pairs
    n_unreachable = gc.collect()
    logging.debug("n_unreachable = %d", n_unreachable)
    print_meminfo("AFTER DEL PAIRS")
def main(argv):
    """Analyze the first batch of pairs and print a table of multiple hits.

    Up to 10000 read/mate pairs are read from two 'fastq-illumina' files and
    analyzed through a ``BWAIterator``. After a header line, every read with
    ``n_multi > 0`` produces a blank line followed by one line per item
    yielded by ``read.itermulti()``.

    :param argv: argument list; ``argv[1]`` is the reference name,
        ``argv[2]`` the reads file and ``argv[3]`` the mates file.
    :raises SystemExit: with a usage message when fewer than three
        arguments are supplied.
    """
    try:
        refseq_fname = argv[1]
        read_fname = argv[2]
        mate_fname = argv[3]
    except IndexError:
        sys.exit("Usage: %s REFSEQ_FN READ_FN MATE_FN" % sys.argv[0])

    seq_list_len = 10000
    max_isize = pairing_batch_size = 10000
    gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()

    # Only the first batch is consumed, so the input files can be closed as
    # soon as it has been read.
    with open(read_fname) as read_file:
        with open(mate_fname) as mate_file:
            pairs_flow = it.izip(
                Bio.SeqIO.parse(read_file, 'fastq-illumina'),
                Bio.SeqIO.parse(mate_file, 'fastq-illumina'))
            pairs = list(it.islice(pairs_flow, 0, seq_list_len))

    # NOTE(review): the index and reference restored here are neither used
    # nor destroyed in this function (only the iterator's own bnsp is
    # released below). Confirm whether these calls are still needed.
    bwts = bwa.restore_index(refseq_fname)
    bnsp, pacseq = bwa.restore_reference(refseq_fname)
    n_pairs = len(pairs)
    bwsa = bwa.build_bws_array(pairs)
    bwa_iterator = BWAIterator(refseq_fname, gopt, popt, max_isize,
                               pairing_batch_size)
    # Kept under a separate name instead of rebinding the input records.
    aligned_pairs = list(bwa_iterator.analyze(bwsa, n_pairs))

    # The single-argument print form emits the same text as the original
    # comma-separated print statements and also parses under Python 3.
    print("READ POS GAPO GAPE MM STRAND SCORE CIGAR")
    for read, mate in aligned_pairs:
        if read.n_multi > 0:
            print("")
            # Materialized before printing, as in the original code.
            multi_list = list(read.itermulti())
            for m in multi_list:
                print("%s %s %s %s %s %s %s %s" % (
                    read.get_name(), m.pos, m.n_gapo, m.n_gape, m.n_mm,
                    m.strand, m.score, m.get_cigar(read.len)))

    # Release the sequence arrays and the reference held by the iterator.
    for j in 0, 1:
        bwa.free_seq(n_pairs, bwsa[j])
    bwa.bns_destroy(bwa_iterator.bnsp)
def run_bwa_py_sampe(refseq_fname, read_fname, mate_fname, seq_list_len=None):
    """Run the paired-end BWA steps batch by batch, tracking memory use.

    Pairs are read from two "fastq-illumina" files in batches of at most
    ``seq_list_len``. For every batch the index and reference are restored,
    the BWA calls are executed in order, and all data is released again.
    The values returned by ``print_meminfo`` after each step are collected.

    :param refseq_fname: reference name handed to ``bwa.restore_index`` and
        ``bwa.restore_reference``.
    :param read_fname: path of the file with the reads.
    :param mate_fname: path of the file with the mates.
    :param seq_list_len: maximum number of pairs per batch; ``None`` (the
        default) processes all pairs as a single batch.
    :returns: a tuple ``(max_size, max_resident, failed_ii)``: the largest
        first and second values returned by ``print_meminfo`` over all
        samples, and the number of batches for which ``ii.avg`` was negative
        after ``bwa.cal_pac_pos_pe``. When the inputs contain no pairs, no
        sample is taken and both maxima are reported as 0.
    """
    size_list = []
    resident_list = []
    failed_ii = 0

    def sample(label):
        # Take one memory reading and keep it for the final maxima.
        size, resident = print_meminfo(label)
        size_list.append(size)
        resident_list.append(resident)

    # The parsers are consumed lazily, one batch at a time, so the files must
    # stay open for the whole loop; the with blocks close them on any exit.
    with open(read_fname) as read_file:
        with open(mate_fname) as mate_file:
            pairs_flow = it.izip(
                Bio.SeqIO.parse(read_file, "fastq-illumina"),
                Bio.SeqIO.parse(mate_file, "fastq-illumina"))
            while True:
                pairs = list(it.islice(pairs_flow, 0, seq_list_len))
                if not pairs:
                    break
                sample("AFTER READING PAIRS")

                bwsa = bwa.build_bws_array(pairs)
                sample("AFTER BUILDING BWSA")
                bwts = bwa.restore_index(refseq_fname)
                sample("AFTER RESTORING INDEX")
                bnsp, pacseq = bwa.restore_reference(refseq_fname)
                sample("AFTER RESTORING REFERENCE")

                gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()
                ii, last_ii = bwa.isize_info_t(), bwa.isize_info_t()
                last_ii.avg = -1.0
                n_pairs = len(pairs)
                sample("AFTER INIT OPT & II")

                bwa.cal_sa_reg_gap(0, bwts, n_pairs, bwsa[0], gopt)
                bwa.cal_sa_reg_gap(0, bwts, n_pairs, bwsa[1], gopt)
                sample("AFTER CAL_SA_REG_GAP")

                bwa.cal_pac_pos_pe(bwts, n_pairs, bwsa, ii, popt, gopt,
                                   last_ii)
                sample("AFTER CAL_PAC_POS_PE")
                # A negative average after this call is counted as a failure.
                if ii.avg < 0.0:
                    failed_ii += 1

                bwa.paired_sw(bnsp, pacseq, n_pairs, bwsa, popt, ii)
                sample("AFTER PAIRED_SW")

                bwa.refine_gapped(bnsp, n_pairs, bwsa[0], pacseq)
                bwa.refine_gapped(bnsp, n_pairs, bwsa[1], pacseq)
                sample("AFTER REFINE_GAPPED")

                # Each pair is analyzed in both orientations; the results
                # are discarded.
                for k in xrange(n_pairs):
                    bwa.analyze_hit(gopt[0], bnsp, bwsa[0][k], bwsa[1][k])
                    bwa.analyze_hit(gopt[0], bnsp, bwsa[1][k], bwsa[0][k])
                sample("AFTER ANALYZE_HIT")

                # deallocate seq & ref data
                for i in 0, 1:
                    bwa.free_seq(n_pairs, bwsa[i])
                    bwa.bwt_destroy(bwts[i])
                bwa.bns_destroy(bnsp)
                sample("AFTER DEALLOC")

                del pacseq
                n_unreachable = gc.collect()
                logging.debug("n_unreachable = %d", n_unreachable)
                sample("AFTER DEL PACSEQ")

                del pairs
                n_unreachable = gc.collect()
                logging.debug("n_unreachable = %d", n_unreachable)
                sample("AFTER DEL PAIRS")

    if not size_list:
        # No batch was processed, so there is nothing to take a maximum of.
        return 0, 0, failed_ii
    return max(size_list), max(resident_list), failed_ii
def run_bwa_py_sampe(refseq_fname, read_fname, mate_fname, seq_list_len=None):
    """Run the paired-end BWA steps batch by batch, tracking memory use.

    Pairs are read from two 'fastq-illumina' files in batches of at most
    ``seq_list_len``. For every batch the index and reference are restored,
    the BWA calls are executed in order, and all data is released again.
    The values returned by ``print_meminfo`` after each step are collected.

    :param refseq_fname: reference name handed to ``bwa.restore_index`` and
        ``bwa.restore_reference``.
    :param read_fname: path of the file with the reads.
    :param mate_fname: path of the file with the mates.
    :param seq_list_len: maximum number of pairs per batch; ``None`` (the
        default) processes all pairs as a single batch.
    :returns: a tuple ``(max_size, max_resident, failed_ii)``: the largest
        first and second values returned by ``print_meminfo`` over all
        samples, and the number of batches for which ``ii.avg`` was negative
        after ``bwa.cal_pac_pos_pe``. When the inputs contain no pairs, no
        sample is taken and both maxima are reported as 0.
    """
    size_list = []
    resident_list = []
    failed_ii = 0

    def record_meminfo(label):
        # Take one memory reading and keep it for the final maxima.
        size, resident = print_meminfo(label)
        size_list.append(size)
        resident_list.append(resident)

    # The parsers are consumed lazily, one batch at a time, so the files must
    # stay open for the whole loop; the with blocks close them on any exit.
    with open(read_fname) as read_file:
        with open(mate_fname) as mate_file:
            read_flow = Bio.SeqIO.parse(read_file, 'fastq-illumina')
            mate_flow = Bio.SeqIO.parse(mate_file, 'fastq-illumina')
            pairs_flow = it.izip(read_flow, mate_flow)
            while True:
                pairs = list(it.islice(pairs_flow, 0, seq_list_len))
                if not pairs:
                    break
                record_meminfo("AFTER READING PAIRS")

                bwsa = bwa.build_bws_array(pairs)
                record_meminfo("AFTER BUILDING BWSA")
                bwts = bwa.restore_index(refseq_fname)
                record_meminfo("AFTER RESTORING INDEX")
                bnsp, pacseq = bwa.restore_reference(refseq_fname)
                record_meminfo("AFTER RESTORING REFERENCE")

                gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()
                ii, last_ii = bwa.isize_info_t(), bwa.isize_info_t()
                last_ii.avg = -1.0
                n_pairs = len(pairs)
                record_meminfo("AFTER INIT OPT & II")

                bwa.cal_sa_reg_gap(0, bwts, n_pairs, bwsa[0], gopt)
                bwa.cal_sa_reg_gap(0, bwts, n_pairs, bwsa[1], gopt)
                record_meminfo("AFTER CAL_SA_REG_GAP")

                bwa.cal_pac_pos_pe(bwts, n_pairs, bwsa, ii, popt, gopt,
                                   last_ii)
                record_meminfo("AFTER CAL_PAC_POS_PE")
                # A negative average after this call is counted as a failure.
                if ii.avg < 0.0:
                    failed_ii += 1

                bwa.paired_sw(bnsp, pacseq, n_pairs, bwsa, popt, ii)
                record_meminfo("AFTER PAIRED_SW")

                bwa.refine_gapped(bnsp, n_pairs, bwsa[0], pacseq)
                bwa.refine_gapped(bnsp, n_pairs, bwsa[1], pacseq)
                record_meminfo("AFTER REFINE_GAPPED")

                # Each pair is analyzed in both orientations; the results
                # are discarded.
                for k in xrange(n_pairs):
                    bwa.analyze_hit(gopt[0], bnsp, bwsa[0][k], bwsa[1][k])
                    bwa.analyze_hit(gopt[0], bnsp, bwsa[1][k], bwsa[0][k])
                record_meminfo("AFTER ANALYZE_HIT")

                # deallocate seq & ref data
                for i in 0, 1:
                    bwa.free_seq(n_pairs, bwsa[i])
                    bwa.bwt_destroy(bwts[i])
                bwa.bns_destroy(bnsp)
                record_meminfo("AFTER DEALLOC")

                del pacseq
                n_unreachable = gc.collect()
                logging.debug("n_unreachable = %d", n_unreachable)
                record_meminfo("AFTER DEL PACSEQ")

                del pairs
                n_unreachable = gc.collect()
                logging.debug("n_unreachable = %d", n_unreachable)
                record_meminfo("AFTER DEL PAIRS")

    if not size_list:
        # No batch was processed, so there is nothing to take a maximum of.
        return 0, 0, failed_ii
    return max(size_list), max(resident_list), failed_ii