def process_sequences(bwts, bns, pacseq, seq_reader, N, analyze_seqs=None):
    """Feed batches of read pairs from ``seq_reader`` through the ``bwa`` calls.

    Batches are fetched with ``read_seq_pairs(seq_reader, N)`` until an empty
    batch comes back.  Each batch goes, in order, through
    ``bwa.build_bws_array``, ``bwa.cal_sa_reg_gap`` (once per side),
    ``bwa.cal_pac_pos_pe``, ``bwa.paired_sw`` and ``bwa.refine_gapped`` (once
    per side).  The ``avg``/``std``/``low``/``high`` fields of the
    insert-size object are written to stderr after ``cal_pac_pos_pe``.

    Args:
        bwts: passed through to ``cal_sa_reg_gap`` and ``cal_pac_pos_pe``.
        bns: passed through to ``paired_sw``, ``refine_gapped`` and
            ``analyze_seqs``.
        pacseq: passed through to ``paired_sw`` and ``refine_gapped``.
        seq_reader: source handed to ``read_seq_pairs``.
        N: batch size requested from ``read_seq_pairs``.
        analyze_seqs: optional callable invoked once per batch as
            ``analyze_seqs(gopt[0], bns, n_pairs, bwsa)``.  When ``None``
            the batch is processed and freed without being analyzed.

    Returns:
        None.
    """
    gopt = bwa.gap_init_opt()
    popt = bwa.pe_init_opt()
    ii = bwa.isize_info_t()
    last_ii = bwa.isize_info_t()
    # NOTE(review): last_ii is never updated between batches, so every batch
    # sees avg == -1.0 -- presumably "no previous estimate"; confirm.
    last_ii.avg = -1.0
    while True:
        pairs = read_seq_pairs(seq_reader, N)
        seq_pairs_read = len(pairs)
        if seq_pairs_read == 0:
            break
        bwsa = bwa.build_bws_array(pairs)
        try:
            for side in (0, 1):
                bwa.cal_sa_reg_gap(0, bwts, seq_pairs_read, bwsa[side], gopt)
            bwa.cal_pac_pos_pe(bwts, seq_pairs_read, bwsa, ii, popt, gopt,
                               last_ii)
            sys.stderr.write('ii: %r\n' % [ii.avg, ii.std, ii.low, ii.high])
            bwa.paired_sw(bns, pacseq, seq_pairs_read, bwsa, popt, ii)
            for side in (0, 1):
                bwa.refine_gapped(bns, seq_pairs_read, bwsa[side], pacseq)
            if analyze_seqs is not None:
                analyze_seqs(gopt[0], bns, seq_pairs_read, bwsa)
        finally:
            # Free exactly as many records as were built for this batch: the
            # last batch may hold fewer than N pairs.  Doing it in ``finally``
            # releases the buffers even if a step above raises.
            for side in (0, 1):
                bwa.free_seq(seq_pairs_read, bwsa[side])
def process_sequences(bwts, bns, pacseq, seq_reader, N, analyze_seqs=None):
    """Feed batches of read pairs from ``seq_reader`` through the ``bwa`` calls.

    Batches are fetched with ``read_seq_pairs(seq_reader, N)`` until an empty
    batch comes back.  Each batch goes, in order, through
    ``bwa.build_bws_array``, ``bwa.cal_sa_reg_gap`` (once per side),
    ``bwa.cal_pac_pos_pe``, ``bwa.paired_sw`` and ``bwa.refine_gapped`` (once
    per side).  The ``avg``/``std``/``low``/``high`` fields of the
    insert-size object are written to stderr after ``cal_pac_pos_pe``.

    Args:
        bwts: passed through to ``cal_sa_reg_gap`` and ``cal_pac_pos_pe``.
        bns: passed through to ``paired_sw``, ``refine_gapped`` and
            ``analyze_seqs``.
        pacseq: passed through to ``paired_sw`` and ``refine_gapped``.
        seq_reader: source handed to ``read_seq_pairs``.
        N: batch size requested from ``read_seq_pairs``.
        analyze_seqs: optional callable invoked once per batch as
            ``analyze_seqs(gopt[0], bns, n_pairs, bwsa)``.  When ``None``
            the batch is processed and freed without being analyzed.

    Returns:
        None.
    """
    gopt = bwa.gap_init_opt()
    popt = bwa.pe_init_opt()
    ii = bwa.isize_info_t()
    last_ii = bwa.isize_info_t()
    # NOTE(review): last_ii is never updated between batches, so every batch
    # sees avg == -1.0 -- presumably "no previous estimate"; confirm.
    last_ii.avg = -1.0
    while True:
        pairs = read_seq_pairs(seq_reader, N)
        seq_pairs_read = len(pairs)
        if seq_pairs_read == 0:
            break
        bwsa = bwa.build_bws_array(pairs)
        try:
            for side in (0, 1):
                bwa.cal_sa_reg_gap(0, bwts, seq_pairs_read, bwsa[side], gopt)
            bwa.cal_pac_pos_pe(bwts, seq_pairs_read, bwsa, ii, popt, gopt,
                               last_ii)
            sys.stderr.write('ii: %r\n' % [ii.avg, ii.std, ii.low, ii.high])
            bwa.paired_sw(bns, pacseq, seq_pairs_read, bwsa, popt, ii)
            for side in (0, 1):
                bwa.refine_gapped(bns, seq_pairs_read, bwsa[side], pacseq)
            if analyze_seqs is not None:
                analyze_seqs(gopt[0], bns, seq_pairs_read, bwsa)
        finally:
            # Free exactly as many records as were built for this batch: the
            # last batch may hold fewer than N pairs.  Doing it in ``finally``
            # releases the buffers even if a step above raises.
            for side in (0, 1):
                bwa.free_seq(seq_pairs_read, bwsa[side])
def run_bwa_py_sampe(refseq_fname, read_fname, mate_fname):
    """Run the ``bwa`` paired-end calls once, logging memory at every stage.

    All read/mate pairs are loaded up front from two ``fastq-illumina``
    files, then pushed through ``bwa.build_bws_array``,
    ``bwa.restore_index``, ``bwa.restore_reference``,
    ``bwa.cal_sa_reg_gap``, ``bwa.cal_pac_pos_pe``, ``bwa.paired_sw``,
    ``bwa.refine_gapped`` and ``bwa.analyze_hit``.  ``print_meminfo`` is
    called with a label after each stage and again after each deallocation
    step.

    Args:
        refseq_fname: name handed to ``bwa.restore_index`` and
            ``bwa.restore_reference``.
        read_fname: path of the FASTQ file holding the reads.
        mate_fname: path of the FASTQ file holding the mates; it is zipped
            record-by-record with ``read_fname``.

    Returns:
        None.
    """
    # Both inputs are fully consumed here, so the files can be closed before
    # any of the heavy work starts.
    with open(read_fname) as read_file:
        with open(mate_fname) as mate_file:
            read_flow = Bio.SeqIO.parse(read_file, 'fastq-illumina')
            mate_flow = Bio.SeqIO.parse(mate_file, 'fastq-illumina')
            pairs = list(it.izip(read_flow, mate_flow))
    print_meminfo("AFTER READING PAIRS")

    bwsa = bwa.build_bws_array(pairs)
    print_meminfo("AFTER BUILDING BWSA")
    bwts = bwa.restore_index(refseq_fname)
    print_meminfo("AFTER RESTORING INDEX")
    bnsp, pacseq = bwa.restore_reference(refseq_fname)
    print_meminfo("AFTER RESTORING REFERENCE")

    gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()
    ii, last_ii = bwa.isize_info_t(), bwa.isize_info_t()
    last_ii.avg = -1.0
    l = len(pairs)
    print_meminfo("AFTER INIT OPT & II")

    for i in (0, 1):
        bwa.cal_sa_reg_gap(0, bwts, l, bwsa[i], gopt)
    print_meminfo("AFTER CAL_SA_REG_GAP")
    bwa.cal_pac_pos_pe(bwts, l, bwsa, ii, popt, gopt, last_ii)
    print_meminfo("AFTER CAL_PAC_POS_PE")
    bwa.paired_sw(bnsp, pacseq, l, bwsa, popt, ii)
    print_meminfo("AFTER PAIRED_SW")
    for i in (0, 1):
        bwa.refine_gapped(bnsp, l, bwsa[i], pacseq)
    print_meminfo("AFTER REFINE_GAPPED")

    # The hits are computed only to exercise analyze_hit for the memory
    # measurement; the results themselves are not used.
    for k in xrange(l):
        bwa.analyze_hit(gopt[0], bnsp, bwsa[0][k], bwsa[1][k])
        bwa.analyze_hit(gopt[0], bnsp, bwsa[1][k], bwsa[0][k])
    print_meminfo("AFTER ANALYZE_HIT")

    # deallocate seq & ref data
    for i in (0, 1):
        bwa.free_seq(l, bwsa[i])
        bwa.bwt_destroy(bwts[i])
    bwa.bns_destroy(bnsp)
    print_meminfo("AFTER DEALLOC")

    del pacseq
    n_unreachable = gc.collect()
    logging.debug("n_unreachable = %d", n_unreachable)
    print_meminfo("AFTER DEL PACSEQ")

    del pairs
    n_unreachable = gc.collect()
    logging.debug("n_unreachable = %d", n_unreachable)
    print_meminfo("AFTER DEL PAIRS")
def run_bwa_py_sampe(refseq_fname, read_fname, mate_fname, seq_list_len=None):
    """Run the ``bwa`` paired-end calls in batches and report peak memory.

    Read/mate pairs are pulled lazily from two ``fastq-illumina`` files in
    batches of ``seq_list_len``.  For every batch the index and reference
    are restored, the batch goes through ``bwa.cal_sa_reg_gap``,
    ``bwa.cal_pac_pos_pe``, ``bwa.paired_sw``, ``bwa.refine_gapped`` and
    ``bwa.analyze_hit``, and everything is deallocated again.  The
    ``(size, resident)`` pair returned by ``print_meminfo`` is recorded
    after each stage.

    Args:
        refseq_fname: name handed to ``bwa.restore_index`` and
            ``bwa.restore_reference``.
        read_fname: path of the FASTQ file holding the reads.
        mate_fname: path of the FASTQ file holding the mates; it is zipped
            record-by-record with ``read_fname``.
        seq_list_len: maximum number of pairs per batch.  ``None`` (the
            default) puts all pairs in a single batch.

    Returns:
        A tuple ``(max_size, max_resident, failed_ii)``: the largest
        ``size`` and ``resident`` values recorded, and the number of batches
        for which ``ii.avg`` was negative after ``bwa.cal_pac_pos_pe``.

    Raises:
        ValueError: if the inputs yield no pairs, since no memory sample is
            ever recorded and ``max()`` is applied to an empty list.
    """
    size_list = []
    resident_list = []
    failed_ii = 0

    def sample(label):
        # Take one labelled memory reading and remember both values.
        size, resident = print_meminfo(label)
        size_list.append(size)
        resident_list.append(resident)

    # The pairs are read lazily, so both files must stay open for the whole
    # loop; the ``with`` blocks close them on normal exit and on error.
    with open(read_fname) as read_file:
        with open(mate_fname) as mate_file:
            read_flow = Bio.SeqIO.parse(read_file, "fastq-illumina")
            mate_flow = Bio.SeqIO.parse(mate_file, "fastq-illumina")
            pairs_flow = it.izip(read_flow, mate_flow)
            while True:
                pairs = list(it.islice(pairs_flow, 0, seq_list_len))
                if not pairs:
                    break
                sample("AFTER READING PAIRS")

                bwsa = bwa.build_bws_array(pairs)
                sample("AFTER BUILDING BWSA")
                bwts = bwa.restore_index(refseq_fname)
                sample("AFTER RESTORING INDEX")
                bnsp, pacseq = bwa.restore_reference(refseq_fname)
                sample("AFTER RESTORING REFERENCE")

                gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()
                ii, last_ii = bwa.isize_info_t(), bwa.isize_info_t()
                last_ii.avg = -1.0
                l = len(pairs)
                sample("AFTER INIT OPT & II")

                for i in (0, 1):
                    bwa.cal_sa_reg_gap(0, bwts, l, bwsa[i], gopt)
                sample("AFTER CAL_SA_REG_GAP")
                bwa.cal_pac_pos_pe(bwts, l, bwsa, ii, popt, gopt, last_ii)
                sample("AFTER CAL_PAC_POS_PE")
                # A negative average is counted as a failed batch --
                # presumably the insert-size estimate could not be computed;
                # confirm against bwa.
                if ii.avg < 0.0:
                    failed_ii += 1
                bwa.paired_sw(bnsp, pacseq, l, bwsa, popt, ii)
                sample("AFTER PAIRED_SW")
                for i in (0, 1):
                    bwa.refine_gapped(bnsp, l, bwsa[i], pacseq)
                sample("AFTER REFINE_GAPPED")

                # The hits are computed only to exercise analyze_hit for the
                # memory measurement; the results themselves are not used.
                for k in xrange(l):
                    bwa.analyze_hit(gopt[0], bnsp, bwsa[0][k], bwsa[1][k])
                    bwa.analyze_hit(gopt[0], bnsp, bwsa[1][k], bwsa[0][k])
                sample("AFTER ANALYZE_HIT")

                # deallocate seq & ref data
                for i in (0, 1):
                    bwa.free_seq(l, bwsa[i])
                    bwa.bwt_destroy(bwts[i])
                bwa.bns_destroy(bnsp)
                sample("AFTER DEALLOC")

                del pacseq
                n_unreachable = gc.collect()
                logging.debug("n_unreachable = %d", n_unreachable)
                sample("AFTER DEL PACSEQ")

                del pairs
                n_unreachable = gc.collect()
                logging.debug("n_unreachable = %d", n_unreachable)
                sample("AFTER DEL PAIRS")
    return max(size_list), max(resident_list), failed_ii
def run_bwa_py_sampe(refseq_fname, read_fname, mate_fname, seq_list_len=None):
    """Run the ``bwa`` paired-end calls in batches and report peak memory.

    Read/mate pairs are pulled lazily from two ``fastq-illumina`` files in
    batches of ``seq_list_len``.  For every batch the index and reference
    are restored, the batch goes through ``bwa.cal_sa_reg_gap``,
    ``bwa.cal_pac_pos_pe``, ``bwa.paired_sw``, ``bwa.refine_gapped`` and
    ``bwa.analyze_hit``, and everything is deallocated again.  The
    ``(size, resident)`` pair returned by ``print_meminfo`` is recorded
    after each stage.

    Args:
        refseq_fname: name handed to ``bwa.restore_index`` and
            ``bwa.restore_reference``.
        read_fname: path of the FASTQ file holding the reads.
        mate_fname: path of the FASTQ file holding the mates; it is zipped
            record-by-record with ``read_fname``.
        seq_list_len: maximum number of pairs per batch.  ``None`` (the
            default) puts all pairs in a single batch.

    Returns:
        A tuple ``(max_size, max_resident, failed_ii)``: the largest
        ``size`` and ``resident`` values recorded, and the number of batches
        for which ``ii.avg`` was negative after ``bwa.cal_pac_pos_pe``.

    Raises:
        ValueError: if the inputs yield no pairs, since no memory sample is
            ever recorded and ``max()`` is applied to an empty list.
    """
    size_list = []
    resident_list = []
    failed_ii = 0

    def sample(label):
        # Take one labelled memory reading and remember both values.
        size, resident = print_meminfo(label)
        size_list.append(size)
        resident_list.append(resident)

    # The pairs are read lazily, so both files must stay open for the whole
    # loop; the ``with`` blocks close them on normal exit and on error.
    with open(read_fname) as read_file:
        with open(mate_fname) as mate_file:
            read_flow = Bio.SeqIO.parse(read_file, 'fastq-illumina')
            mate_flow = Bio.SeqIO.parse(mate_file, 'fastq-illumina')
            pairs_flow = it.izip(read_flow, mate_flow)
            while True:
                pairs = list(it.islice(pairs_flow, 0, seq_list_len))
                if not pairs:
                    break
                sample("AFTER READING PAIRS")

                bwsa = bwa.build_bws_array(pairs)
                sample("AFTER BUILDING BWSA")
                bwts = bwa.restore_index(refseq_fname)
                sample("AFTER RESTORING INDEX")
                bnsp, pacseq = bwa.restore_reference(refseq_fname)
                sample("AFTER RESTORING REFERENCE")

                gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()
                ii, last_ii = bwa.isize_info_t(), bwa.isize_info_t()
                last_ii.avg = -1.0
                l = len(pairs)
                sample("AFTER INIT OPT & II")

                for i in (0, 1):
                    bwa.cal_sa_reg_gap(0, bwts, l, bwsa[i], gopt)
                sample("AFTER CAL_SA_REG_GAP")
                bwa.cal_pac_pos_pe(bwts, l, bwsa, ii, popt, gopt, last_ii)
                sample("AFTER CAL_PAC_POS_PE")
                # A negative average is counted as a failed batch --
                # presumably the insert-size estimate could not be computed;
                # confirm against bwa.
                if ii.avg < 0.0:
                    failed_ii += 1
                bwa.paired_sw(bnsp, pacseq, l, bwsa, popt, ii)
                sample("AFTER PAIRED_SW")
                for i in (0, 1):
                    bwa.refine_gapped(bnsp, l, bwsa[i], pacseq)
                sample("AFTER REFINE_GAPPED")

                # The hits are computed only to exercise analyze_hit for the
                # memory measurement; the results themselves are not used.
                for k in xrange(l):
                    bwa.analyze_hit(gopt[0], bnsp, bwsa[0][k], bwsa[1][k])
                    bwa.analyze_hit(gopt[0], bnsp, bwsa[1][k], bwsa[0][k])
                sample("AFTER ANALYZE_HIT")

                # deallocate seq & ref data
                for i in (0, 1):
                    bwa.free_seq(l, bwsa[i])
                    bwa.bwt_destroy(bwts[i])
                bwa.bns_destroy(bnsp)
                sample("AFTER DEALLOC")

                del pacseq
                n_unreachable = gc.collect()
                logging.debug("n_unreachable = %d", n_unreachable)
                sample("AFTER DEL PACSEQ")

                del pairs
                n_unreachable = gc.collect()
                logging.debug("n_unreachable = %d", n_unreachable)
                sample("AFTER DEL PAIRS")
    return max(size_list), max(resident_list), failed_ii