def test_rem_dash(self):
    """Exercise ``Chain._make_from_epo`` on synthetic cigars with shared gaps.

    Two randomized scenarios are run 100 times each.  Every scenario builds a
    target (``homo_sapiens``) and a query (``mus_musculus``) ``EPOitem`` from
    a tab-separated record and chains them with the chromosome sizes
    ``{"chr1": 500}`` and ``{"chr1": 800}``.
    """
    # Scenario 1 (drawn for dash_cols == 4):
    #
    # ****--****-------****  4M2D4M7D4M
    # *******-------*******  7M7D7M
    # has 4 dash columns and should become
    # ****--****---****      4M2D4M3D4M
    # *******---*******      7M3D7M
    for _ in range(100):
        dash_cols = random.randint(0, 10)
        tStart = random.randint(0, 1000)
        qStart = random.randint(0, 1000)
        epo_pair = (
            EPOitem._strfactory(
                "homo_sapiens\t0\t1\t%d\t%d\t1\t%s"
                % (tStart, tStart+12-1, "4M2D4M%dD4M" % (dash_cols+3))),
            EPOitem._strfactory(
                "mus_musculus\t0\t1\t%d\t%d\t1\t%s"
                % (qStart, qStart+14-1, "7M%dD7M" % (dash_cols+3))))
        chain = Chain._make_from_epo(epo_pair[0], epo_pair[1], {"chr1": 500}, {"chr1": 800})
        ti = epo_pair[0].intervals(False)
        qi = epo_pair[1].intervals(False)
        # The distance between consecutive intervals, less the shared dash
        # columns, must equal the second entry of the chain's third component
        # (presumably the target gap sizes -- confirm against
        # Chain._make_from_epo).
        assert ti[2][0] - ti[1][1] - dash_cols == chain[2][1]
        assert qi[1][0] - qi[0][1] - dash_cols == chain[2][1]

    # Scenario 2:
    #
    # ----*****
    # *-------*
    # has 3 dash cols and should become
    # *
    #     *
    # with the qStart += 1 and tStart += 4
    for _ in range(100):
        dash_cols = random.randint(0, 10)
        tm = random.randint(6, 10)
        qm = random.randint(1, 5)
        tStart = random.randint(0, 1000)
        qStart = random.randint(0, 1000)
        epo_pair = (
            EPOitem._strfactory(
                "homo_sapiens\t0\t1\t%d\t%d\t1\t%s"
                % (tStart, tStart+tm-1, "%dD%dM" % (dash_cols+1, tm))),
            EPOitem._strfactory(
                "mus_musculus\t0\t1\t%d\t%d\t1\t%s"
                % (qStart, qStart+qm+1-1, "M%dD%dM" % (dash_cols+tm-qm, qm))))
        chain = Chain._make_from_epo(epo_pair[0], epo_pair[1], {"chr1": 500}, {"chr1": 800})
        # Report the mismatch through the assertion message instead of
        # dropping into an interactive debugger, which would hang an
        # unattended test run.
        assert chain[1][-1] == qm, "%d != %d" % (chain[1][-1], qm)
        # correct also for coordinate interpretation differences between UCSC and EPO
        expected_q_start = (qStart + 1) - 1
        assert expected_q_start == chain[0].qStart, \
            "%d != %d" % (expected_q_start, chain[0].qStart)
def convert_action(trg_comp, qr_comp, ts, qs, opt):
    """Write a chain record to ``opt.output`` for each target/query pair.

    Every combination of an element of ``trg_comp`` with an element of
    ``qr_comp`` is passed to ``Chain._make_from_epo`` together with ``ts``
    and ``qs``.  For each resulting chain the header is written with its id
    suffixed by ``i<pair index>``, followed by one ``"size dt dq"`` line per
    ``(S, T, Q)`` triple, the last entry of ``S``, and a blank line.

    Chains whose ``S`` values sum to zero are logged and skipped.  A
    ``KeyError`` raised while handling a pair is logged as a skipped
    chromosome/contig and processing continues with the next pair.

    The output is produced with plain ``write()`` calls and an explicit loop
    so that the function behaves the same on Python 2 and Python 3 (the
    ``print >>`` chevron and ``itertools.izip`` are Python 2 only, and a bare
    ``map`` is lazy on Python 3).
    """
    for i, (a, b) in enumerate(product(trg_comp, qr_comp)):
        try:
            ch, S, T, Q = Chain._make_from_epo(a, b, ts, qs)
            if np.sum(S) == 0:
                log.info("insignificant genomic alignment block %s ..." % ch.id)
                continue
            new_id = "%si%d" % (ch.id, i)
            out = opt.output
            out.write(str(ch._replace(id=new_id)) + "\n")
            # zip stops at the shortest sequence; the trailing S entry that
            # has no matching T/Q value is emitted separately below.
            for tup in zip(S, T, Q):
                out.write("%d %d %d\n" % tup)
            # Final block size followed by the blank line ending the record.
            out.write("%d\n\n" % S[-1])
        except KeyError:
            log.warning("skipping chromosome/contig (%s, %s)" % (a.chrom, b.chrom))
def convert_action(trg_comp, qr_comp, ts, qs, opt):
    """Write a chain record to ``opt.output`` for each target/query pair.

    Every combination of an element of ``trg_comp`` with an element of
    ``qr_comp`` is passed to ``Chain._make_from_epo`` together with ``ts``
    and ``qs``.  For each resulting chain the header is printed with its id
    suffixed by ``i<pair index>``, followed by one ``"size dt dq"`` line per
    ``(S, T, Q)`` triple, the last entry of ``S``, and a blank line.

    Chains whose ``S`` values sum to zero are logged and skipped.  A
    ``KeyError`` raised while handling a pair is logged as a skipped
    chromosome/contig and processing continues with the next pair.
    """
    for i, (a, b) in enumerate(product(trg_comp, qr_comp)):
        try:
            ch, S, T, Q = Chain._make_from_epo(a, b, ts, qs)
            if np.sum(S) == 0:
                log.info("insignificant genomic alignment block %s ..." % ch.id)
                continue
            new_id = "%si%d" % (ch.id, i)
            print(str(ch._replace(id=new_id)), file=opt.output)
            # An explicit loop is required here: on Python 3 ``map`` returns
            # a lazy iterator, so calling it only for its side effects would
            # never write these lines.
            for tup in zip(S, T, Q):
                opt.output.write("%d %d %d\n" % tup)
            # Final block size; print appends a second newline, leaving the
            # blank line that ends the record.
            print("%d\n" % S[-1], file=opt.output)
        except KeyError:
            log.warning("skipping chromosome/contig (%s, %s)" % (a.chrom, b.chrom))
def test_make_chain(self):
    """Check chains built from ``self.epo_records`` against ``cigar_pairs``.

    Each record pair is chained with the chromosome sizes ``{"chr1": 500}``
    and ``{"chr1": 800}``; pairs for which ``Chain._make_from_epo`` returns a
    falsy value are skipped.  The chain id is used as an index into
    ``cigar_pairs``, and both strings of that pair are reversed when the
    first record is on the ``'-'`` strand.  For every ``(s, t, q)`` triple of
    the chain, the ``s`` columns starting at the running offset must contain
    no ``'-'`` in either string.
    """
    def cch(cigar, s, e):
        # True when the slice cigar[s:e] contains no '-' character.
        return cigar[s:e].find('-') == -1

    for p in self.epo_records:
        chain = Chain._make_from_epo(p[0], p[1], {"chr1": 500}, {"chr1": 800})
        if not chain:
            continue
        ch, S, T, Q = chain
        i = int(ch.id)
        c1, c2 = cigar_pairs[i]
        if p[0].strand == '-':
            c1 = c1[::-1]
            c2 = c2[::-1]
        th = 0  # running column offset into c1 / c2
        for s, t, q in zip(S, T, Q):
            # The assertion message already shows the offending slices, so no
            # interactive debugger breakpoint is needed on failure.
            assert cch(c1, th, th+s) and cch(c2, th, th+s), \
                "%s and %s" % (c1[th:th+s], c2[th:th+s])
            # NOTE(review): the two expressions below are evaluated but their
            # results are discarded, so they check nothing.  They look like
            # intended assertions on the gap region (and comparing c1 in both
            # branches may be a typo for c2) -- confirm before turning them
            # into asserts.
            if t > q:
                cch(c1, th+s, th+s+t) and c1[th+s:th+s+t] == '-'*t
            else:
                cch(c2, th+s, th+s+q) and c1[th+s:th+s+q] == '-'*q
            th = th + s + max(t, q)