Ejemplo n.º 1
0
    def test_rem_dash(self):
        # ****--****-------****  4M2D4M7D4M
        # *******-------*******  7M7D7M
        # has 4 dash columns and should become
        # ****--****---****      4M2D4M3D4M
        # *******---*******      7M3D7M

        for i in range(100):
            dash_cols = random.randint(0, 10)
            tStart = random.randint(0, 1000)
            qStart = random.randint(0, 1000)
            epo_pair = (
                EPOitem._strfactory("homo_sapiens\t0\t1\t%d\t%d\t1\t%s" % (tStart, tStart+12-1, "4M2D4M%dD4M" % (dash_cols+3))),
                EPOitem._strfactory("mus_musculus\t0\t1\t%d\t%d\t1\t%s" % (qStart, qStart+14-1, "7M%dD7M" % (dash_cols+3))))
            chain = Chain._make_from_epo(epo_pair[0], epo_pair[1], {"chr1": 500}, {"chr1": 800})
            ti = epo_pair[0].intervals(False)
            qi = epo_pair[1].intervals(False)
            assert ti[2][0] - ti[1][1] - dash_cols == chain[2][1]
            assert qi[1][0] - qi[0][1] - dash_cols == chain[2][1]

        # ----*****
        # *-------*
        # has 3 dash cols and should become
        # *
        # *
        # with the qStart += 1 and tStart += 4

        for i in range(100):
            dash_cols = random.randint(0, 10)
            tm = random.randint(6, 10)
            qm = random.randint(1, 5)

            tStart = random.randint(0, 1000)
            qStart = random.randint(0, 1000)

            epo_pair = (
                EPOitem._strfactory("homo_sapiens\t0\t1\t%d\t%d\t1\t%s" % (tStart, tStart+tm-1, "%dD%dM" % (dash_cols+1, tm))),
                EPOitem._strfactory("mus_musculus\t0\t1\t%d\t%d\t1\t%s" % (qStart, qStart+qm+1-1, "M%dD%dM" % (dash_cols+tm-qm, qm))))
            chain = Chain._make_from_epo(epo_pair[0], epo_pair[1], {"chr1": 500}, {"chr1": 800})
            if chain[1][-1] != qm:
                pdb.set_trace()
            assert chain[1][-1] == qm
            # correct also for coordinate interpretation differences between UCSC and EPO
            assert (qStart + 1) - 1 == chain[0].qStart, "%d != %d" % (qStart + 1, chain[0].qStart)
Ejemplo n.º 2
0
def convert_action(trg_comp, qr_comp, ts, qs, opt):
    for i, (a,b) in enumerate(product(trg_comp, qr_comp)):
        try:
            ch, S, T, Q = Chain._make_from_epo(a, b, ts, qs)
            if np.sum(S) == 0:
                log.info("insignificant genomic alignment block %s ..." % ch.id)
                continue
            new_id = "%si%d" % (ch.id, i)
            print >>opt.output, str(ch._replace(id=new_id))
            map(lambda tup: opt.output.write("%d %d %d\n" % tup), izip(S,T,Q))
            print >>opt.output, "%d\n" % S[-1]
        except KeyError:
            log.warning("skipping chromosome/contig (%s, %s)" % (a.chrom, b.chrom))
Ejemplo n.º 3
0
def convert_action(trg_comp, qr_comp, ts, qs, opt):
    for i, (a,b) in enumerate(product(trg_comp, qr_comp)):
        try:
            ch, S, T, Q = Chain._make_from_epo(a, b, ts, qs)
            if np.sum(S) == 0:
                log.info("insignificant genomic alignment block %s ..." % ch.id)
                continue
            new_id = "%si%d" % (ch.id, i)
            print(str(ch._replace(id=new_id)), file=opt.output)
            map(lambda tup: opt.output.write("%d %d %d\n" % tup), zip(S, T, Q))
            print("%d\n" % S[-1], file=opt.output)
        except KeyError:
            log.warning("skipping chromosome/contig (%s, %s)" % (a.chrom, b.chrom))
Ejemplo n.º 4
0
    def test_make_chain(self):
        def cch(cigar, s, e):
            return cigar[s:e].find('-') == -1

        for p in self.epo_records:
            chain = Chain._make_from_epo(p[0], p[1], {"chr1": 500}, {"chr1": 800})
            if not chain:
                continue
            ch, S, T, Q = chain
            i = int(ch.id)
            c1, c2 = cigar_pairs[i]
            if p[0].strand == '-':
                c1 = c1[::-1]
                c2 = c2[::-1]
            th = 0
            for s, t, q in zip(S, T, Q):
                if not (cch(c1, th, th+s) and cch(c2, th, th+s)):
                    pdb.set_trace()
                assert cch(c1, th, th+s) and cch(c2, th, th+s), "%s and %s" % (c1[th:th+s], c2[th:th+s])
                if t > q:
                    cch(c1, th+s, th+s+t) and c1[th+s:th+s+t] == '-'*t
                else:
                    cch(c2, th+s, th+s+q) and c1[th+s:th+s+q] == '-'*q
                th = th + s + max(t, q)