Esempio n. 1
0
 def setUp(self):
     """Populate ``self.epo_records`` with (target, query) item pairs.

     One pair is attempted per entry of ``cigar_pairs``; a pair is kept
     only when both parsed items are truthy.
     """
     records = self.epo_records = []
     for idx, (trg_seq, qr_seq) in enumerate(cigar_pairs):
         # Build both CIGAR records first, then parse them, target before query.
         trg_line = toCigar("homo_sapiens", idx, trg_seq)
         qr_line = toCigar("mus_musculus", idx, qr_seq)
         trg_item = EPOitem._strfactory(trg_line)
         qr_item = EPOitem._strfactory(qr_line)
         if not (trg_item and qr_item):
             continue
         records.append((trg_item, qr_item))
Esempio n. 2
0
    def test_rem_dash(self):
        """Check ``Chain._make_from_epo`` on alignments that share dash columns.

        Two randomized scenarios are run 100 times each; the alignment
        layouts they model are sketched in the comments above each loop.
        Failures are reported through assertion messages only (no
        interactive debugger), so the test is safe for unattended runs.
        """
        # ****--****-------****  4M2D4M7D4M
        # *******-------*******  7M7D7M
        # has 4 dash columns and should become
        # ****--****---****      4M2D4M3D4M
        # *******---*******      7M3D7M

        for _ in range(100):
            dash_cols = random.randint(0, 10)
            tStart = random.randint(0, 1000)
            qStart = random.randint(0, 1000)
            epo_pair = (
                EPOitem._strfactory("homo_sapiens\t0\t1\t%d\t%d\t1\t%s" % (tStart, tStart+12-1, "4M2D4M%dD4M" % (dash_cols+3))),
                EPOitem._strfactory("mus_musculus\t0\t1\t%d\t%d\t1\t%s" % (qStart, qStart+14-1, "7M%dD7M" % (dash_cols+3))))
            chain = Chain._make_from_epo(epo_pair[0], epo_pair[1], {"chr1": 500}, {"chr1": 800})
            ti = epo_pair[0].intervals(False)
            qi = epo_pair[1].intervals(False)
            # The distance between neighbouring intervals, minus the shared
            # dash columns, must agree with the corresponding chain entry for
            # both the target and the query.
            assert ti[2][0] - ti[1][1] - dash_cols == chain[2][1]
            assert qi[1][0] - qi[0][1] - dash_cols == chain[2][1]

        # ----*****
        # *-------*
        # has 3 dash cols and should become
        # *
        # *
        # with the qStart += 1 and tStart += 4

        for _ in range(100):
            dash_cols = random.randint(0, 10)
            tm = random.randint(6, 10)
            qm = random.randint(1, 5)

            tStart = random.randint(0, 1000)
            qStart = random.randint(0, 1000)

            epo_pair = (
                EPOitem._strfactory("homo_sapiens\t0\t1\t%d\t%d\t1\t%s" % (tStart, tStart+tm-1, "%dD%dM" % (dash_cols+1, tm))),
                EPOitem._strfactory("mus_musculus\t0\t1\t%d\t%d\t1\t%s" % (qStart, qStart+qm+1-1, "M%dD%dM" % (dash_cols+tm-qm, qm))))
            chain = Chain._make_from_epo(epo_pair[0], epo_pair[1], {"chr1": 500}, {"chr1": 800})
            # Report a mismatch via the assertion message; a debugger
            # breakpoint here would stall non-interactive test runs.
            assert chain[1][-1] == qm, "%r != %r" % (chain[1][-1], qm)
            # correct also for coordinate interpretation differences between UCSC and EPO
            assert (qStart + 1) - 1 == chain[0].qStart, "%d != %d" % (qStart + 1, chain[0].qStart)
    # Command-line interface: one positional input file plus options for the
    # species names, the chromosome-size files and the output destination.
    parser = argparse.ArgumentParser(description="""EPO alignments (.out) to .chain converter.""",
            epilog="Olgert Denas (Taylor Lab)",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("input", help="File to process.")
    parser.add_argument("--species", nargs=2, default=["homo_sapiens", "mus_musculus"],
            help="Names of target and query species (respectively) in the alignment.")
    parser.add_argument("--chrsizes", nargs=2, required=True,
            help="Chromosome sizes for the given species.")
    # NOTE(review): outFile is defined elsewhere in the file; it converts the
    # option value (including the 'stdout' default) -- confirm its behavior there.
    parser.add_argument("-o", '--output', metavar="FILE", default='stdout', type=outFile, help="Output file")

    opt = parser.parse_args()

    log.info("loading sizes ...")
    # First --chrsizes value is used for the target, second for the query.
    tsizes = loadChrSizes(opt.chrsizes[0])
    qsizes = loadChrSizes(opt.chrsizes[1])

    log.info("loading alignments ...")
    data = EPOitem._parse_epo(opt.input)

    log.info("dumping ...")
    for k in data:
        components = data[k]
        # Materialize real lists: on Python 3 filter() returns a lazy,
        # single-pass iterator, whereas a list can be re-iterated and sized.
        trg_comp = [c for c in components if c.species == opt.species[0]]
        qr_comp = [c for c in components if c.species == opt.species[1]]

        convert_action(trg_comp, qr_comp, tsizes, qsizes, opt)



Esempio n. 4
0
        default=["homo_sapiens", "mus_musculus"],
        help=
        "Names of target and query species (respectively) in the alignment.")
    # Two required values: the chromosome-size inputs for the two species.
    parser.add_argument("--chrsizes",
                        nargs=2,
                        required=True,
                        help="Chromosome sizes for the given species.")
    # NOTE(review): outFile is defined elsewhere in the file; it converts the
    # option value (including the 'stdout' default) -- confirm its behavior there.
    parser.add_argument("-o",
                        '--output',
                        metavar="FILE",
                        default='stdout',
                        type=outFile,
                        help="Output file")

    opt = parser.parse_args()

    log.info("loading sizes ...")
    # First --chrsizes value is used for the target, second for the query.
    tsizes = loadChrSizes(opt.chrsizes[0])
    qsizes = loadChrSizes(opt.chrsizes[1])

    log.info("loading alignments ...")
    data = EPOitem._parse_epo(opt.input)

    log.info("dumping ...")
    for k in data:
        components = data[k]
        # Materialize real lists: on Python 3 filter() returns a lazy,
        # single-pass iterator, whereas a list can be re-iterated and sized.
        trg_comp = [c for c in components if c.species == opt.species[0]]
        qr_comp = [c for c in components if c.species == opt.species[1]]

        convert_action(trg_comp, qr_comp, tsizes, qsizes, opt)
Esempio n. 5
0
        default=["homo_sapiens", "mus_musculus"],
        help=
        "Names of target and query species (respectively) in the alignment.")
    # Two required values: the chromosome-size inputs for the two species.
    parser.add_argument("--chrsizes", nargs=2, required=True,
                        help="Chromosome sizes for the given species.")
    # Output destination; the value (default "stdout") is passed through outFile.
    parser.add_argument("-o", "--output", metavar="FILE", default="stdout",
                        type=outFile, help="Output file")

    opt = parser.parse_args()

    log.info("loading sizes ...")
    # First --chrsizes value is used for the target, second for the query.
    tsizes, qsizes = loadChrSizes(opt.chrsizes[0]), loadChrSizes(opt.chrsizes[1])

    log.info("loading alignments ...")
    # Order the parsed entries by key so they are dumped in sorted order.
    parsed = EPOitem._parse_epo(opt.input)
    data = OrderedDict(sorted(parsed.items()))

    log.info("dumping ...")
    for k, components in data.items():
        # Split each entry's components by species before converting them.
        trg_comp = [c for c in components if c.species == opt.species[0]]
        qr_comp = [c for c in components if c.species == opt.species[1]]
        convert_action(trg_comp, qr_comp, tsizes, qsizes, opt)
Esempio n. 6
0
            log.warning("skipping chromosome/contig (%s, %s)" % (a.chrom, b.chrom))

if __name__ == "__main__":
    # Script entry point: parse the command line, load the chromosome sizes
    # and the EPO alignments, then convert every entry in sorted key order.
    parser = argparse.ArgumentParser(
        description="EPO alignments (.out) to .chain converter.",
        epilog="Olgert Denas (Taylor Lab)",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("input", help="File to process.")
    parser.add_argument(
        "--species",
        nargs=2,
        default=["homo_sapiens", "mus_musculus"],
        help="Names of target and query species (respectively) in the alignment.",
    )
    parser.add_argument(
        "--chrsizes",
        nargs=2,
        required=True,
        help="Chromosome sizes for the given species.",
    )
    parser.add_argument(
        "-o",
        "--output",
        metavar="FILE",
        default="stdout",
        type=outFile,
        help="Output file",
    )
    opt = parser.parse_args()

    log.info("loading sizes ...")
    # First --chrsizes value is used for the target, second for the query.
    tsizes, qsizes = loadChrSizes(opt.chrsizes[0]), loadChrSizes(opt.chrsizes[1])

    log.info("loading alignments ...")
    parsed = EPOitem._parse_epo(opt.input)
    data = OrderedDict(sorted(parsed.items()))

    log.info("dumping ...")
    for k, components in data.items():
        # Split each entry's components into target-species and query-species lists.
        trg_comp = [c for c in components if c.species == opt.species[0]]
        qr_comp = [c for c in components if c.species == opt.species[1]]
        convert_action(trg_comp, qr_comp, tsizes, qsizes, opt)