def test_chainAlignments_Case1(self):
        """Test chainAlignment case1: simple chain.

        Two alignments between 'q1' and 's1' are stored in the test
        database and chained; the chainer is expected to produce a single
        chain holding both, with both ids indexed to chain 0.
        """

        al1 = Alignment('q1',
                        's1',
                        1000,
                        1200,
                        2000,
                        2200,
                        200,
                        200,
                        1,
                        1,
                        id=1)
        al2 = Alignment('q1',
                        's1',
                        1500,
                        2000,
                        3000,
                        3500,
                        500,
                        500,
                        1,
                        1,
                        id=2)
        lAlignments = [al1, al2]

        # Start from an empty alignment table so only these fixtures exist.
        self.db.deleteAllAlignments()
        self.db.insertlAlignments(lAlignments)

        iAlgmtChainer = AlignmentChainer(self.db)
        iAlgmtChainer.chainAlignments(lAlignments)

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual([Chain([al1, al2])], iAlgmtChainer.lChains)
        self.assertEqual({1: [0], 2: [0]}, iAlgmtChainer.dIndex)
    def test_removeInternalAlignments(self):
        """Chain two 'Chr1' vs 'Chr1' alignments stored in the test database.

        NOTE(review): no assertion is visible for this test; it only checks
        that chaining these fixtures runs without raising.
        """

        algmtFirst = Alignment(
            'Chr1', 'Chr1',
            12806596, 12809714,
            12796459, 12799562,
            3123, 3123,
            1, 1,
            id=1)
        algmtSecond = Alignment(
            'Chr1', 'Chr1',
            12810507, 12813088,
            12796459, 12799028,
            2592, 2592,
            1, 1,
            id=2)
        lFixtures = [algmtFirst, algmtSecond]

        # Reset the alignment table, then load only the fixtures above.
        self.db.deleteAllAlignments()
        self.db.insertlAlignments(lFixtures)

        AlignmentChainer(self.db).chainAlignments(lFixtures)
    def test_distanceBetweenQueryAlgmts(self):
        """Test distanceBetweenQueryAlgmts.

        Six alignment pairs are checked: separated pairs (in both argument
        orders), partially overlapping pairs and nested pairs. The expected
        values below show a positive distance for separated pairs and a
        negative one when the pair overlaps or is nested.
        """

        algmt1 = Alignment('q1', 's1', 20, 30, 10, 20, 10, 10, 1, 1, id=1)
        algmt2 = Alignment('q1', 's1', 100, 110, 50, 60, 10, 10, 1, 1, id=2)
        algmt3 = Alignment('q1', 's1', 100, 110, 10, 20, 10, 10, 1, 1, id=3)
        algmt4 = Alignment('q1', 's1', 20, 30, 50, 60, 10, 10, 1, 1, id=4)
        algmt5 = Alignment('q1', 's1', 10, 30, 10, 20, 10, 10, 1, 1, id=5)
        algmt6 = Alignment('q1', 's1', 20, 50, 50, 60, 10, 10, 1, 1, id=6)
        algmt7 = Alignment('q1', 's1', 10, 50, 10, 20, 10, 10, 1, 1, id=7)
        algmt8 = Alignment('q1', 's1', 20, 30, 50, 60, 10, 10, 1, 1, id=8)
        algmt9 = Alignment('q1', 's1', 20, 50, 10, 20, 10, 10, 1, 1, id=9)
        algmt10 = Alignment('q1', 's1', 10, 30, 50, 60, 10, 10, 1, 1, id=10)
        algmt11 = Alignment('q1', 's1', 20, 30, 10, 20, 10, 10, 1, 1, id=11)
        algmt12 = Alignment('q1', 's1', 10, 50, 50, 60, 10, 10, 1, 1, id=12)

        iAlgmtChainer = AlignmentChainer(self.db)

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(
            69, iAlgmtChainer.distanceBetweenQueryAlgmts(algmt1, algmt2),
            "1 before 2")
        self.assertEqual(
            69, iAlgmtChainer.distanceBetweenQueryAlgmts(algmt3, algmt4),
            "2 before 1")
        self.assertEqual(
            -11, iAlgmtChainer.distanceBetweenQueryAlgmts(algmt5, algmt6),
            "1 start , 2 overlap")
        self.assertEqual(
            -31, iAlgmtChainer.distanceBetweenQueryAlgmts(algmt7, algmt8),
            "1 start before, 2 nested")
        self.assertEqual(
            -11, iAlgmtChainer.distanceBetweenQueryAlgmts(algmt9, algmt10),
            "2 starrt before, 1 overlap")
        self.assertEqual(
            -31, iAlgmtChainer.distanceBetweenQueryAlgmts(algmt11, algmt12),
            "2 start before, 1 nested")
Example #4
0
def chainSbjctQueryAlgmts(db, maxGap, chainLength, sbjct, query, lAlgmts):
    """Chain the alignments of one subject/query pair and keep long chains.

    An AlignmentChainer is built on ``db`` with the given ``maxGap`` and run
    over ``lAlgmts`` with ``multiproc=True``.

    Returns:
        list of the chainer's chains whose getLength() is strictly greater
        than ``chainLength``, in the chainer's order.
    """

    logging.debug('Chaining Alignment with subject: {} and query: {}'.format(
        sbjct, query))

    iChainer = AlignmentChainer(db, maxGap=maxGap)
    iChainer.chainAlignments(lAlgmts, multiproc=True)

    # Keep only the chains exceeding the length threshold.
    lKept = [
        iChain for iChain in iChainer.lChains
        if iChain.getLength() > chainLength
    ]
    logging.info('Selecting {} chains with subject: {} and query: {}'.format(
        len(lKept), sbjct, query))

    return lKept
Example #5
0
    def _chainSbjctQueryAlgmts(self, sbjct, query, maxGap=3000,
                               chainLength=5000):
        """Chain the alignments of one subject/query pair and keep long chains.

        The alignments are read from ``self.db`` (ordered by subject
        coordinate, per the selector's name) and chained with an
        AlignmentChainer.

        Args:
            sbjct: subject name passed to the database selector.
            query: query name passed to the database selector.
            maxGap: forwarded to AlignmentChainer. Previously an undefined
                free name in this method; the default mirrors
                chainAlignments(maxGap=3000).
            chainLength: chains must be strictly longer than this to be
                kept. Previously an undefined free name; the default mirrors
                chainAlignments(chainLength=5000).

        Returns:
            list of the selected chains, in the chainer's order.
        """

        logging.debug(
            'Chaining Alignment with subject: {} and query: {}'.format(
                sbjct, query))
        lAlgmts = self.db.selectAlignmentsWithDefinedSbjctAndQueryOrderBySbjctCoord(
            sbjct, query)
        chainer = AlignmentChainer(self.db, maxGap=maxGap)
        chainer.chainAlignments(lAlgmts)

        lSelectedChains = [
            chain for chain in chainer.lChains
            if chain.getLength() > chainLength
        ]
        logging.info(
            'Selecting {} chains with subject: {} and query: {}'.format(
                len(lSelectedChains), sbjct, query))

        return lSelectedChains
Example #6
0
    def chainAlignments(self, maxGap=3000, chainLength=5000):
        """Chain alignments for every subject/query combination.

        For each (subject, query) pair known to ``self.db`` the alignments
        are chained with an AlignmentChainer using ``maxGap``; chains whose
        getLength() is strictly greater than ``chainLength`` are collected.
        The collected chains are then passed through sortListOfChains and
        stored in ``self.lSortedChains``.
        """

        lAllSbjcts = self.db.selectAllSbjcts()
        lAllQueries = self.db.selectAllQueries()

        lKept = []
        for sSbjct in lAllSbjcts:
            for sQuery in lAllQueries:
                logging.debug('Chaining Alignment with subject: {} and query: {}'.format(sSbjct, sQuery))
                lPairAlgmts = self.db.selectAlignmentsWithDefinedSbjctAndQueryOrderBySbjctCoord(sSbjct, sQuery)
                iPairChainer = AlignmentChainer(self.db, maxGap=maxGap)
                iPairChainer.chainAlignments(lPairAlgmts)

                # Keep only the chains exceeding the length threshold.
                lKept.extend(
                    iChain for iChain in iPairChainer.lChains
                    if iChain.getLength() > chainLength)

        # A fresh chainer is used only for its sorting routine.
        iSorter = AlignmentChainer(self.db, maxGap=maxGap)
        self.lSortedChains = iSorter.sortListOfChains(lKept)
Example #7
0
    def removeDuplicationWithInternalSimilarity(self):
        """Replace self.lSortedChains with the result of
        AlignmentChainer.removeChainsWithInternalSimilarity applied to it."""

        self.lSortedChains = AlignmentChainer(
            self.db).removeChainsWithInternalSimilarity(self.lSortedChains)
Example #8
0
    def removeOverlappingDuplications(self):
        """Replace self.lSortedChains with the result of
        AlignmentChainer.removeOverlappingChains applied to it."""

        self.lSortedChains = AlignmentChainer(
            self.db).removeOverlappingChains(self.lSortedChains)
    def test_chainAlignments_Case2(self):
        """Test chainAlignment case2: variable constraints.

        Four alignments between 'q1' and 's1' are chained. The expected
        result is three chains: al1 and al2 together, al3 alone (its last
        positional argument is -1 where the others use 1) and al4 alone.
        dIndex is expected to map each alignment id to its chain index.
        """

        al1 = Alignment('q1',
                        's1',
                        1000,
                        1200,
                        2000,
                        2200,
                        200,
                        200,
                        1,
                        1,
                        id=1)
        al2 = Alignment('q1',
                        's1',
                        1500,
                        2000,
                        3000,
                        3500,
                        500,
                        500,
                        1,
                        1,
                        id=2)
        al3 = Alignment('q1',
                        's1',
                        10000,
                        13000,
                        150000,
                        153000,
                        3000,
                        3000,
                        1,
                        -1,
                        id=3)
        al4 = Alignment('q1',
                        's1',
                        123000,
                        124000,
                        160000,
                        161000,
                        1000,
                        1000,
                        1,
                        1,
                        id=4)
        lAlignments = [al1, al2, al3, al4]

        # Start from an empty alignment table so only these fixtures exist.
        self.db.deleteAllAlignments()
        self.db.insertlAlignments(lAlignments)

        iAlgmtChainer = AlignmentChainer(self.db)
        iAlgmtChainer.chainAlignments(lAlignments)

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(
            [Chain([al1, al2]), Chain([al3]),
             Chain([al4])], iAlgmtChainer.lChains)
        self.assertEqual({
            1: [0],
            2: [0],
            3: [1],
            4: [2]
        }, iAlgmtChainer.dIndex)
Example #10
0
    def test_removeOverlappingChains_case2(self):
        """Chain eight 'Chr1' vs 'Chr1' alignments stored in the test database.

        NOTE(review): no assertion is visible for this test; it only checks
        that chaining these fixtures runs without raising.
        """

        lFixtures = [
            Alignment('Chr1', 'Chr1', 15088003, 15088582,
                      15253139, 15253732, 594, 594, 1, 1, id=1),
            Alignment('Chr1', 'Chr1', 15094505, 15096966,
                      15254239, 15256725, 2499, 2499, 1, 1, id=2),
            Alignment('Chr1', 'Chr1', 15090989, 15092375,
                      15257818, 15259247, 1432, 1432, 1, 1, id=3),
            Alignment('Chr1', 'Chr1', 15098601, 15102401,
                      15259265, 15263155, 3904, 3904, 1, 1, id=4),
            Alignment('Chr1', 'Chr1', 15088003, 15090006,
                      15257754, 15259751, 2021, 2021, 1, 1, id=5),
            Alignment('Chr1', 'Chr1', 15090989, 15092375,
                      15257818, 15259247, 1432, 1432, 1, 1, id=6),
            Alignment('Chr1', 'Chr1', 15094655, 15097013,
                      15258796, 15261191, 2410, 2410, 1, 1, id=7),
            Alignment('Chr1', 'Chr1', 15098601, 15102401,
                      15259265, 15263155, 3904, 3904, 1, 1, id=8),
        ]

        # Reset the alignment table, then load only the fixtures above.
        self.db.deleteAllAlignments()
        self.db.insertlAlignments(lFixtures)

        AlignmentChainer(self.db).chainAlignments(lFixtures)
Example #11
0
    def test_removeOverlappingChains(self):
        """Test removeOverlappingChains.

        Fixtures are five 'q1' vs 's1' alignments plus four 's1' vs 'q1'
        alignments whose names and coordinate pairs are the swapped
        counterparts of the first four.

        NOTE(review): no assertion is visible for this test; it only checks
        that chaining these fixtures runs without raising.
        """

        lForward = [
            Alignment('q1', 's1', 1000, 1201, 2000, 2201,
                      202, 202, 1, 1, id=1),
            Alignment('q1', 's1', 1500, 2001, 3000, 3501,
                      502, 502, 1, 1, id=2),
            Alignment('q1', 's1', 1000, 1200, 2100, 2300,
                      201, 201, 1, 1, id=3),
            Alignment('q1', 's1', 1500, 2000, 3100, 3600,
                      501, 501, 1, 1, id=4),
            Alignment('q1', 's1', 123000, 124000, 160000, 161000,
                      1001, 1001, 1, 1, id=5),
        ]
        lSwapped = [
            Alignment('s1', 'q1', 2000, 2201, 1000, 1201,
                      202, 202, 1, 1, id=16),
            Alignment('s1', 'q1', 3000, 3501, 1500, 2001,
                      502, 502, 1, 1, id=11),
            Alignment('s1', 'q1', 2100, 2300, 1000, 1200,
                      201, 201, 1, 1, id=31),
            Alignment('s1', 'q1', 3100, 3600, 1500, 2000,
                      501, 501, 1, 1, id=41),
        ]
        lFixtures = lForward + lSwapped

        # Reset the alignment table, then load only the fixtures above.
        self.db.deleteAllAlignments()
        self.db.insertlAlignments(lFixtures)

        AlignmentChainer(self.db).chainAlignments(lFixtures)
Example #12
0
    def test_chainAlignments_Case3(self):
        """Test chainAlignment case3: complex chain with interleaved coordinates.

        Five alignments between 'q1' and 's1' are chained. The expected
        result is four chains: al1 and al2 together, and al3, al4 and al5
        each alone (al3 and al4 use -1 as their last positional argument).
        dIndex is expected to map each alignment id to its chain index.
        """

        al1 = Alignment('q1',
                        's1',
                        1000,
                        1200,
                        2000,
                        2200,
                        200,
                        200,
                        1,
                        1,
                        id=1)
        al2 = Alignment('q1',
                        's1',
                        1500,
                        2000,
                        3000,
                        3500,
                        500,
                        500,
                        1,
                        1,
                        id=2)
        al3 = Alignment('q1',
                        's1',
                        10000,
                        13000,
                        150000,
                        153000,
                        3000,
                        3000,
                        1,
                        -1,
                        id=3)
        al4 = Alignment('q1',
                        's1',
                        11000,
                        14000,
                        154000,
                        157000,
                        3000,
                        3000,
                        1,
                        -1,
                        id=4)
        al5 = Alignment('q1',
                        's1',
                        123000,
                        124000,
                        160000,
                        161000,
                        1000,
                        1000,
                        1,
                        1,
                        id=5)
        lAlignments = [al1, al2, al3, al4, al5]

        # Start from an empty alignment table so only these fixtures exist.
        self.db.deleteAllAlignments()
        self.db.insertlAlignments(lAlignments)

        iAlgmtChainer = AlignmentChainer(self.db)
        iAlgmtChainer.chainAlignments(lAlignments)

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(
            [Chain([al1, al2]),
             Chain([al3]),
             Chain([al4]),
             Chain([al5])], iAlgmtChainer.lChains)
        self.assertEqual({
            1: [0],
            2: [0],
            3: [1],
            4: [2],
            5: [3]
        }, iAlgmtChainer.dIndex)
Example #13
0
    def pairingChains(self):
        """Replace self.lSortedChains with the result of
        AlignmentChainer.pairingChains applied to it."""

        self.lSortedChains = AlignmentChainer(self.db).pairingChains(
            self.lSortedChains)
Example #14
0
    def chainAlignments(self, maxGap=3000, chainLength=5000):
        """Chain alignments per subject/query pair and keep the long chains.

        Subjects and queries are read from ``self.db`` and sorted; both
        lists must be identical. Each subject is paired with the queries at
        the same or a later position in the sorted list, so every unordered
        pair is processed once. Chains whose getLength() is strictly greater
        than ``chainLength`` are collected, passed through sortListOfChains
        and stored in ``self.lSortedChains``.

        When ``self.procs > 1`` the database is dumped to 'dump.sql' and
        restored into ``self.procs`` files named 'sddetector.<i>.db'; pairs
        are then processed in batches of ``self.procs`` tasks, each task of a
        batch receiving its own database copy. All temporary files are
        removed afterwards, including when an error occurs.

        Args:
            maxGap: forwarded to AlignmentChainer / chainSbjctQueryAlgmts.
            chainLength: minimal (exclusive) length of the chains to keep.

        Raises:
            ValueError: if the sorted subject and query lists differ.
        """

        lSbjcts = sorted(self.db.selectAllSbjcts())
        lQueries = sorted(self.db.selectAllQueries())

        if lSbjcts != lQueries:
            # A bare string cannot be raised (TypeError on Python 3), so the
            # original message is carried by a real exception.
            raise ValueError('error in list of subjects / queries of algmts')

        # Each subject is paired with queries[i:], i.e. itself and the later
        # entries of the sorted list.
        lPairs = [(sbjct, query)
                  for i, sbjct in enumerate(lSbjcts)
                  for query in lQueries[i:]]

        lSelectedChains = []

        if self.procs > 1:

            dumpFile = 'dump.sql'
            lDbs = []
            try:
                with open(dumpFile, 'w') as f:
                    for line in self.db.conn.iterdump():
                        f.write('%s\n' % line)
                # Read the dump once instead of once per database copy.
                with open(dumpFile, 'r') as f:
                    sql = f.read()

                for i in range(0, self.procs):
                    dbfile = 'sddetector.{}.db'.format(i)
                    dest = sqlite3.connect(dbfile)
                    try:
                        dest.cursor().executescript(sql)
                        dest.commit()
                    finally:
                        # Release the loader connection before the copy is
                        # reopened through AlignDB.
                        dest.close()
                    lDbs.append(AlignDB(dbfile, copy=True))

                # Process the pairs in batches of len(lDbs) tasks so that,
                # within a batch, every task gets a distinct database copy.
                for start in range(0, len(lPairs), len(lDbs)):
                    lTasks = []
                    lBatch = lPairs[start:start + len(lDbs)]
                    for db, (sbjct, query) in zip(lDbs, lBatch):
                        lAlgmts = self.db.selectAlignmentsWithDefinedSbjctAndQueryOrderBySbjctCoord(
                            sbjct, query)
                        lTasks.append((chainSbjctQueryAlgmts,
                                       (db, maxGap, chainLength, sbjct,
                                        query, lAlgmts)))

                    pool = multiprocessing.Pool(self.procs)
                    try:
                        results = [
                            pool.apply_async(working_process, t)
                            for t in lTasks
                        ]
                        for r in results:
                            lSelectedChains.extend(r.get())
                    finally:
                        # Always shut the workers down; the pools were
                        # previously left open.
                        pool.close()
                        pool.join()
            finally:
                for db in lDbs:
                    os.remove(db.dbfile)
                if os.path.exists(dumpFile):
                    os.remove(dumpFile)
        else:

            for sbjct, query in lPairs:
                logging.debug(
                    'Chaining Alignment with subject: {} and query: {}'.
                    format(sbjct, query))
                lAlgmts = self.db.selectAlignmentsWithDefinedSbjctAndQueryOrderBySbjctCoord(
                    sbjct, query)
                chainer = AlignmentChainer(self.db, maxGap=maxGap)
                chainer.chainAlignments(lAlgmts)
                lPairChains = [
                    chain for chain in chainer.lChains
                    if chain.getLength() > chainLength
                ]
                lSelectedChains.extend(lPairChains)
                logging.info(
                    'Selecting {} chains with subject: {} and query: {}'.
                    format(len(lPairChains), sbjct, query))

        # A fresh chainer is used only for its sorting routine.
        chainer2 = AlignmentChainer(self.db, maxGap=maxGap)
        self.lSortedChains = chainer2.sortListOfChains(lSelectedChains)