def test_chainAlignments_Case1(self):
        """Test chainAlignments case 1: simple chain.

        Two same-strand alignments between 'q1' and 's1' are stored in the
        test database and chained.  The test expects a single chain holding
        both alignments, and ``dIndex`` to list chain 0 for alignment ids 1
        and 2.

        Alignment positional arguments are: query, subject, qstart, qend,
        sstart, send, length, identities, query strand, subject strand.
        """

        al1 = Alignment('q1', 's1', 1000, 1200, 2000, 2200, 200, 200, 1, 1, id=1)
        al2 = Alignment('q1', 's1', 1500, 2000, 3000, 3500, 500, 500, 1, 1, id=2)
        lAlignments = [al1, al2]

        # Reset the table so only this test's fixtures are stored.
        self.db.deleteAllAlignments()
        self.db.insertlAlignments(lAlignments)

        iAlgmtChainer = AlignmentChainer(self.db)
        iAlgmtChainer.chainAlignments(lAlignments)

        # assertEqual, not assertEquals: the alias was removed in Python 3.12.
        self.assertEqual([Chain([al1, al2])], iAlgmtChainer.lChains)
        self.assertEqual({1: [0], 2: [0]}, iAlgmtChainer.dIndex)
    def test_removeInternalAlignments(self):
        """Chain two Chr1-vs-Chr1 alignments that share the same subject start.

        Only the set-up and the chaining call are exercised here; no
        assertion is made on the resulting chains.
        """

        firstAlgmt = Alignment('Chr1', 'Chr1', 12806596, 12809714,
                               12796459, 12799562, 3123, 3123, 1, 1, id=1)
        secondAlgmt = Alignment('Chr1', 'Chr1', 12810507, 12813088,
                                12796459, 12799028, 2592, 2592, 1, 1, id=2)
        lFixtures = [firstAlgmt, secondAlgmt]

        # Replace whatever the table holds with the two fixtures.
        self.db.deleteAllAlignments()
        self.db.insertlAlignments(lFixtures)

        chainer = AlignmentChainer(self.db)
        chainer.chainAlignments(lFixtures)
    def getAllAlignments(self):
        """Return list of all Alignments read from the tabular BLAST file.

        Every non-blank line of ``self.inputBlastTabFile`` must contain
        eight tab-separated fields: qseqid, sseqid, qstart, qend, sstart,
        send, length, nident.  Blank lines (e.g. a trailing newline at the
        end of the file) are skipped.

        The query strand is always 1.  The subject strand is 1 when
        sstart < send and -1 otherwise; for strand -1 the two subject
        coordinates are swapped before being given to Alignment.

        Returns:
            list of Alignment, with ids numbered from 1 in file order.

        Raises:
            ValueError: a non-blank line does not have exactly eight
                fields, or a numeric field is not an integer.
        """

        lAlignments = []
        with open(self.inputBlastTabFile, 'r') as blastFile:
            for line in blastFile:
                if not line.strip():
                    # Tolerate empty lines instead of failing on unpacking.
                    continue

                (qseqid, sseqid, qstart, qend,
                 sstart, send, length, nident) = line.split('\t')
                qstart, qend = int(qstart), int(qend)
                sstart, send = int(sstart), int(send)

                if sstart < send:
                    sframe = 1
                else:
                    # Reverse-strand hit: pass the subject coordinates
                    # in swapped order.
                    sframe = -1
                    sstart, send = send, sstart

                lAlignments.append(
                    Alignment(qseqid, sseqid, qstart, qend, sstart, send,
                              int(length), int(nident), 1, sframe,
                              id=len(lAlignments) + 1))

            logging.debug('{} HSP parsed'.format(len(lAlignments)))

        return lAlignments
Beispiel #4
0
    def getAllAlignments(self):
        """Return list of all Alignments read from the BLAST XML file.

        ``self.inputBlastXMLFile`` is parsed with ``NCBIXML.parse``; one
        Alignment is built per HSP of every hit of every query record.
        Ids are numbered from 1 across the whole file.

        ``hsp.frame[0]`` is used as query strand and ``hsp.frame[1]`` as
        subject strand.  For subject strand -1 the subject start and end
        are passed in swapped order.

        Returns:
            list of Alignment in file order.

        Raises:
            Exception: an HSP has a subject frame other than 1 or -1.
        """

        lAlignments = []
        with open(self.inputBlastXMLFile, 'r') as xmlFile:
            index = 0

            for blast_record in NCBIXML.parse(xmlFile):
                logging.debug('QUERY: {}'.format(blast_record.query))

                for alignment in blast_record.alignments:
                    logging.debug('SUBJECT: {}'.format(alignment.hit_id))
                    nb_hsp = 0
                    for hsp in alignment.hsps:
                        nb_hsp += 1
                        index += 1

                        qframe, sframe = hsp.frame[0], hsp.frame[1]
                        if sframe == 1:
                            sstart, send = hsp.sbjct_start, hsp.sbjct_end
                        elif sframe == -1:
                            # Reverse-strand hit: swap subject coordinates.
                            sstart, send = hsp.sbjct_end, hsp.sbjct_start
                        else:
                            logging.error('Blast Parsing: Unknown strand')
                            raise Exception("Unknown strand")

                        lAlignments.append(
                            Alignment(blast_record.query, alignment.hit_id,
                                      hsp.query_start, hsp.query_end,
                                      sstart, send, hsp.align_length,
                                      hsp.identities, qframe, sframe,
                                      id=index))
                    logging.debug('{} HSP parsed'.format(nb_hsp))

        return lAlignments
Beispiel #5
0
    def addAlignment(self):
        """Store the alignment described by the current parser fields.

        The alignment counter is incremented first and used as the id of
        the new Alignment.  For subject frame -1 the subject start/end are
        passed in swapped order.  Any other frame than 1 or -1 is only
        logged as an error and nothing is appended.
        """

        self.nbAlgmts += 1

        if self.sframe == 1:
            sbjctFrom, sbjctTo = self.sstart, self.send
        elif self.sframe == -1:
            sbjctFrom, sbjctTo = self.send, self.sstart
        else:
            logging.error('Blast Parsing: Unknown strand')
            return

        newAlgmt = Alignment(self.qid, self.sid, self.qstart, self.qend,
                             sbjctFrom, sbjctTo, self.length, self.ident,
                             self.qframe, self.sframe, id=self.nbAlgmts)
        self.lAlgmts.append(newAlgmt)
Beispiel #6
0
    def selectAllAlignments(self):
        """Return every row of the ``alignment`` table as an Alignment."""

        rows = self.conn.execute('''select id, query, sbjct, qstart,
                                   qend, sstart, send, length, identities,
                                   qstrand, sstrand from alignment''')

        lAlgmts = []
        # Column order follows the select list; the id comes first.
        for (algmtId, query, sbjct, qstart, qend, sstart, send,
             length, identities, qstrand, sstrand) in rows:
            lAlgmts.append(Alignment(query, sbjct, qstart, qend, sstart,
                                     send, length, identities, qstrand,
                                     sstrand, id=algmtId))
        return lAlgmts
Beispiel #7
0
    def selectAlignmentsWithDefinedIdOrderBySbjctCoord(self, lIds):
        """Return the alignments whose id is in lIds, sorted by subject start.

        Each id is written into the query as a single-quoted literal.
        """

        lQuotedIds = []
        for algmtId in lIds:
            lQuotedIds.append('\'{}\''.format(algmtId))

        sqlTemplate = '''select id, query, sbjct, qstart,
                                   qend, sstart, send, length, identities,
                                   qstrand, sstrand from alignment where \
                                   id in ({}) order by sstart ASC'''
        rows = self.conn.execute(sqlTemplate.format(','.join(lQuotedIds)))

        lAlgmts = []
        for (algmtId, query, sbjct, qstart, qend, sstart, send,
             length, identities, qstrand, sstrand) in rows:
            lAlgmts.append(Alignment(query, sbjct, qstart, qend, sstart,
                                     send, length, identities, qstrand,
                                     sstrand, id=algmtId))
        return lAlgmts
Beispiel #8
0
    def _reverse_algmts(self, lAlgmts):
        """Return copies of lAlgmts with query and subject roles exchanged.

        For every alignment the query/subject names, coordinates and
        strands are swapped; length and identities are kept.  The new id
        is the original id followed by ".reverse".
        """

        return [
            Alignment(src.sbjct, src.query,
                      src.sstart, src.send,
                      src.qstart, src.qend,
                      src.length, src.identities,
                      src.sstrand, src.qstrand,
                      "{}.reverse".format(src.id))
            for src in lAlgmts
        ]
Beispiel #9
0
    def selectAlignmentsWithDefinedSbjctAndQueryOrderBySbjctCoord(self, sbjct, query):
        """Select ordered sub-list of alignments with defined sbjct and query.

        Args:
            sbjct: subject sequence name to match.
            query: query sequence name to match.

        Returns:
            list of Alignment for that subject/query pair, ordered by
            ascending subject start.
        """

        # Bind the names as parameters: sequence names come from input
        # files and may contain quotes, which would break (or alter) a
        # query assembled with str.format.
        # NOTE(review): '?' placeholders assume self.conn is a sqlite3
        # connection - confirm.
        cursor = self.conn.execute(
            'select id, query, sbjct, qstart, qend, sstart, send, length, '
            'identities, qstrand, sstrand from alignment '
            'where sbjct = ? and query = ? order by sstart ASC',
            (sbjct, query))

        return [Alignment(row[1], row[2], row[3], row[4], row[5], row[6],
                          row[7], row[8], row[9], row[10], id=row[0])
                for row in cursor]
Beispiel #10
0
    def exportDbToGff3(self, fileName):
        """Export entire db in gff3 match features.

        Every row of the ``alignment`` table is turned into an Alignment
        and the text returned by its ``convertToGff3()`` is written to
        ``fileName``, which is created or overwritten.

        Args:
            fileName: path of the output file.
        """

        # The with-block closes the file; no explicit close() is needed.
        with open(fileName, 'w') as f:
            cursor = self.conn.execute(
                'select id, query, sbjct, qstart, qend, sstart, send, '
                'length, identities, qstrand, sstrand from alignment')

            f.writelines(
                Alignment(row[1], row[2], row[3], row[4], row[5], row[6],
                          row[7], row[8], row[9], row[10],
                          id=row[0]).convertToGff3()
                for row in cursor)
Beispiel #11
0
    def selectAlignmentById(self, id):
        """Select alignment by id.

        Args:
            id: identifier of the wanted alignment.

        Returns:
            The matching Alignment, or None when no row has that id.
        """

        # Bind the id instead of formatting it into the SQL text: an
        # unquoted non-numeric id (e.g. "1.reverse") is otherwise a SQL
        # error.
        # NOTE(review): '?' placeholders assume self.conn is a sqlite3
        # connection - confirm.
        cursor = self.conn.execute(
            'select id, query, sbjct, qstart, qend, sstart, send, length, '
            'identities, qstrand, sstrand from alignment where id = ?',
            (id,))
        row = cursor.fetchone()
        if row is None:
            return None
        return Alignment(row[1], row[2], row[3], row[4], row[5], row[6],
                         row[7], row[8], row[9], row[10], id=row[0])
Beispiel #12
0
    def test_getAllAlignments(self):
        """Check that parsing test-data/blast.tab yields the 8 expected HSPs."""

        # One tuple per expected alignment, in file order:
        # (query, sbjct, qstart, qend, sstart, send, length, identities,
        #  qstrand, sstrand).  Ids are the 1-based positions.
        lExpectedFields = [
            ('seq1', 'seq1', 1, 167, 1, 167, 167, 167, 1, 1),
            ('seq1', 'seq2', 1, 164, 237, 400, 164, 164, 1, -1),
            ('seq1', 'seq2', 50, 113, 54, 118, 66, 63, 1, 1),
            ('seq2', 'seq2', 1, 400, 1, 400, 400, 400, 1, 1),
            ('seq2', 'seq2', 288, 351, 54, 118, 66, 63, 1, -1),
            ('seq2', 'seq2', 54, 118, 288, 351, 66, 63, 1, -1),
            ('seq2', 'seq1', 237, 400, 1, 164, 164, 164, 1, -1),
            ('seq2', 'seq1', 54, 118, 50, 113, 66, 63, 1, 1),
        ]
        lExpected = []
        for algmtId, fields in enumerate(lExpectedFields, 1):
            lExpected.append(Alignment(*fields, id=algmtId))

        parser = BlastTabParser("test-data/blast.tab")
        self.assertEqual(parser.getAllAlignments(), lExpected)
Beispiel #13
0
    def selectProximalAlgmts(self, id, maxGap=3000):
        """
            Return the alignments lying just downstream, on the subject, of
            the alignment with the given id.

            A candidate must have the same subject, query and strands as
            the reference alignment, start after the reference start, and
            the gap (candidate sstart - reference send) must be > -1 and
            < maxGap.  Results are ordered by subject start.
        """

        sqlTemplate = '''select al2.id, al2.query, al2.sbjct, al2.qstart,
                                   al2.qend, al2.sstart, al2.send, al2.length, al2.identities,
                                   al2.qstrand, al2.sstrand from alignment al1, alignment al2 where \
                                   (al2.sstart-al1.send) < {} and al1.sstart < al2.sstart \
                                   and (al2.sstart-al1.send) > -1 \
                                   and al1.sbjct = al2.sbjct and al1.query = al2.query \
                                   and al1.qstrand = al2.qstrand and al1.sstrand = al2.sstrand \
                                   and al1.id = {} order by al2.sstart'''
        rows = self.conn.execute(sqlTemplate.format(maxGap, id))

        lProximal = []
        for (algmtId, query, sbjct, qstart, qend, sstart, send,
             length, identities, qstrand, sstrand) in rows:
            lProximal.append(Alignment(query, sbjct, qstart, qend, sstart,
                                       send, length, identities, qstrand,
                                       sstrand, id=algmtId))
        return lProximal
    def test_distanceBetweenQueryAlgmts(self):
        """Test distanceBetweenQueryAlgmts.

        Six pairs are checked: separated, overlapping and nested query
        intervals, each with the arguments given in both orders.  The
        expected values used here are consistent with
        (later query start - earlier query end - 1): positive for a gap,
        negative when the intervals overlap.
        """

        algmt1 = Alignment('q1', 's1', 20, 30, 10, 20, 10, 10, 1, 1, id=1)
        algmt2 = Alignment('q1', 's1', 100, 110, 50, 60, 10, 10, 1, 1, id=2)
        algmt3 = Alignment('q1', 's1', 100, 110, 10, 20, 10, 10, 1, 1, id=3)
        algmt4 = Alignment('q1', 's1', 20, 30, 50, 60, 10, 10, 1, 1, id=4)
        algmt5 = Alignment('q1', 's1', 10, 30, 10, 20, 10, 10, 1, 1, id=5)
        algmt6 = Alignment('q1', 's1', 20, 50, 50, 60, 10, 10, 1, 1, id=6)
        algmt7 = Alignment('q1', 's1', 10, 50, 10, 20, 10, 10, 1, 1, id=7)
        algmt8 = Alignment('q1', 's1', 20, 30, 50, 60, 10, 10, 1, 1, id=8)
        algmt9 = Alignment('q1', 's1', 20, 50, 10, 20, 10, 10, 1, 1, id=9)
        algmt10 = Alignment('q1', 's1', 10, 30, 50, 60, 10, 10, 1, 1, id=10)
        algmt11 = Alignment('q1', 's1', 20, 30, 10, 20, 10, 10, 1, 1, id=11)
        algmt12 = Alignment('q1', 's1', 10, 50, 50, 60, 10, 10, 1, 1, id=12)

        iAlgmtChainer = AlignmentChainer(self.db)

        # (expected distance, first alignment, second alignment, message)
        lCases = [
            (69, algmt1, algmt2, "1 before 2"),
            (69, algmt3, algmt4, "2 before 1"),
            (-11, algmt5, algmt6, "1 start , 2 overlap"),
            (-31, algmt7, algmt8, "1 start before, 2 nested"),
            (-11, algmt9, algmt10, "2 starrt before, 1 overlap"),
            (-31, algmt11, algmt12, "2 start before, 1 nested"),
        ]
        for expected, algmtA, algmtB, msg in lCases:
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual(
                expected,
                iAlgmtChainer.distanceBetweenQueryAlgmts(algmtA, algmtB),
                msg)
    def test_chainAlignments_Case3(self):
        """Test chainAlignment case3: complex chain with interleaved coordinates.

        al1 and al2 (subject strand 1) are expected to share one chain.
        al3 and al4 (subject strand -1, overlapping query ranges) and al5
        are each expected to end up alone in their own chain.  ``dIndex``
        is expected to list, per alignment id, the index of that chain.
        """

        al1 = Alignment('q1', 's1', 1000, 1200, 2000, 2200, 200, 200, 1, 1, id=1)
        al2 = Alignment('q1', 's1', 1500, 2000, 3000, 3500, 500, 500, 1, 1, id=2)
        al3 = Alignment('q1', 's1', 10000, 13000, 150000, 153000, 3000, 3000, 1, -1, id=3)
        al4 = Alignment('q1', 's1', 11000, 14000, 154000, 157000, 3000, 3000, 1, -1, id=4)
        al5 = Alignment('q1', 's1', 123000, 124000, 160000, 161000, 1000, 1000, 1, 1, id=5)
        lAlignments = [al1, al2, al3, al4, al5]

        # Reset the table so only this test's fixtures are stored.
        self.db.deleteAllAlignments()
        self.db.insertlAlignments(lAlignments)

        iAlgmtChainer = AlignmentChainer(self.db)
        iAlgmtChainer.chainAlignments(lAlignments)

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(
            [Chain([al1, al2]),
             Chain([al3]),
             Chain([al4]),
             Chain([al5])], iAlgmtChainer.lChains)
        self.assertEqual({
            1: [0],
            2: [0],
            3: [1],
            4: [2],
            5: [3]
        }, iAlgmtChainer.dIndex)
    def test_removeOverlappingChains(self):
        """Set up overlapping q1/s1 alignments and their mirrors, then chain.

        al1..al5 align 'q1' against 's1'; al1b..al4b repeat al1..al4 with
        query and subject (names and coordinates) exchanged.  Only the
        set-up and the chaining call are exercised; no assertion is made.
        """

        # Forward alignments: q1 as query, s1 as subject.
        al1 = Alignment('q1', 's1', 1000, 1201, 2000, 2201, 202, 202, 1, 1, id=1)
        al2 = Alignment('q1', 's1', 1500, 2001, 3000, 3501, 502, 502, 1, 1, id=2)
        al3 = Alignment('q1', 's1', 1000, 1200, 2100, 2300, 201, 201, 1, 1, id=3)
        al4 = Alignment('q1', 's1', 1500, 2000, 3100, 3600, 501, 501, 1, 1, id=4)
        al5 = Alignment('q1', 's1', 123000, 124000, 160000, 161000, 1001, 1001, 1, 1, id=5)

        # Mirrored alignments: s1 as query, q1 as subject.
        al1b = Alignment('s1', 'q1', 2000, 2201, 1000, 1201, 202, 202, 1, 1, id=16)
        al2b = Alignment('s1', 'q1', 3000, 3501, 1500, 2001, 502, 502, 1, 1, id=11)
        al3b = Alignment('s1', 'q1', 2100, 2300, 1000, 1200, 201, 201, 1, 1, id=31)
        al4b = Alignment('s1', 'q1', 3100, 3600, 1500, 2000, 501, 501, 1, 1, id=41)

        lFixtures = [al1, al2, al3, al4, al5, al1b, al2b, al3b, al4b]

        self.db.deleteAllAlignments()
        self.db.insertlAlignments(lFixtures)

        chainer = AlignmentChainer(self.db)
        chainer.chainAlignments(lFixtures)
    def test_chainAlignments_Case2(self):
        """Test chainAlignment case2: variable constraints.

        al1 and al2 (subject strand 1) are expected to share one chain;
        al3 (subject strand -1) and the distant al4 are each expected in
        a chain of their own.  ``dIndex`` is expected to list, per
        alignment id, the index of that chain.
        """

        al1 = Alignment('q1', 's1', 1000, 1200, 2000, 2200, 200, 200, 1, 1, id=1)
        al2 = Alignment('q1', 's1', 1500, 2000, 3000, 3500, 500, 500, 1, 1, id=2)
        al3 = Alignment('q1', 's1', 10000, 13000, 150000, 153000, 3000, 3000, 1, -1, id=3)
        al4 = Alignment('q1', 's1', 123000, 124000, 160000, 161000, 1000, 1000, 1, 1, id=4)
        lAlignments = [al1, al2, al3, al4]

        # Reset the table so only this test's fixtures are stored.
        self.db.deleteAllAlignments()
        self.db.insertlAlignments(lAlignments)

        iAlgmtChainer = AlignmentChainer(self.db)
        iAlgmtChainer.chainAlignments(lAlignments)

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(
            [Chain([al1, al2]), Chain([al3]),
             Chain([al4])], iAlgmtChainer.lChains)
        self.assertEqual({
            1: [0],
            2: [0],
            3: [1],
            4: [2]
        }, iAlgmtChainer.dIndex)
    def test_removeOverlappingChains_case2(self):
        # Fixture: eight Chr1-vs-Chr1 alignments on strand 1/1.  al6 repeats
        # the coordinates of al3, and al8 those of al4, under different ids.
        # Only the set-up and the chaining call are visible in this block.

        al1 = Alignment('Chr1', 'Chr1', 15088003, 15088582, 15253139, 15253732, 594, 594, 1, 1, id=1)
        al2 = Alignment('Chr1', 'Chr1', 15094505, 15096966, 15254239, 15256725, 2499, 2499, 1, 1, id=2)
        al3 = Alignment('Chr1', 'Chr1', 15090989, 15092375, 15257818, 15259247, 1432, 1432, 1, 1, id=3)
        al4 = Alignment('Chr1', 'Chr1', 15098601, 15102401, 15259265, 15263155, 3904, 3904, 1, 1, id=4)
        al5 = Alignment('Chr1', 'Chr1', 15088003, 15090006, 15257754, 15259751, 2021, 2021, 1, 1, id=5)
        al6 = Alignment('Chr1', 'Chr1', 15090989, 15092375, 15257818, 15259247, 1432, 1432, 1, 1, id=6)
        al7 = Alignment('Chr1', 'Chr1', 15094655, 15097013, 15258796, 15261191, 2410, 2410, 1, 1, id=7)
        al8 = Alignment('Chr1', 'Chr1', 15098601, 15102401, 15259265, 15263155, 3904, 3904, 1, 1, id=8)

        lAlignments = [al1, al2, al3, al4, al5, al6, al7, al8]

        # Replace the table content with the fixture before chaining.
        self.db.deleteAllAlignments()
        self.db.insertlAlignments(lAlignments)

        iAlgmtChainer = AlignmentChainer(self.db)
        iAlgmtChainer.chainAlignments(lAlignments)