def test_chainAlignments_Case1(self):
    """Test chainAlignment case1: simple chain

    Two alignments on the same query/subject pair and the same strands are
    expected to end up in a single chain, with both alignment ids indexed
    under chain position 0.
    """
    al1 = Alignment('q1', 's1', 1000, 1200, 2000, 2200, 200, 200, 1, 1, id=1)
    al2 = Alignment('q1', 's1', 1500, 2000, 3000, 3500, 500, 500, 1, 1, id=2)
    lAlignments = [al1, al2]
    # Presumably resets the stored alignments so only this test's data is
    # present -- verify against the db helper.
    self.db.deleteAllAlignments()
    self.db.insertlAlignments(lAlignments)
    iAlgmtChainer = AlignmentChainer(self.db)
    iAlgmtChainer.chainAlignments(lAlignments)
    # assertEqual is used instead of the assertEquals alias, which is
    # deprecated and no longer exists in Python 3.12.
    self.assertEqual([Chain([al1, al2])], iAlgmtChainer.lChains)
    self.assertEqual({1: [0], 2: [0]}, iAlgmtChainer.dIndex)
def test_removeInternalAlignments(self):
    """Chain two Chr1/Chr1 alignments that share the same subject start.

    NOTE(review): no assertion is visible in this test as shown; it only
    exercises chainAlignments on this data -- confirm that is intended.
    """
    alignments = [
        Alignment('Chr1', 'Chr1', 12806596, 12809714, 12796459, 12799562, 3123, 3123, 1, 1, id=1),
        Alignment('Chr1', 'Chr1', 12810507, 12813088, 12796459, 12799028, 2592, 2592, 1, 1, id=2),
    ]
    self.db.deleteAllAlignments()
    self.db.insertlAlignments(alignments)
    chainer = AlignmentChainer(self.db)
    chainer.chainAlignments(alignments)
def getAllAlignments(self):
    """Return list of all Alignments

    Reads self.inputBlastTabFile, a tab-delimited file with exactly eight
    fields per line: qseqid, sseqid, qstart, qend, sstart, send, length,
    nident. One Alignment is built per non-blank line.

    The query frame is always 1. The subject frame is 1 when sstart < send
    and -1 otherwise; for frame -1 the two subject coordinates are swapped
    before being passed to Alignment. Ids start at 1 and follow the order
    of the parsed lines.

    Blank lines are skipped instead of aborting the whole parse.

    Raises:
        ValueError: if a non-blank line does not hold exactly eight
            tab-separated fields, or a coordinate/count is not an integer.
    """
    lAlignments = []
    with open(self.inputBlastTabFile, 'r') as handle:
        for line in handle:
            if not line.strip():
                # e.g. a trailing empty line at the end of the file
                continue
            (qseqid, sseqid, qstart, qend,
             sstart, send, length, nident) = line.rstrip('\r\n').split('\t')
            sstart, send = int(sstart), int(send)
            if sstart < send:
                sframe = 1
            else:
                sframe = -1
                # Pass the subject coordinates in swapped order for the
                # reverse frame.
                sstart, send = send, sstart
            lAlignments.append(
                Alignment(qseqid, sseqid, int(qstart), int(qend),
                          sstart, send, int(length), int(nident),
                          1, sframe, id=len(lAlignments) + 1))
    logging.debug('{} HSP parsed'.format(len(lAlignments)))
    return lAlignments
def getAllAlignments(self):
    """Return list of all Alignments

    Parses self.inputBlastXMLFile with NCBIXML.parse and builds one
    Alignment per HSP, across all records and all subject hits. Ids are a
    running 1-based counter over every HSP in the file.

    The subject frame is read from hsp.frame[1]. For frame 1 the subject
    coordinates are passed as (sbjct_start, sbjct_end); for frame -1 they
    are passed swapped, as (sbjct_end, sbjct_start).

    Raises:
        Exception: if hsp.frame[1] is neither 1 nor -1 ("Unknown strand").
    """
    lAlignments = []
    # The handle is closed by the with-statement; no explicit close needed.
    with open(self.inputBlastXMLFile, 'r') as handle:
        index = 0
        for blast_record in NCBIXML.parse(handle):
            logging.debug('QUERY: {}'.format(blast_record.query))
            for alignment in blast_record.alignments:
                logging.debug('SUBJECT: {}'.format(alignment.hit_id))
                nb_hsp = 0
                for hsp in alignment.hsps:
                    nb_hsp += 1
                    index += 1
                    sframe = hsp.frame[1]
                    if sframe == 1:
                        sstart, send = hsp.sbjct_start, hsp.sbjct_end
                    elif sframe == -1:
                        sstart, send = hsp.sbjct_end, hsp.sbjct_start
                    else:
                        logging.error('Blast Parsing: Unknown strand')
                        raise Exception("Unknown strand")
                    lAlignments.append(
                        Alignment(blast_record.query, alignment.hit_id,
                                  hsp.query_start, hsp.query_end,
                                  sstart, send,
                                  hsp.align_length, hsp.identities,
                                  hsp.frame[0], sframe, id=index))
                # Number of HSPs seen for the current subject hit.
                logging.debug('{} HSP parsed'.format(nb_hsp))
    return lAlignments
def addAlignment(self):
    """Append the alignment described by the current parser fields.

    The counter self.nbAlgmts is incremented first and used as the id of
    the new Alignment. For subject frame 1 the subject coordinates are
    passed as (sstart, send); for frame -1 they are passed swapped. Any
    other frame is logged as an error and nothing is appended (the counter
    has still been incremented).
    """
    self.nbAlgmts += 1
    if self.sframe not in (1, -1):
        logging.error('Blast Parsing: Unknown strand')
        return
    if self.sframe == 1:
        sbjct_coords = (self.sstart, self.send)
    else:
        sbjct_coords = (self.send, self.sstart)
    new_algmt = Alignment(self.qid, self.sid, self.qstart, self.qend,
                          sbjct_coords[0], sbjct_coords[1],
                          self.length, self.ident,
                          self.qframe, self.sframe, id=self.nbAlgmts)
    self.lAlgmts.append(new_algmt)
def selectAllAlignments(self):
    """Return every row of the alignment table as a list of Alignment objects.

    The id column is passed as the id keyword; the remaining ten columns
    are passed positionally in select order.
    """
    sql = ('select id, query, sbjct, qstart, qend, sstart, send, length, '
           'identities, qstrand, sstrand from alignment')
    algmts = []
    for rec in self.conn.execute(sql):
        algmts.append(Alignment(rec[1], rec[2], rec[3], rec[4], rec[5],
                                rec[6], rec[7], rec[8], rec[9], rec[10],
                                id=rec[0]))
    return algmts
def selectAlignmentsWithDefinedIdOrderBySbjctCoord(self, lIds):
    """Return the alignments whose id is in lIds, ordered by ascending sstart.

    Each id is rendered as a single-quoted literal inside the IN (...)
    list of the query text.
    """
    quoted_ids = []
    for algmt_id in lIds:
        quoted_ids.append("'{}'".format(algmt_id))
    sql = ('select id, query, sbjct, qstart, qend, sstart, send, length, '
           'identities, qstrand, sstrand from alignment where '
           'id in ({}) order by sstart ASC').format(','.join(quoted_ids))
    algmts = []
    for rec in self.conn.execute(sql):
        algmts.append(Alignment(rec[1], rec[2], rec[3], rec[4], rec[5],
                                rec[6], rec[7], rec[8], rec[9], rec[10],
                                id=rec[0]))
    return algmts
def _reverse_algmts(self, lAlgmts): lReversedAlgmts = [] for algmt in lAlgmts: lReversedAlgmts.append( Alignment(algmt.sbjct, algmt.query, algmt.sstart, algmt.send, algmt.qstart, algmt.qend, algmt.length, algmt.identities, algmt.sstrand, algmt.qstrand, "{}.reverse".format(algmt.id))) return lReversedAlgmts
def selectAlignmentsWithDefinedSbjctAndQueryOrderBySbjctCoord(self, sbjct, query):
    """Select ordered sub-list of alignments with defined sbjct and query

    Returns the alignments whose sbjct and query columns equal the given
    names, ordered by ascending sstart, as a list of Alignment objects.

    The names are bound as query parameters rather than formatted into the
    SQL text, so a sequence name containing a quote character can neither
    break the statement nor change its meaning.
    NOTE(review): uses qmark ('?') placeholders, i.e. assumes self.conn is
    an sqlite3 connection -- confirm.
    """
    cursor = self.conn.execute(
        'select id, query, sbjct, qstart, qend, sstart, send, length, '
        'identities, qstrand, sstrand from alignment where '
        'sbjct = ? and query = ? '
        'order by sstart ASC',
        # str() keeps the textual comparison the formatted literal used to do.
        (str(sbjct), str(query)))
    return [
        Alignment(row[1], row[2], row[3], row[4], row[5], row[6], row[7],
                  row[8], row[9], row[10], id=row[0])
        for row in cursor
    ]
def exportDbToGff3(self, fileName):
    """Export entire db in gff3 match features

    Opens fileName for writing (creating or truncating it) and writes, for
    every row of the alignment table, the string returned by
    Alignment.convertToGff3().
    """
    # The with-statement closes the file; no explicit close() is needed.
    with open(fileName, 'w') as f:
        cursor = self.conn.execute(
            '''select id, query, sbjct, qstart, qend, sstart, send, length,identities, qstrand, sstrand from alignment''')
        for row in cursor:
            f.write(Alignment(row[1], row[2], row[3], row[4], row[5],
                              row[6], row[7], row[8], row[9], row[10],
                              id=row[0]).convertToGff3())
def selectAlignmentById(self, id):
    """Return the Alignment stored under the given id, or None if no row matches.

    The id is formatted unquoted into the query text; only the first
    matching row is read.
    """
    sql = ('select id, query, sbjct, qstart, qend, sstart, send, length, '
           'identities, qstrand, sstrand from alignment where id = {}').format(id)
    rec = self.conn.execute(sql).fetchone()
    if not rec:
        return None
    return Alignment(rec[1], rec[2], rec[3], rec[4], rec[5], rec[6], rec[7],
                     rec[8], rec[9], rec[10], id=rec[0])
def test_getAllAlignments(self):
    """Compare the alignments parsed from test-data/blast.tab with the expected list."""
    parser = BlastTabParser("test-data/blast.tab")
    expected = []
    expected.append(Alignment('seq1', 'seq1', 1, 167, 1, 167, 167, 167, 1, 1, id=1))
    expected.append(Alignment('seq1', 'seq2', 1, 164, 237, 400, 164, 164, 1, -1, id=2))
    expected.append(Alignment('seq1', 'seq2', 50, 113, 54, 118, 66, 63, 1, 1, id=3))
    expected.append(Alignment('seq2', 'seq2', 1, 400, 1, 400, 400, 400, 1, 1, id=4))
    expected.append(Alignment('seq2', 'seq2', 288, 351, 54, 118, 66, 63, 1, -1, id=5))
    expected.append(Alignment('seq2', 'seq2', 54, 118, 288, 351, 66, 63, 1, -1, id=6))
    expected.append(Alignment('seq2', 'seq1', 237, 400, 1, 164, 164, 164, 1, -1, id=7))
    expected.append(Alignment('seq2', 'seq1', 54, 118, 50, 113, 66, 63, 1, 1, id=8))
    parsed = parser.getAllAlignments()
    self.assertEqual(parsed, expected)
def selectProximalAlgmts(self, id, maxGap=3000):
    """Return the alignments lying just after alignment `id` on the subject.

    An alignment al2 is returned when, relative to the alignment al1 with
    the given id:
      - it has the same sbjct, query, qstrand and sstrand,
      - al1.sstart < al2.sstart,
      - (al2.sstart - al1.send) is greater than -1 and less than maxGap.
    Results are ordered by al2.sstart.
    """
    sql = ('select al2.id, al2.query, al2.sbjct, al2.qstart, al2.qend, '
           'al2.sstart, al2.send, al2.length, al2.identities, al2.qstrand, '
           'al2.sstrand from alignment al1, alignment al2 where '
           '(al2.sstart-al1.send) < {} and al1.sstart < al2.sstart '
           'and (al2.sstart-al1.send) > -1 '
           'and al1.sbjct = al2.sbjct and al1.query = al2.query '
           'and al1.qstrand = al2.qstrand and al1.sstrand = al2.sstrand '
           'and al1.id = {} order by al2.sstart').format(maxGap, id)
    neighbours = []
    for rec in self.conn.execute(sql):
        neighbours.append(Alignment(rec[1], rec[2], rec[3], rec[4], rec[5],
                                    rec[6], rec[7], rec[8], rec[9], rec[10],
                                    id=rec[0]))
    return neighbours
def test_distanceBetweenQueryAlgmts(self):
    """Test distanceBetweenQueryAlgmts

    Six alignment pairs are checked; per the case labels they cover
    separated, overlapping and nested query intervals, in both argument
    orders. The expected value is positive for the separated pairs and
    negative for the overlapping/nested ones.
    """
    algmt1 = Alignment('q1', 's1', 20, 30, 10, 20, 10, 10, 1, 1, id=1)
    algmt2 = Alignment('q1', 's1', 100, 110, 50, 60, 10, 10, 1, 1, id=2)
    algmt3 = Alignment('q1', 's1', 100, 110, 10, 20, 10, 10, 1, 1, id=3)
    algmt4 = Alignment('q1', 's1', 20, 30, 50, 60, 10, 10, 1, 1, id=4)
    algmt5 = Alignment('q1', 's1', 10, 30, 10, 20, 10, 10, 1, 1, id=5)
    algmt6 = Alignment('q1', 's1', 20, 50, 50, 60, 10, 10, 1, 1, id=6)
    algmt7 = Alignment('q1', 's1', 10, 50, 10, 20, 10, 10, 1, 1, id=7)
    algmt8 = Alignment('q1', 's1', 20, 30, 50, 60, 10, 10, 1, 1, id=8)
    algmt9 = Alignment('q1', 's1', 20, 50, 10, 20, 10, 10, 1, 1, id=9)
    algmt10 = Alignment('q1', 's1', 10, 30, 50, 60, 10, 10, 1, 1, id=10)
    algmt11 = Alignment('q1', 's1', 20, 30, 10, 20, 10, 10, 1, 1, id=11)
    algmt12 = Alignment('q1', 's1', 10, 50, 50, 60, 10, 10, 1, 1, id=12)
    iAlgmtChainer = AlignmentChainer(self.db)
    # (expected distance, first alignment, second alignment, failure message)
    lCases = [
        (69, algmt1, algmt2, "1 before 2"),
        (69, algmt3, algmt4, "2 before 1"),
        (-11, algmt5, algmt6, "1 start , 2 overlap"),
        (-31, algmt7, algmt8, "1 start before, 2 nested"),
        (-11, algmt9, algmt10, "2 starrt before, 1 overlap"),
        (-31, algmt11, algmt12, "2 start before, 1 nested"),
    ]
    for expected, first, second, msg in lCases:
        # assertEqual replaces the assertEquals alias, which is deprecated
        # and no longer exists in Python 3.12.
        self.assertEqual(
            expected,
            iAlgmtChainer.distanceBetweenQueryAlgmts(first, second),
            msg)
def test_chainAlignments_Case3(self):
    """Test chainAlignment case3: complex chain with interleaved coordinates

    al1 and al2 are expected to share one chain; al3 and al4 (subject
    strand -1) and al5 are each expected to form a chain of their own.
    """
    al1 = Alignment('q1', 's1', 1000, 1200, 2000, 2200, 200, 200, 1, 1, id=1)
    al2 = Alignment('q1', 's1', 1500, 2000, 3000, 3500, 500, 500, 1, 1, id=2)
    al3 = Alignment('q1', 's1', 10000, 13000, 150000, 153000, 3000, 3000, 1, -1, id=3)
    al4 = Alignment('q1', 's1', 11000, 14000, 154000, 157000, 3000, 3000, 1, -1, id=4)
    al5 = Alignment('q1', 's1', 123000, 124000, 160000, 161000, 1000, 1000, 1, 1, id=5)
    lAlignments = [al1, al2, al3, al4, al5]
    self.db.deleteAllAlignments()
    self.db.insertlAlignments(lAlignments)
    iAlgmtChainer = AlignmentChainer(self.db)
    iAlgmtChainer.chainAlignments(lAlignments)
    # assertEqual replaces the assertEquals alias, which is deprecated and
    # no longer exists in Python 3.12.
    self.assertEqual(
        [Chain([al1, al2]), Chain([al3]), Chain([al4]), Chain([al5])],
        iAlgmtChainer.lChains)
    self.assertEqual(
        {1: [0], 2: [0], 3: [1], 4: [2], 5: [3]},
        iAlgmtChainer.dIndex)
def test_removeOverlappingChains(self):
    """Chain five q1/s1 alignments together with four s1/q1 counterparts.

    NOTE(review): no assertion and no removeOverlappingChains call is
    visible in this test as shown; it only stores and chains the
    alignments -- confirm that is intended.
    """
    alignments = [
        Alignment('q1', 's1', 1000, 1201, 2000, 2201, 202, 202, 1, 1, id=1),
        Alignment('q1', 's1', 1500, 2001, 3000, 3501, 502, 502, 1, 1, id=2),
        Alignment('q1', 's1', 1000, 1200, 2100, 2300, 201, 201, 1, 1, id=3),
        Alignment('q1', 's1', 1500, 2000, 3100, 3600, 501, 501, 1, 1, id=4),
        Alignment('q1', 's1', 123000, 124000, 160000, 161000, 1001, 1001, 1, 1, id=5),
        # Same coordinates as the first four, with query and subject exchanged.
        Alignment('s1', 'q1', 2000, 2201, 1000, 1201, 202, 202, 1, 1, id=16),
        Alignment('s1', 'q1', 3000, 3501, 1500, 2001, 502, 502, 1, 1, id=11),
        Alignment('s1', 'q1', 2100, 2300, 1000, 1200, 201, 201, 1, 1, id=31),
        Alignment('s1', 'q1', 3100, 3600, 1500, 2000, 501, 501, 1, 1, id=41),
    ]
    self.db.deleteAllAlignments()
    self.db.insertlAlignments(alignments)
    chainer = AlignmentChainer(self.db)
    chainer.chainAlignments(alignments)
def test_chainAlignments_Case2(self):
    """Test chainAlignment case2: variable constraints

    al1 and al2 are expected to share one chain; al3 (subject strand -1)
    and al4 are each expected to form a chain of their own.
    """
    al1 = Alignment('q1', 's1', 1000, 1200, 2000, 2200, 200, 200, 1, 1, id=1)
    al2 = Alignment('q1', 's1', 1500, 2000, 3000, 3500, 500, 500, 1, 1, id=2)
    al3 = Alignment('q1', 's1', 10000, 13000, 150000, 153000, 3000, 3000, 1, -1, id=3)
    al4 = Alignment('q1', 's1', 123000, 124000, 160000, 161000, 1000, 1000, 1, 1, id=4)
    lAlignments = [al1, al2, al3, al4]
    self.db.deleteAllAlignments()
    self.db.insertlAlignments(lAlignments)
    iAlgmtChainer = AlignmentChainer(self.db)
    iAlgmtChainer.chainAlignments(lAlignments)
    # assertEqual replaces the assertEquals alias, which is deprecated and
    # no longer exists in Python 3.12.
    self.assertEqual(
        [Chain([al1, al2]), Chain([al3]), Chain([al4])],
        iAlgmtChainer.lChains)
    self.assertEqual(
        {1: [0], 2: [0], 3: [1], 4: [2]},
        iAlgmtChainer.dIndex)
def test_removeOverlappingChains_case2(self):
    """Chain eight Chr1/Chr1 alignments (ids 1-8) on the forward strands.

    NOTE(review): no assertion and no removeOverlappingChains call is
    visible in this test as shown; it only stores and chains the
    alignments -- confirm that is intended.
    """
    alignments = [
        Alignment('Chr1', 'Chr1', 15088003, 15088582, 15253139, 15253732, 594, 594, 1, 1, id=1),
        Alignment('Chr1', 'Chr1', 15094505, 15096966, 15254239, 15256725, 2499, 2499, 1, 1, id=2),
        Alignment('Chr1', 'Chr1', 15090989, 15092375, 15257818, 15259247, 1432, 1432, 1, 1, id=3),
        Alignment('Chr1', 'Chr1', 15098601, 15102401, 15259265, 15263155, 3904, 3904, 1, 1, id=4),
        Alignment('Chr1', 'Chr1', 15088003, 15090006, 15257754, 15259751, 2021, 2021, 1, 1, id=5),
        Alignment('Chr1', 'Chr1', 15090989, 15092375, 15257818, 15259247, 1432, 1432, 1, 1, id=6),
        Alignment('Chr1', 'Chr1', 15094655, 15097013, 15258796, 15261191, 2410, 2410, 1, 1, id=7),
        Alignment('Chr1', 'Chr1', 15098601, 15102401, 15259265, 15263155, 3904, 3904, 1, 1, id=8),
    ]
    self.db.deleteAllAlignments()
    self.db.insertlAlignments(alignments)
    chainer = AlignmentChainer(self.db)
    chainer.chainAlignments(alignments)