def test_chainAlignments_Case1(self):
    """Test chainAlignment case1: simple chain"""
    al1 = Alignment('q1', 's1', 1000, 1200, 2000, 2200, 200, 200, 1, 1, id=1)
    al2 = Alignment('q1', 's1', 1500, 2000, 3000, 3500, 500, 500, 1, 1, id=2)
    lAlignments = [al1, al2]

    # Start from an empty alignment table so only this fixture is present.
    self.db.deleteAllAlignments()
    self.db.insertlAlignments(lAlignments)

    iAlgmtChainer = AlignmentChainer(self.db)
    iAlgmtChainer.chainAlignments(lAlignments)

    # Both alignments are expected in one single chain.
    # assertEqual replaces the assertEquals alias (removed in Python 3.12).
    self.assertEqual([Chain([al1, al2])], iAlgmtChainer.lChains)
    # dIndex keys match the alignment ids used above; values presumably are
    # indexes into lChains -- TODO confirm against AlignmentChainer.
    self.assertEqual({1: [0], 2: [0]}, iAlgmtChainer.dIndex)
def test_removeInternalAlignments(self):
    """Chain two 'Chr1' vs 'Chr1' alignments; no result is asserted.

    NOTE(review): despite its name, the visible body only exercises
    AlignmentChainer.chainAlignments and checks nothing beyond the calls
    completing without an exception.
    """
    lFixture = [
        Alignment('Chr1', 'Chr1', 12806596, 12809714, 12796459, 12799562,
                  3123, 3123, 1, 1, id=1),
        Alignment('Chr1', 'Chr1', 12810507, 12813088, 12796459, 12799028,
                  2592, 2592, 1, 1, id=2),
    ]

    # Reset the table, then load only the fixture.
    self.db.deleteAllAlignments()
    self.db.insertlAlignments(lFixture)

    AlignmentChainer(self.db).chainAlignments(lFixture)
def test_distanceBetweenQueryAlgmts(self):
    """Test distanceBetweenQueryAlgmts"""
    algmt1 = Alignment('q1', 's1', 20, 30, 10, 20, 10, 10, 1, 1, id=1)
    algmt2 = Alignment('q1', 's1', 100, 110, 50, 60, 10, 10, 1, 1, id=2)
    algmt3 = Alignment('q1', 's1', 100, 110, 10, 20, 10, 10, 1, 1, id=3)
    algmt4 = Alignment('q1', 's1', 20, 30, 50, 60, 10, 10, 1, 1, id=4)
    algmt5 = Alignment('q1', 's1', 10, 30, 10, 20, 10, 10, 1, 1, id=5)
    algmt6 = Alignment('q1', 's1', 20, 50, 50, 60, 10, 10, 1, 1, id=6)
    algmt7 = Alignment('q1', 's1', 10, 50, 10, 20, 10, 10, 1, 1, id=7)
    algmt8 = Alignment('q1', 's1', 20, 30, 50, 60, 10, 10, 1, 1, id=8)
    algmt9 = Alignment('q1', 's1', 20, 50, 10, 20, 10, 10, 1, 1, id=9)
    algmt10 = Alignment('q1', 's1', 10, 30, 50, 60, 10, 10, 1, 1, id=10)
    algmt11 = Alignment('q1', 's1', 20, 30, 10, 20, 10, 10, 1, 1, id=11)
    algmt12 = Alignment('q1', 's1', 10, 50, 50, 60, 10, 10, 1, 1, id=12)

    iAlgmtChainer = AlignmentChainer(self.db)

    # (expected distance, first alignment, second alignment, failure message)
    # Positive values are asserted for the disjoint pairs, negative values
    # for the overlapping / nested pairs.
    lCases = [
        (69, algmt1, algmt2, "1 before 2"),
        (69, algmt3, algmt4, "2 before 1"),
        (-11, algmt5, algmt6, "1 start , 2 overlap"),
        (-31, algmt7, algmt8, "1 start before, 2 nested"),
        (-11, algmt9, algmt10, "2 starrt before, 1 overlap"),
        (-31, algmt11, algmt12, "2 start before, 1 nested"),
    ]
    for expected, first, second, message in lCases:
        # assertEqual replaces the assertEquals alias (removed in Python 3.12).
        self.assertEqual(
            expected,
            iAlgmtChainer.distanceBetweenQueryAlgmts(first, second),
            message)
def chainSbjctQueryAlgmts(db, maxGap, chainLength, sbjct, query, lAlgmts):
    """Chain the alignments of one subject/query pair and keep the long chains.

    Args:
        db: database handle handed to AlignmentChainer.
        maxGap: forwarded to AlignmentChainer as its ``maxGap`` option.
        chainLength: only chains whose ``getLength()`` is strictly greater
            than this value are returned.
        sbjct: subject name; used here for the log messages only.
        query: query name; used here for the log messages only.
        lAlgmts: alignments to chain.

    Returns:
        list: the selected chains, in the order of the chainer's ``lChains``.
    """
    logging.debug('Chaining Alignment with subject: {} and query: {}'.format(
        sbjct, query))

    pairChainer = AlignmentChainer(db, maxGap=maxGap)
    pairChainer.chainAlignments(lAlgmts, multiproc=True)

    lKept = [c for c in pairChainer.lChains if c.getLength() > chainLength]

    logging.info('Selecting {} chains with subject: {} and query: {}'.format(
        len(lKept), sbjct, query))
    return lKept
def _chainSbjctQueryAlgmts(self, sbjct, query, maxGap=3000, chainLength=5000):
    """Chain the stored alignments of one subject/query pair.

    The previous version read ``maxGap`` and ``chainLength`` as free names
    although they were neither parameters nor attributes, so the call
    failed with NameError unless module globals of those names happened to
    exist. They are now explicit keyword parameters whose defaults match
    those of ``chainAlignments``.

    Args:
        sbjct: subject name used to select the alignments.
        query: query name used to select the alignments.
        maxGap: forwarded to AlignmentChainer as its ``maxGap`` option.
        chainLength: only chains whose ``getLength()`` is strictly greater
            than this value are returned.

    Returns:
        list: the selected chains, in the order of the chainer's ``lChains``.
    """
    logging.debug(
        'Chaining Alignment with subject: {} and query: {}'.format(
            sbjct, query))

    lAlgmts = self.db.selectAlignmentsWithDefinedSbjctAndQueryOrderBySbjctCoord(
        sbjct, query)
    chainer = AlignmentChainer(self.db, maxGap=maxGap)
    chainer.chainAlignments(lAlgmts)

    lSelectedChains = [
        chain for chain in chainer.lChains
        if chain.getLength() > chainLength
    ]

    logging.info(
        'Selecting {} chains with subject: {} and query: {}'.format(
            len(lSelectedChains), sbjct, query))
    return lSelectedChains
def chainAlignments(self, maxGap=3000, chainLength=5000):
    """Chain alignments for every subject/query combination.

    Every subject returned by the database is combined with every query.
    For each pair the alignments are chained with a fresh AlignmentChainer,
    and the chains whose ``getLength()`` is strictly greater than
    ``chainLength`` are collected. The collected chains are finally passed
    through ``sortListOfChains`` and stored in ``self.lSortedChains``.

    Args:
        maxGap: forwarded to AlignmentChainer as its ``maxGap`` option.
        chainLength: length threshold a chain must exceed to be kept.
    """
    lSubjectNames = self.db.selectAllSbjcts()
    lQueryNames = self.db.selectAllQueries()

    lKept = []
    for sbjct in lSubjectNames:
        for query in lQueryNames:
            logging.debug('Chaining Alignment with subject: {} and query: {}'.format(sbjct, query))
            lPairAlgmts = self.db.selectAlignmentsWithDefinedSbjctAndQueryOrderBySbjctCoord(sbjct, query)
            pairChainer = AlignmentChainer(self.db, maxGap=maxGap)
            pairChainer.chainAlignments(lPairAlgmts)
            lKept.extend(
                c for c in pairChainer.lChains if c.getLength() > chainLength)

    # A separate chainer instance is used only for the final sort.
    sorter = AlignmentChainer(self.db, maxGap=maxGap)
    self.lSortedChains = sorter.sortListOfChains(lKept)
def removeDuplicationWithInternalSimilarity(self):
    """Replace self.lSortedChains with the chainer-filtered list.

    Delegates to AlignmentChainer.removeChainsWithInternalSimilarity and
    stores whatever it returns back into ``self.lSortedChains``.
    """
    self.lSortedChains = AlignmentChainer(
        self.db).removeChainsWithInternalSimilarity(self.lSortedChains)
def removeOverlappingDuplications(self):
    """Replace self.lSortedChains with the chainer-filtered list.

    Delegates to AlignmentChainer.removeOverlappingChains and stores
    whatever it returns back into ``self.lSortedChains``.
    """
    self.lSortedChains = AlignmentChainer(
        self.db).removeOverlappingChains(self.lSortedChains)
def test_chainAlignments_Case2(self):
    """Test chainAlignment case2: variable constraints"""
    al1 = Alignment('q1', 's1', 1000, 1200, 2000, 2200, 200, 200, 1, 1, id=1)
    al2 = Alignment('q1', 's1', 1500, 2000, 3000, 3500, 500, 500, 1, 1, id=2)
    # al3 is the only alignment whose last positional argument is -1.
    al3 = Alignment('q1', 's1', 10000, 13000, 150000, 153000, 3000, 3000, 1, -1, id=3)
    al4 = Alignment('q1', 's1', 123000, 124000, 160000, 161000, 1000, 1000, 1, 1, id=4)
    lAlignments = [al1, al2, al3, al4]

    # Start from an empty alignment table so only this fixture is present.
    self.db.deleteAllAlignments()
    self.db.insertlAlignments(lAlignments)

    iAlgmtChainer = AlignmentChainer(self.db)
    iAlgmtChainer.chainAlignments(lAlignments)

    # Expected: al1 and al2 chained together, al3 and al4 each on their own.
    # assertEqual replaces the assertEquals alias (removed in Python 3.12).
    self.assertEqual(
        [Chain([al1, al2]), Chain([al3]), Chain([al4])],
        iAlgmtChainer.lChains)
    # dIndex keys match the alignment ids used above; values presumably are
    # indexes into lChains -- TODO confirm against AlignmentChainer.
    self.assertEqual({
        1: [0],
        2: [0],
        3: [1],
        4: [2]
    }, iAlgmtChainer.dIndex)
def test_removeOverlappingChains_case2(self):
    """Chain eight 'Chr1' vs 'Chr1' alignments; no result is asserted.

    The fixture contains two pairs with identical coordinates (ids 3/6 and
    ids 4/8).

    NOTE(review): despite its name, the visible body only exercises
    AlignmentChainer.chainAlignments and checks nothing beyond the calls
    completing without an exception.
    """
    lFixture = [
        Alignment('Chr1', 'Chr1', 15088003, 15088582, 15253139, 15253732,
                  594, 594, 1, 1, id=1),
        Alignment('Chr1', 'Chr1', 15094505, 15096966, 15254239, 15256725,
                  2499, 2499, 1, 1, id=2),
        Alignment('Chr1', 'Chr1', 15090989, 15092375, 15257818, 15259247,
                  1432, 1432, 1, 1, id=3),
        Alignment('Chr1', 'Chr1', 15098601, 15102401, 15259265, 15263155,
                  3904, 3904, 1, 1, id=4),
        Alignment('Chr1', 'Chr1', 15088003, 15090006, 15257754, 15259751,
                  2021, 2021, 1, 1, id=5),
        Alignment('Chr1', 'Chr1', 15090989, 15092375, 15257818, 15259247,
                  1432, 1432, 1, 1, id=6),
        Alignment('Chr1', 'Chr1', 15094655, 15097013, 15258796, 15261191,
                  2410, 2410, 1, 1, id=7),
        Alignment('Chr1', 'Chr1', 15098601, 15102401, 15259265, 15263155,
                  3904, 3904, 1, 1, id=8),
    ]

    # Reset the table, then load only the fixture.
    self.db.deleteAllAlignments()
    self.db.insertlAlignments(lFixture)

    AlignmentChainer(self.db).chainAlignments(lFixture)
def test_removeOverlappingChains(self):
    """Chain a fixture of 'q1'/'s1' alignments and their mirrored copies.

    The last four alignments repeat the first four with the two names and
    the two coordinate pairs swapped.

    NOTE(review): despite its name, the visible body only exercises
    AlignmentChainer.chainAlignments and asserts nothing.
    """
    lForward = [
        Alignment('q1', 's1', 1000, 1201, 2000, 2201, 202, 202, 1, 1, id=1),
        Alignment('q1', 's1', 1500, 2001, 3000, 3501, 502, 502, 1, 1, id=2),
        Alignment('q1', 's1', 1000, 1200, 2100, 2300, 201, 201, 1, 1, id=3),
        Alignment('q1', 's1', 1500, 2000, 3100, 3600, 501, 501, 1, 1, id=4),
        Alignment('q1', 's1', 123000, 124000, 160000, 161000, 1001, 1001, 1, 1, id=5),
    ]
    lMirrored = [
        Alignment('s1', 'q1', 2000, 2201, 1000, 1201, 202, 202, 1, 1, id=16),
        Alignment('s1', 'q1', 3000, 3501, 1500, 2001, 502, 502, 1, 1, id=11),
        Alignment('s1', 'q1', 2100, 2300, 1000, 1200, 201, 201, 1, 1, id=31),
        Alignment('s1', 'q1', 3100, 3600, 1500, 2000, 501, 501, 1, 1, id=41),
    ]
    lFixture = lForward + lMirrored

    # Reset the table, then load only the fixture.
    self.db.deleteAllAlignments()
    self.db.insertlAlignments(lFixture)

    AlignmentChainer(self.db).chainAlignments(lFixture)
def test_chainAlignments_Case3(self):
    """Test chainAlignment case3: complex chain with interleaved coordinates"""
    al1 = Alignment('q1', 's1', 1000, 1200, 2000, 2200, 200, 200, 1, 1, id=1)
    al2 = Alignment('q1', 's1', 1500, 2000, 3000, 3500, 500, 500, 1, 1, id=2)
    # al3 and al4 are the two alignments whose last positional argument is -1.
    al3 = Alignment('q1', 's1', 10000, 13000, 150000, 153000, 3000, 3000, 1, -1, id=3)
    al4 = Alignment('q1', 's1', 11000, 14000, 154000, 157000, 3000, 3000, 1, -1, id=4)
    al5 = Alignment('q1', 's1', 123000, 124000, 160000, 161000, 1000, 1000, 1, 1, id=5)
    lAlignments = [al1, al2, al3, al4, al5]

    # Start from an empty alignment table so only this fixture is present.
    self.db.deleteAllAlignments()
    self.db.insertlAlignments(lAlignments)

    iAlgmtChainer = AlignmentChainer(self.db)
    iAlgmtChainer.chainAlignments(lAlignments)

    # Expected: only al1 and al2 are chained; al3, al4, al5 stay separate.
    # assertEqual replaces the assertEquals alias (removed in Python 3.12).
    self.assertEqual(
        [Chain([al1, al2]), Chain([al3]), Chain([al4]), Chain([al5])],
        iAlgmtChainer.lChains)
    # dIndex keys match the alignment ids used above; values presumably are
    # indexes into lChains -- TODO confirm against AlignmentChainer.
    self.assertEqual({
        1: [0],
        2: [0],
        3: [1],
        4: [2],
        5: [3]
    }, iAlgmtChainer.dIndex)
def pairingChains(self):
    """Replace self.lSortedChains with the result of the chainer's pairing.

    Delegates to AlignmentChainer.pairingChains and stores whatever it
    returns back into ``self.lSortedChains``.
    """
    self.lSortedChains = AlignmentChainer(self.db).pairingChains(
        self.lSortedChains)
def chainAlignments(self, maxGap=3000, chainLength=5000):
    """Chain alignments per subject/query pair and keep the long chains.

    Subjects and queries are sorted and must be the same list. Each subject
    is combined only with the queries at or after its own position in that
    sorted list. For every pair the alignments are chained, and chains
    whose ``getLength()`` is strictly greater than ``chainLength`` are
    collected. The collected chains are passed through ``sortListOfChains``
    and stored in ``self.lSortedChains``.

    When ``self.procs > 1`` the database is dumped to ``dump.sql`` and
    loaded into one ``sddetector.<i>.db`` copy per process. Pairs are then
    dispatched in batches of ``self.procs`` tasks, each task receiving its
    own database copy. The temporary files are removed afterwards, now
    also when an error interrupts the run.

    Args:
        maxGap: forwarded to AlignmentChainer as its ``maxGap`` option.
        chainLength: length threshold a chain must exceed to be kept.

    Raises:
        ValueError: if the sorted subjects and queries differ. The previous
            ``raise '<string>'`` statement was itself a TypeError and hid
            the intended message.
    """
    lSbjcts = sorted(self.db.selectAllSbjcts())
    lQueries = sorted(self.db.selectAllQueries())
    if lSbjcts != lQueries:
        raise ValueError('error in list of subjects / queries of algmts')

    lSelectedChains = []
    if self.procs > 1:
        lDbs = []

        def runBatch(tasks):
            """Run one batch in a fresh pool and collect the returned chains."""
            pool = multiprocessing.Pool(self.procs)
            try:
                results = [pool.apply_async(working_process, t) for t in tasks]
                for r in results:
                    lSelectedChains.extend(r.get())
            finally:
                # Release the worker processes instead of leaking one pool
                # per batch.
                pool.close()
                pool.join()

        try:
            with open('dump.sql', 'w') as f:
                for line in self.db.conn.iterdump():
                    f.write('%s\n' % line)
            # The dump is identical for every copy, so read it once.
            with open('dump.sql', 'r') as f:
                sql = f.read()

            for i in range(self.procs):
                dbfile = 'sddetector.{}.db'.format(i)
                dest = sqlite3.connect(dbfile)
                try:
                    dest.cursor().executescript(sql)
                    dest.commit()
                finally:
                    dest.close()
                lDbs.append(AlignDB(dbfile, copy=True))

            tasks = []
            for i, sbjct in enumerate(lSbjcts):
                for query in lQueries[i:]:
                    lAlgmts = self.db.selectAlignmentsWithDefinedSbjctAndQueryOrderBySbjctCoord(
                        sbjct, query)
                    # The n-th task of a batch uses the n-th database copy,
                    # so no copy is shared within a batch.
                    tasks.append((chainSbjctQueryAlgmts,
                                  (lDbs[len(tasks)], maxGap, chainLength,
                                   sbjct, query, lAlgmts)))
                    if len(tasks) == len(lDbs):
                        runBatch(tasks)
                        tasks = []
            # Flush the last, possibly incomplete, batch.
            if tasks:
                runBatch(tasks)
        finally:
            for db in lDbs:
                os.remove(db.dbfile)
            if os.path.exists('dump.sql'):
                os.remove('dump.sql')
    else:
        for i, sbjct in enumerate(lSbjcts):
            for query in lQueries[i:]:
                logging.debug(
                    'Chaining Alignment with subject: {} and query: {}'.
                    format(sbjct, query))
                lAlgmts = self.db.selectAlignmentsWithDefinedSbjctAndQueryOrderBySbjctCoord(
                    sbjct, query)
                chainer = AlignmentChainer(self.db, maxGap=maxGap)
                chainer.chainAlignments(lAlgmts)
                lPairChains = [
                    chain for chain in chainer.lChains
                    if chain.getLength() > chainLength
                ]
                lSelectedChains.extend(lPairChains)
                logging.info(
                    'Selecting {} chains with subject: {} and query: {}'.
                    format(len(lPairChains), sbjct, query))

    # A separate chainer instance is used only for the final sort.
    chainer2 = AlignmentChainer(self.db, maxGap=maxGap)
    self.lSortedChains = chainer2.sortListOfChains(lSelectedChains)