def alignSeqs(seqsFN, dbName, wordSize, outFN, maxNumMismatches, sendExitSignal = False):
    """Align every query in seqsFN against a stored database; write hits to outFN.

    Arguments may arrive as text (e.g. from a command line), so the numeric
    ones are coerced with int() and the flag is parsed by value.

    seqsFN           -- tab-delimited query file.  The first two columns of
                        each line are passed to cgAlign.cgSeq (presumably
                        ID, then sequence -- confirm against cgAlign).
                        Blank lines are skipped; a non-blank line with fewer
                        than two columns raises IndexError.
    dbName           -- database prefix: dbName + '.sDB' and dbName + '.wDB'
                        are loaded.
    wordSize         -- forwarded to cgAlign.alignQuery after int().
    outFN            -- output file, opened with mode 'w'.
    maxNumMismatches -- forwarded to cgAlign.alignQuery after int().
    sendExitSignal   -- when true, cgExit.sendExitSignal(seqsFN) is called
                        once all queries are done.  The strings '', 'false',
                        '0' and 'no' (any case) count as false.
    """
    maxNumMismatches = int(maxNumMismatches)
    wordSize = int(wordSize)

    # bool('False') is True, so a textual flag must be interpreted by its
    # content rather than by plain truthiness.
    if isinstance(sendExitSignal, str):
        sendExitSignal = sendExitSignal.strip().lower() not in ('', 'false', '0', 'no')
    else:
        sendExitSignal = bool(sendExitSignal)

    timer = bioLibCG.cgTimer()
    timer.start()

    # load dbs (sequence database first, then word database)
    sDB = cgAlign.loadSequenceDatabase(dbName + '.sDB')
    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 function.
    print(timer.split())
    wDB = cgAlign.loadWordDatabase(dbName + '.wDB')
    print(timer.split())

    # align each seq; the with-block closes both files even if a query fails
    with open(seqsFN, 'r') as f, open(outFN, 'w') as fOut:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # e.g. a trailing empty line at the end of the file
                continue
            fields = stripped.split('\t')
            qSeq = cgAlign.cgSeq(fields[0], fields[1])

            # write out the alignments
            cgAlign.alignQuery(qSeq, wDB, sDB, wordSize, maxNumMismatches, fOut)

    print(timer.split())

    if sendExitSignal:
        cgExit.sendExitSignal(seqsFN)
def createDatabases(targetsFN, wordSize, runName, hasIDs=False):
    """Build the word and sequence databases for a file of target sequences.

    targetsFN -- text file with one target per line.
    wordSize  -- word size given to cgAlign.createWordDatabase (int() is
                 applied, so a numeric string is accepted).
    runName   -- name handed to cgAlign.writeWordDatabase and
                 cgAlign.writeSequenceDatabase for the output.
    hasIDs    -- True (or the text 'True', any case) when every line is
                 '<id><TAB><sequence>'.  Otherwise the whole stripped line is
                 the sequence and its 0-based line index is used as the ID.

    With hasIDs set, a line that does not split into exactly two
    tab-separated fields raises ValueError.
    """
    wordSize = int(wordSize)

    # Accept a real boolean as well as its textual form: the earlier
    # `hasIDs == "True"` comparison silently turned boolean True into False.
    hasIDs = str(hasIDs).strip().lower() == 'true'
    # Formatted as one string so the output is identical under the Python 2
    # print statement and the Python 3 print function.
    print('using IDs %s' % hasIDs)

    # make sequence list out of targets, make db, write to file
    seqList = []
    # The with-block closes the file even when a malformed line raises.
    with open(targetsFN, 'r') as f:
        print('obtaining sequences')
        for i, line in enumerate(f):
            if hasIDs:
                theID, seq = line.strip().split('\t')
            else:
                theID, seq = i, line.strip()
            seqList.append(cgAlign.cgSeq(theID, seq))

    print('making word db')
    wordDB = cgAlign.createWordDatabase(seqList, wordSize)
    cgAlign.writeWordDatabase(wordDB, runName)

    print('making seq db')
    seqDB = cgAlign.createSequenceDatabase(seqList)
    cgAlign.writeSequenceDatabase(seqDB, runName)
def createDatabases(targetsFN, wordSize, runName, hasIDs = False):
    """Create and write the word and sequence databases for a target file.

    targetsFN -- path of the target file, read line by line.
    wordSize  -- passed through int() and on to cgAlign.createWordDatabase.
    runName   -- output name given to both cgAlign.write*Database calls.
    hasIDs    -- boolean True or the string 'True' (case-insensitive) means
                 each line holds an ID and a sequence separated by a tab.
                 Any other value means each line is just a sequence, and the
                 0-based line number becomes its ID.

    Raises ValueError when hasIDs is set and a line does not contain exactly
    two tab-separated fields.
    """
    wordSize = int(wordSize)

    # Comparing directly against "True" rejected a genuine boolean True, so
    # the value is normalised through str() before the comparison.
    hasIDs = str(hasIDs).strip().lower() == 'true'
    # One pre-formatted argument keeps the printed text the same whether
    # print is the Python 2 statement or the Python 3 function.
    print('using IDs %s' % hasIDs)

    # make sequence list out of targets, make db, write to file
    seqList = []
    # Context manager: the handle is released even if parsing a line fails.
    with open(targetsFN, 'r') as f:
        print('obtaining sequences')
        for lineNum, line in enumerate(f):
            content = line.strip()
            if hasIDs:
                theID, seq = content.split('\t')
            else:
                theID, seq = lineNum, content
            seqList.append(cgAlign.cgSeq(theID, seq))

    print('making word db')
    wordDB = cgAlign.createWordDatabase(seqList, wordSize)
    cgAlign.writeWordDatabase(wordDB, runName)

    print('making seq db')
    seqDB = cgAlign.createSequenceDatabase(seqList)
    cgAlign.writeSequenceDatabase(seqDB, runName)
def createDatabases(targetsFN, wordSize, runName):
    """Build the word and sequence databases from a file of target sequences.

    targetsFN -- text file with one sequence per line; each stripped line
                 becomes a cgAlign.cgSeq whose ID is its 0-based line index.
    wordSize  -- word size for cgAlign.createWordDatabase (coerced with
                 int(), so a numeric string is accepted).
    runName   -- name passed to cgAlign.writeWordDatabase and
                 cgAlign.writeSequenceDatabase for the output.
    """
    wordSize = int(wordSize)

    # make sequence list out of targets, make db, write to file
    # The with-block guarantees the file is closed even if cgSeq raises.
    with open(targetsFN, "r") as f:
        # Single-argument print(...) works as a Python 2 statement and as
        # the Python 3 function alike.
        print("obtaining sequences")
        seqList = [cgAlign.cgSeq(i, line.strip()) for i, line in enumerate(f)]

    print("making word db")
    wordDB = cgAlign.createWordDatabase(seqList, wordSize)
    cgAlign.writeWordDatabase(wordDB, runName)

    print("making seq db")
    seqDB = cgAlign.createSequenceDatabase(seqList)
    cgAlign.writeSequenceDatabase(seqDB, runName)
def createDatabases(targetsFN, wordSize, runName):
    """Create and write the word and sequence databases for a target file.

    targetsFN -- path of a file holding one target sequence per line.  The
                 0-based line index is used as each sequence's ID.
    wordSize  -- passed through int() and on to cgAlign.createWordDatabase.
    runName   -- output name given to both cgAlign.write*Database calls.
    """
    wordSize = int(wordSize)

    # make sequence list out of targets, make db, write to file
    seqList = []
    # Context manager: the handle is released even when a line fails.
    with open(targetsFN, 'r') as f:
        # print(...) with one argument prints the same text under Python 2
        # and Python 3.
        print('obtaining sequences')
        for lineNum, line in enumerate(f):
            seqList.append(cgAlign.cgSeq(lineNum, line.strip()))

    print('making word db')
    wordDB = cgAlign.createWordDatabase(seqList, wordSize)
    cgAlign.writeWordDatabase(wordDB, runName)

    print('making seq db')
    seqDB = cgAlign.createSequenceDatabase(seqList)
    cgAlign.writeSequenceDatabase(seqDB, runName)
def alignSeqs(seqsFN, dbName, wordSize, outFN, maxNumMismatches, sendExitSignal=False):
    """Run every query sequence in seqsFN through cgAlign.alignQuery.

    Values may be supplied as strings; wordSize and maxNumMismatches go
    through int(), and sendExitSignal is interpreted as described below.

    seqsFN           -- tab-delimited file of queries.  Columns one and two
                        of each line are given to cgAlign.cgSeq (presumably
                        the ID and the sequence -- confirm against cgAlign).
                        Empty lines are ignored; a non-empty line with a
                        single column raises IndexError.
    dbName           -- prefix of the databases to load ('.sDB' and '.wDB'
                        are appended).
    wordSize         -- word size handed to cgAlign.alignQuery.
    outFN            -- file the alignments are written to (mode 'w').
    maxNumMismatches -- mismatch limit handed to cgAlign.alignQuery.
    sendExitSignal   -- truthy to call cgExit.sendExitSignal(seqsFN) at the
                        end.  As a string, '', 'false', '0' and 'no' (any
                        case) mean false.
    """
    maxNumMismatches = int(maxNumMismatches)
    wordSize = int(wordSize)

    # A plain bool() would treat the text 'False' as true because it is a
    # non-empty string, so string flags are decoded explicitly.
    if isinstance(sendExitSignal, str):
        flagText = sendExitSignal.strip().lower()
        sendExitSignal = flagText not in ('', 'false', '0', 'no')
    else:
        sendExitSignal = bool(sendExitSignal)

    timer = bioLibCG.cgTimer()
    timer.start()

    # put seqs in cgSeq object, align
    sName = dbName + '.sDB'
    wName = dbName + '.wDB'

    # load dbs, printing the timer split after each one
    sDB = cgAlign.loadSequenceDatabase(sName)
    # print(...) with one argument is valid and equivalent in Python 2 and 3.
    print(timer.split())
    wDB = cgAlign.loadWordDatabase(wName)
    print(timer.split())

    # align each seq; both handles are closed on success and on error
    with open(seqsFN, 'r') as f, open(outFN, 'w') as fOut:
        for line in f:
            record = line.strip()
            if not record:
                # skip blank lines instead of failing on fields[1]
                continue
            fields = record.split('\t')
            qSeq = cgAlign.cgSeq(fields[0], fields[1])

            # write out the alignments
            cgAlign.alignQuery(qSeq, wDB, sDB, wordSize, maxNumMismatches, fOut)

    print(timer.split())

    if sendExitSignal:
        cgExit.sendExitSignal(seqsFN)
import cgAlign

# Scratch script: aligns one hard-coded query against the databases stored
# in 'tester.sDB' / 'tester.wDB'.

# Query sequence, wrapped in a cgSeq with ID 0.
query = 'CATACTTCCACGCCCAGCTCCATAATAACCC'
qSeq = cgAlign.cgSeq(0, query)

# The target databases are loaded from disk.  An earlier, disabled variant
# built them in memory from a single target sequence using
# cgAlign.createSequenceDatabase / cgAlign.createWordDatabase (word size 4).
seqDB = cgAlign.loadSequenceDatabase('tester.sDB')
wordDB = cgAlign.loadWordDatabase('tester.wDB')

# NOTE(review): the last argument is presumably the word size and so should
# match the one used to build tester.wDB -- confirm.
cgAlign.alignQuery(qSeq, wordDB, seqDB, 5)