Esempio n. 1
0
def alignSeqs(seqsFN, dbName, wordSize, outFN, maxNumMismatches, sendExitSignal = False):
    """Align every query sequence listed in seqsFN against a stored database.

    seqsFN           -- tab-separated text file; column 0 and column 1 of each
                        line are handed to cgAlign.cgSeq (any further columns
                        are ignored).  Blank lines are skipped.
    dbName           -- database prefix; dbName + '.sDB' and dbName + '.wDB'
                        are loaded through cgAlign.
    wordSize         -- word size, converted with int().
    outFN            -- output file, opened for writing and passed to
                        cgAlign.alignQuery for every query.
    maxNumMismatches -- converted with int() and forwarded to alignQuery.
    sendExitSignal   -- when true, cgExit.sendExitSignal(seqsFN) is called at
                        the end.  May be a boolean or its textual form.

    The elapsed-time splits of the timer are printed after each database load
    and after all queries have been processed.
    """
    wordSize = int(wordSize)
    maxNumMismatches = int(maxNumMismatches)

    # Arguments can arrive as command-line strings, and bool('False') is
    # True, so textual "false" values have to be recognised explicitly.
    if isinstance(sendExitSignal, str):
        sendExitSignal = sendExitSignal.strip().lower() not in ('', '0', 'false', 'no')
    else:
        sendExitSignal = bool(sendExitSignal)

    timer = bioLibCG.cgTimer()
    timer.start()

    # load dbs
    sDB = cgAlign.loadSequenceDatabase(dbName + '.sDB')
    print(timer.split())
    wDB = cgAlign.loadWordDatabase(dbName + '.wDB')
    print(timer.split())

    # align each seq; the with-blocks close both files even if an
    # alignment raises part-way through
    with open(seqsFN, 'r') as f:
        with open(outFN, 'w') as fOut:
            for line in f:
                fields = line.strip().split('\t')
                if fields == ['']:
                    # blank line (e.g. trailing newline at end of file)
                    continue
                qSeq = cgAlign.cgSeq(fields[0], fields[1])

                # alignQuery receives the open output file for its results
                cgAlign.alignQuery(qSeq, wDB, sDB, wordSize, maxNumMismatches, fOut)

    print(timer.split())
    if sendExitSignal:
        cgExit.sendExitSignal(seqsFN)
Esempio n. 2
0
def createDatabases(targetsFN, wordSize, runName, hasIDs=False):
    """Build the word and sequence databases for the targets in targetsFN.

    targetsFN -- text file with one target per line.
    wordSize  -- word size for the word database, converted with int().
    runName   -- name handed to cgAlign.writeWordDatabase and
                 cgAlign.writeSequenceDatabase when storing the results.
    hasIDs    -- True or the text 'True': each line is 'ID<TAB>sequence'.
                 Anything else: each line is a bare sequence and its
                 zero-based line index is used as the ID.
    """
    wordSize = int(wordSize)
    # The flag may be a real boolean (direct call) or the string 'True'
    # (command line).  Comparing only against "True" turned the boolean
    # True into False, so both spellings are accepted here.
    hasIDs = (hasIDs is True) or (hasIDs == "True")
    print('using IDs %s' % hasIDs)

    # make sequence list out of targets, make db, write to file
    seqList = []
    with open(targetsFN, 'r') as f:
        print('obtaining sequences')
        for i, line in enumerate(f):
            if hasIDs:
                theID, seq = line.strip().split('\t')
            else:
                theID, seq = i, line.strip()
            seqList.append(cgAlign.cgSeq(theID, seq))

    print('making word db')
    wordDB = cgAlign.createWordDatabase(seqList, wordSize)
    cgAlign.writeWordDatabase(wordDB, runName)

    print('making seq db')
    seqDB = cgAlign.createSequenceDatabase(seqList)
    cgAlign.writeSequenceDatabase(seqDB, runName)
Esempio n. 3
0
def createDatabases(targetsFN, wordSize, runName, hasIDs = False):
    """Create and store the word and sequence databases for a target file.

    targetsFN -- path of the targets file, read line by line.
    wordSize  -- converted with int() and passed to
                 cgAlign.createWordDatabase.
    runName   -- passed to the cgAlign.write*Database calls.
    hasIDs    -- boolean True or the string 'True' means every line holds
                 'ID<TAB>sequence'; otherwise the whole stripped line is the
                 sequence and the zero-based line number becomes its ID.
    """
    wordSize = int(wordSize)
    # Accept the boolean True as well as the command-line text 'True';
    # the plain `== "True"` comparison rejected the boolean.
    hasIDs = (hasIDs is True) or (hasIDs == "True")
    print('using IDs %s' % hasIDs)

    # make sequence list out of targets, make db, write to file
    seqList = []
    with open(targetsFN, 'r') as f:
        # the with-block closes the file even when a malformed line
        # makes the tuple unpacking below raise
        print('obtaining sequences')
        for i, line in enumerate(f):
            stripped = line.strip()
            if hasIDs:
                theID, seq = stripped.split('\t')
            else:
                theID, seq = i, stripped
            seqList.append(cgAlign.cgSeq(theID, seq))

    print('making word db')
    wordDB = cgAlign.createWordDatabase(seqList, wordSize)
    cgAlign.writeWordDatabase(wordDB, runName)

    print('making seq db')
    seqDB = cgAlign.createSequenceDatabase(seqList)
    cgAlign.writeSequenceDatabase(seqDB, runName)
def createDatabases(targetsFN, wordSize, runName):
    """Build the word and sequence databases for the targets in targetsFN.

    targetsFN -- text file with one sequence per line; each stripped line
                 becomes a cgAlign.cgSeq whose ID is its zero-based line
                 index.
    wordSize  -- word size for the word database, converted with int().
    runName   -- name handed to cgAlign.writeWordDatabase and
                 cgAlign.writeSequenceDatabase.
    """
    wordSize = int(wordSize)

    # make sequence list out of targets, make db, write to file
    # (the with-block closes the file even if cgSeq construction fails)
    with open(targetsFN, "r") as f:
        print("obtaining sequences")
        seqList = [cgAlign.cgSeq(i, line.strip()) for i, line in enumerate(f)]

    print("making word db")
    wordDB = cgAlign.createWordDatabase(seqList, wordSize)
    cgAlign.writeWordDatabase(wordDB, runName)

    print("making seq db")
    seqDB = cgAlign.createSequenceDatabase(seqList)
    cgAlign.writeSequenceDatabase(seqDB, runName)
def createDatabases(targetsFN, wordSize, runName):
    """Create and store the word and sequence databases for a target file.

    targetsFN -- path of a file holding one sequence per line.
    wordSize  -- converted with int() and passed to
                 cgAlign.createWordDatabase.
    runName   -- passed to cgAlign.writeWordDatabase and
                 cgAlign.writeSequenceDatabase.

    Each sequence gets its zero-based line number as ID.
    """
    wordSize = int(wordSize)

    # make sequence list out of targets, make db, write to file
    seqList = []
    with open(targetsFN, 'r') as f:
        # file is closed by the with-block, also on errors
        print('obtaining sequences')
        for i, line in enumerate(f):
            seqList.append(cgAlign.cgSeq(i, line.strip()))

    print('making word db')
    wordDB = cgAlign.createWordDatabase(seqList, wordSize)
    cgAlign.writeWordDatabase(wordDB, runName)

    print('making seq db')
    seqDB = cgAlign.createSequenceDatabase(seqList)
    cgAlign.writeSequenceDatabase(seqDB, runName)
Esempio n. 6
0
def alignSeqs(seqsFN,
              dbName,
              wordSize,
              outFN,
              maxNumMismatches,
              sendExitSignal=False):
    """Align the query sequences in seqsFN against the database dbName.

    seqsFN           -- tab-separated file; the first two columns of every
                        non-blank line are passed to cgAlign.cgSeq.
    dbName           -- prefix of the database files (dbName + '.sDB' and
                        dbName + '.wDB').
    wordSize         -- word size, converted with int().
    outFN            -- file opened for writing; the open handle is given to
                        cgAlign.alignQuery for each query.
    maxNumMismatches -- converted with int() and forwarded to alignQuery.
    sendExitSignal   -- boolean or its textual form; when true,
                        cgExit.sendExitSignal(seqsFN) is called on completion.

    Timer splits are printed after each database load and after the
    alignment loop.
    """
    maxNumMismatches = int(maxNumMismatches)
    wordSize = int(wordSize)

    # bool() on a non-empty string is always True, so 'False' / '0' coming
    # from a command line must be detected by value.
    if isinstance(sendExitSignal, str):
        sendExitSignal = sendExitSignal.strip().lower() not in ('', '0', 'false', 'no')
    else:
        sendExitSignal = bool(sendExitSignal)

    timer = bioLibCG.cgTimer()
    timer.start()

    #load dbs
    wName = dbName + '.wDB'
    sName = dbName + '.sDB'
    sDB = cgAlign.loadSequenceDatabase(sName)
    print(timer.split())
    wDB = cgAlign.loadWordDatabase(wName)
    print(timer.split())

    #align each seq (both files are closed by the with-blocks on any exit)
    with open(seqsFN, 'r') as f:
        with open(outFN, 'w') as fOut:
            for line in f:
                columns = line.strip().split('\t')
                if columns == ['']:
                    # skip blank lines instead of failing on columns[1]
                    continue
                qSeq = cgAlign.cgSeq(columns[0], columns[1])

                #alignQuery is given the output file for its results
                cgAlign.alignQuery(qSeq, wDB, sDB, wordSize,
                                   maxNumMismatches, fOut)

    print(timer.split())
    if sendExitSignal:
        cgExit.sendExitSignal(seqsFN)
Esempio n. 7
0
import cgAlign

# Ad-hoc test script: aligns one hard-coded query against the prebuilt
# 'tester' databases loaded from the current working directory.
query = 'CATACTTCCACGCCCAGCTCCATAATAACCC'
#target = 'ATGCGTGTTTCTTGCGCGATCG'

#format the sequences (the first cgSeq argument is the sequence ID)
#tSeq = cgAlign.cgSeq(0,target)
#tSeqList = [tSeq]
qSeq = cgAlign.cgSeq(0, query)

#Make target databases
# (disabled: built the databases in memory from the single target above;
#  the script loads stored databases from disk instead, see below)
#seqDB = cgAlign.createSequenceDatabase(tSeqList)
#wordDB = cgAlign.createWordDatabase(tSeqList, 4)

#print tSeqList
#print seqDB
#print wordDB

# Load the stored sequence and word databases.
seqDB = cgAlign.loadSequenceDatabase('tester.sDB')
wordDB = cgAlign.loadWordDatabase('tester.wDB')

# The last argument (5) is presumably the word size and would have to match
# the size tester.wDB was built with -- TODO confirm.
# NOTE(review): alignSeqs calls alignQuery with two further arguments
# (maxNumMismatches and an output file); confirm this 4-argument call still
# matches the current cgAlign.alignQuery signature.
cgAlign.alignQuery(qSeq, wordDB, seqDB, 5)
Esempio n. 8
0
import cgAlign

# Ad-hoc test script: builds a single query sequence and aligns it against
# the 'tester' databases read from disk.
query = 'CATACTTCCACGCCCAGCTCCATAATAACCC'
#target = 'ATGCGTGTTTCTTGCGCGATCG'

#format the sequences (the query gets ID 0)
#tSeq = cgAlign.cgSeq(0,target)
#tSeqList = [tSeq]
qSeq = cgAlign.cgSeq(0, query)

#Make target databases
# (disabled in-memory variant; the stored databases below are used instead)
#seqDB = cgAlign.createSequenceDatabase(tSeqList)
#wordDB = cgAlign.createWordDatabase(tSeqList, 4)

#print tSeqList
#print seqDB
#print wordDB


# Load the stored databases; paths are relative to the working directory.
seqDB = cgAlign.loadSequenceDatabase('tester.sDB')
wordDB = cgAlign.loadWordDatabase('tester.wDB')


# NOTE(review): 5 is presumably the word size (must agree with how
# tester.wDB was built -- verify).  alignSeqs passes maxNumMismatches and an
# output file as 5th/6th arguments; check that alignQuery still accepts this
# shorter call.
cgAlign.alignQuery(qSeq, wordDB, seqDB, 5)