Beispiel #1
0
def findAllMotifs_SameLine(motifList, seqName, dictOfFastas, resultList):
    """Append one tab-delimited line of hit positions per motif to resultList.

    For every IUPAC motif in motifList the sequence stored under seqName is
    searched case-insensitively with the motif itself and with its reverse
    complement.  The 1-based start positions from both searches are pooled
    and sorted, then written on a single line laid out as:

        seqName TAB motif [TAB position ...] NEWLINE

    Positions are not de-duplicated: a start position reported by both the
    forward and the reverse-complement search is listed twice.

    Args:
        motifList: iterable of IUPAC motif strings.
        seqName: key into dictOfFastas; also the first field of every line.
        dictOfFastas: mapping of name -> record exposing a ``.seq`` attribute.
        resultList: list receiving the formatted lines (mutated in place).

    Returns:
        None.  Results are delivered through resultList.
    """
    #  The sequence does not depend on the motif, so convert it to a plain
    #  string once instead of twice per motif.
    seqObj = dictOfFastas[seqName].seq
    #  Keep using tostring() wherever it exists; fall back to str() for
    #  sequence objects that lack it (presumably newer Biopython Seq objects,
    #  which dropped tostring() in favour of str(seq) -- TODO confirm the
    #  type stored in dictOfFastas).
    if hasattr(seqObj, 'tostring'):
        sequence = seqObj.tostring()
    else:
        sequence = str(seqObj)

    for motif in motifList:

        #  convert from IUPAC to regEx for the forward and reverse directions
        fwd_RegExMotif = re.compile(JamesDefs.iupac2regex(motif), re.IGNORECASE)
        rev_RegExMotif = re.compile(JamesDefs.iupac2regex(JamesDefs.revComp(motif)), re.IGNORECASE)

        #  collect hit coords; +1 turns the 0-based match start into a
        #  1-based coordinate
        matchLocations = [hit.start() + 1 for hit in fwd_RegExMotif.finditer(sequence)]
        matchLocations.extend(hit.start() + 1 for hit in rev_RegExMotif.finditer(sequence))

        #  fwd and rev hits share one line, ordered by start
        matchLocations.sort()

        #  build the whole line in one join instead of re-formatting the
        #  growing string once per location
        fields = [seqName, motif]
        fields.extend(str(loc) for loc in matchLocations)

        #  trailing newline so the line can be written to file as-is later
        resultList.append('\t'.join(fields) + '\n')
def countModuleInAll(module, seqDict):
    """Sum the hits of a motif module over every record in seqDict.

    ``module`` is converted IN PLACE: each IUPAC motif string it holds is
    replaced by the two-element list ``[IUPACmotif, regExObj]``, where
    ``regExObj`` is the object returned by makeFwdAndRevCompRegExObj for that
    motif.  Because of this conversion the same ``module`` list must not be
    passed in a second time.

    Every record then has its ``modulePresent`` attribute reset to 0 and is
    handed to findModuleInLength together with the converted module and the
    literal 500 (presumably a length limit in bases, judging by the helper's
    name -- TODO confirm).  The values returned by those calls are summed.

    Args:
        module: list of IUPAC motif strings (mutated in place, see above).
        seqDict: mapping whose values accept a ``modulePresent`` attribute.

    Returns:
        The sum of the values returned by findModuleInLength.
    """
    #  makeFwdAndRevCompRegExObj, as defined in this file, takes a single
    #  IUPAC motif string and derives the reverse complement and the regEx
    #  conversion itself, so the motif is passed through untouched rather
    #  than as a pre-converted [fwdRegExStr, revRegExStr] list.
    for i, IUPACmotif in enumerate(module):
        module[i] = [IUPACmotif, makeFwdAndRevCompRegExObj(IUPACmotif)]

    #  Loop over the records and sum the per-record hits in totalHits
    totalHits = 0

    for record in seqDict:
        seqRecord = seqDict[record]

        #  initiate modulePresent attrib to 0 before the search
        seqRecord.modulePresent = 0

        totalHits += findModuleInLength(module, seqRecord, 500)

    return totalHits
def makeFwdAndRevCompRegExObj(motif):
    """Compile one regEx that matches a motif in either orientation.

    The motif and its reverse complement (JamesDefs.revComp) are each
    translated with JamesDefs.iupac2regex and joined as the two alternatives
    of a single group, which is compiled case-insensitively.

    Args:
        motif: motif string accepted by JamesDefs.revComp and
            JamesDefs.iupac2regex.

    Returns:
        The compiled regular expression object.
    """
    import re

    #  translate both orientations from IUPAC to regEx strings
    fwdRegExStr, revRegExStr = [
        JamesDefs.iupac2regex(iupacStr)
        for iupacStr in (motif, JamesDefs.revComp(motif))
    ]

    #  a hit on either alternative counts as a match
    return re.compile('(%s|%s)' % (fwdRegExStr, revRegExStr), re.IGNORECASE)
Beispiel #4
0
def findAllMotifs(motifList, seqName, dictOfFastas, resultList):
    """Append two tab-delimited hit lines per motif to resultList.

    For every IUPAC motif in motifList the sequence stored under seqName is
    searched case-insensitively, first with the motif and then with its
    reverse complement.  Each search contributes its own line:

        seqName TAB motif TAB [position TAB ...] NEWLINE
        seqName TAB motif_rc TAB [position TAB ...] NEWLINE

    Positions are 1-based match starts in the order the search reports them.
    Every position is followed by a TAB, so a line with hits ends in
    TAB NEWLINE.

    Args:
        motifList: iterable of IUPAC motif strings.
        seqName: key into dictOfFastas; also the first field of every line.
        dictOfFastas: mapping of name -> record exposing a ``.seq`` attribute.
        resultList: list receiving the formatted lines (mutated in place).

    Returns:
        None.  Results are delivered through resultList.
    """
    #  The sequence does not depend on the motif, so convert it to a plain
    #  string once instead of twice per motif.
    seqObj = dictOfFastas[seqName].seq
    #  Keep using tostring() wherever it exists; fall back to str() for
    #  sequence objects that lack it (presumably newer Biopython Seq objects,
    #  which dropped tostring() in favour of str(seq) -- TODO confirm the
    #  type stored in dictOfFastas).
    if hasattr(seqObj, 'tostring'):
        sequence = seqObj.tostring()
    else:
        sequence = str(seqObj)

    for motif in motifList:

        #  forward direction, labelled with the motif itself
        resultList.append(_formatMotifHitLine(seqName, motif, motif, sequence))

        #  reverse direction, labelled with the fwd motif plus '_rc'
        resultList.append(
            _formatMotifHitLine(seqName, motif + '_rc', JamesDefs.revComp(motif), sequence))


def _formatMotifHitLine(seqName, label, iupacMotif, sequence):
    """Return the result line for one search of sequence with iupacMotif.

    The line starts with seqName and label, each followed by a TAB, then
    lists every 1-based match start followed by a TAB, and ends with a
    newline so it can be written to file as-is.
    """
    regExMotif = re.compile(JamesDefs.iupac2regex(iupacMotif), re.IGNORECASE)

    #  +1 turns the 0-based match start into a 1-based coordinate
    hitFields = ['%s\t' % (hit.start() + 1) for hit in regExMotif.finditer(sequence)]

    return seqName + '\t' + label + '\t' + ''.join(hitFields) + '\n'
Beispiel #5
0
def convertMotifList(motifList):
    """Pair every motif in motifList with its regEx translation, in place.

    Each entry is replaced by the two-element list
    ``[motif, JamesDefs.iupac2regex(motif)]``.

    Args:
        motifList: list of motif strings (mutated in place).

    Returns:
        None.
    """
    for idx, iupacMotif in enumerate(motifList):
        motifList[idx] = [iupacMotif, JamesDefs.iupac2regex(iupacMotif)]