def revCmp(self, toString=0):
    """Return the reverse complement of this sequence object.

    The sequence text is read with ``self.toString()`` and reverse
    complemented by ``JamesDefs.revComp``.

    Args:
        toString: 0 (default) returns a new instance of ``self.__class__``
            built from the reverse-complemented string; 1 returns the
            reverse-complemented string itself.

    Returns:
        A new object of the same class as ``self``, or a string, depending
        on ``toString``.

    Raises:
        ValueError: if ``toString`` is neither 0 nor 1.
    """
    # The previous guard, ``assert toString == 0 or 1``, parsed as
    # ``(toString == 0) or 1`` and was therefore always true; any stray
    # value silently fell through to the "new object" branch.
    if toString not in (0, 1):
        raise ValueError('toString must be 0 or 1, got %r' % (toString,))

    revCompStr = JamesDefs.revComp(self.toString())
    if toString == 1:
        return revCompStr
    return self.__class__(revCompStr)
def countModuleInAll(module, seqDict):
    """Sum the findModuleInLength results for a module over all sequences.

    Args:
        module: list of IUPAC motif strings.  It is rewritten IN PLACE so
            that each entry becomes ``[IUPACmotif, regExObj]``, where
            ``regExObj`` is the compiled forward/reverse-complement pattern
            returned by ``makeFwdAndRevCompRegExObj``.  Because of this the
            same list must not be passed in a second time.
        seqDict: mapping of record name -> sequence object.  Every object
            gets its ``modulePresent`` attribute reset to 0 before it is
            searched.

    Returns:
        The sum of the values returned by ``findModuleInLength`` for every
        record in ``seqDict`` (0 when ``seqDict`` is empty).
    """
    # makeFwdAndRevCompRegExObj takes a single IUPAC motif string and does
    # the reverse-complementing and IUPAC->regex conversion itself.  The
    # earlier code pre-converted both strands and handed it the resulting
    # two-element list, which that helper cannot interpret as a motif.
    for i, IUPACmotif in enumerate(module):
        module[i] = [IUPACmotif, makeFwdAndRevCompRegExObj(IUPACmotif)]

    totalHits = 0
    for record in seqDict:
        seqObj = seqDict[record]
        # initialise the presence flag before searching this record
        seqObj.modulePresent = 0
        # NOTE(review): 500 is presumably the length of sequence searched
        # and the return value a 0/1 presence flag -- confirm against
        # findModuleInLength.
        totalHits += findModuleInLength(module, seqObj, 500)
    return totalHits
def makeFwdAndRevCompRegExObj(motif):
    """Compile one regex that matches an IUPAC motif on either strand.

    The motif and its reverse complement (``JamesDefs.revComp``) are each
    translated with ``JamesDefs.iupac2regex`` and joined into a single
    alternation ``(fwd|rev)``.

    Args:
        motif: IUPAC motif string.

    Returns:
        A compiled, case-insensitive regular expression object.
    """
    import re

    bothStrands = (motif, JamesDefs.revComp(motif))
    # translate each strand's IUPAC string into a regex string
    fwdRegExStr, revRegExStr = [JamesDefs.iupac2regex(strand) for strand in bothStrands]
    return re.compile('(%s|%s)' % (fwdRegExStr, revRegExStr), re.IGNORECASE)
def findAllMotifs_SameLine(motifList, seqName, dictOfFastas, resultList):
    """Append one result line per motif listing all hits on both strands.

    For every motif, the sequence ``dictOfFastas[seqName].seq`` is searched
    (case-insensitively) for the motif and for its reverse complement.  The
    1-based start positions of all hits are merged, sorted and written on a
    single line of the form::

        seqName<TAB>motif<TAB>pos1<TAB>pos2...<NEWLINE>

    Args:
        motifList: iterable of IUPAC motif strings.
        seqName: key of the sequence to search in ``dictOfFastas``.
        dictOfFastas: mapping of name -> record exposing ``.seq.tostring()``.
        resultList: list that receives the formatted lines (mutated in place).

    Returns:
        None; results are delivered through ``resultList``.
    """
    for motif in motifList:
        # Extract the sequence text once per motif instead of once per strand.
        seqStr = dictOfFastas[seqName].seq.tostring()

        matchLocations = []
        # NOTE(review): a motif that equals its own reverse complement is
        # found by both searches, so each site is listed twice -- confirm
        # whether that is intended.
        for strandMotif in (motif, JamesDefs.revComp(motif)):
            strandRegEx = re.compile(JamesDefs.iupac2regex(strandMotif), re.IGNORECASE)
            # +1 converts the 0-based match offset to a 1-based coordinate
            matchLocations.extend(hit.start() + 1 for hit in strandRegEx.finditer(seqStr))

        # forward and reverse hits share one line, ordered by start position
        matchLocations.sort()
        fields = [seqName, motif] + [str(loc) for loc in matchLocations]
        # trailing newline so the line can be written to a file as-is
        resultList.append('\t'.join(fields) + '\n')
def makeFwdAndRevCompRegExObj_IUPAC(motif, equals=0):
    """Compile a regex matching a motif string or its reverse complement.

    Unlike ``makeFwdAndRevCompRegExObj`` no IUPAC->regex translation is
    applied here: the motif and ``JamesDefs.revComp(motif)`` are inserted
    into the pattern verbatim.

    Args:
        motif: motif string.
        equals: 0 (default) builds ``(fwd|rev)``, which matches when the
            target CONTAINS either form; 1 builds ``^(fwd|rev)$``, which
            matches only when the target IS either form.  Any other value
            leaves the motif untouched and compiles it on its own
            (forward strand only), as before.

    Returns:
        A compiled, case-insensitive regular expression object.
    """
    import re

    alternation = '(%s|%s)' % (motif, JamesDefs.revComp(motif))
    if equals == 1:
        # The end anchor was previously written as '&', a literal ampersand,
        # so the "is exactly the motif" pattern could never match a
        # nucleotide string.  '$' is the intended end-of-string anchor.
        pattern = '^%s$' % alternation
    elif equals == 0:
        pattern = alternation
    else:
        pattern = motif
    return re.compile(pattern, re.IGNORECASE)
def findAllMotifs(motifList, seqName, dictOfFastas, resultList):
    """Append two result lines per motif: forward hits, then reverse hits.

    For every motif, the sequence ``dictOfFastas[seqName].seq`` is searched
    (case-insensitively) for the motif and then for its reverse complement.
    Each search produces one line; every field, including the last
    position, is followed by a tab::

        seqName<TAB>motif<TAB>pos1<TAB>pos2<TAB>...<NEWLINE>
        seqName<TAB>motif_rc<TAB>pos1<TAB>pos2<TAB>...<NEWLINE>

    Positions are 1-based match starts in the order the matches are found.

    Args:
        motifList: iterable of IUPAC motif strings.
        seqName: key of the sequence to search in ``dictOfFastas``.
        dictOfFastas: mapping of name -> record exposing ``.seq.tostring()``.
        resultList: list that receives the formatted lines (mutated in place).

    Returns:
        None; results are delivered through ``resultList``.
    """
    for motif in motifList:
        # Extract the sequence text once per motif instead of once per strand.
        seqStr = dictOfFastas[seqName].seq.tostring()

        # forward strand first, then the reverse complement (labelled "_rc")
        for labelSuffix, useRevComp in (('', False), ('_rc', True)):
            strandMotif = JamesDefs.revComp(motif) if useRevComp else motif
            strandRegEx = re.compile(JamesDefs.iupac2regex(strandMotif), re.IGNORECASE)
            # +1 converts the 0-based match offset to a 1-based coordinate
            hitFields = ['%s\t' % (hit.start() + 1) for hit in strandRegEx.finditer(seqStr)]
            # trailing newline so the line can be written to a file as-is
            resultList.append(seqName + '\t' + motif + labelSuffix + '\t' + ''.join(hitFields) + '\n')