def rotamerHMM(predictedProbs):
    """Given rotamer likelihoods for all suites, predict the most likely rotamer string using a Hidden Markov Model

    The search is a Viterbi-style dynamic program carried out in log space: for every
    suite and rotamer it keeps the best log score of any path ending there, plus a
    back-pointer to the previous rotamer on that path, and then walks the back-pointers
    from the best final rotamer.

    ARGUMENTS:
        predictedProbs - the probability for each suite for each rotamer
                         formatted as a list of dictionaries
                         predictedProbs[suiteNum][rotamer] = probability
    RETURNS:
        bestPath       - a list of the most likely rotamer for each suite
                         (an empty list when predictedProbs is empty)
    """

    numSuites = len(predictedProbs)

    #with no suites there is nothing to decode, and indexing pathProbs[0] below would raise an IndexError
    if numSuites == 0:
        return []

    pathProbs = [{} for i in xrange(numSuites)] #the log probability of having followed a given path (the delta or phi array)
    path      = [{} for i in xrange(numSuites)] #the path followed for $pathProbs (the psi array)

    #initialize the pathProbs list: the first suite has no predecessor, so only its own likelihood counts
    for curRot in rotList:
        pathProbs[0][curRot] = ln(predictedProbs[0][curRot])

    for curPos in xrange(1, numSuites): #for each suite
        prevProbs = pathProbs[curPos - 1]
        for curRot in rotList: #for each rotamer
            #figure out what the best previous rotamer is for ending up at the current rotamer
            #NOTE(review): __transitionProb is added directly to log probabilities, so it presumably returns a log-scale value - confirm
            bestPrevRot = max(prevProbs, key=lambda prevRot: prevProbs[prevRot] + __transitionProb(prevRot, curRot))
            path[curPos][curRot] = bestPrevRot
            pathProbs[curPos][curRot] = prevProbs[bestPrevRot] + __transitionProb(bestPrevRot, curRot) + ln(predictedProbs[curPos][curRot])

    #initialize bestPath to the appropriate length
    bestPath = [None] * numSuites

    #figure out the best last position
    lastProbs = pathProbs[numSuites - 1]
    bestPath[numSuites - 1] = max(lastProbs, key=lastProbs.get)

    #follow the path back to figure out what the best path was
    for curPos in xrange(numSuites - 1, 0, -1):
        bestPath[curPos - 1] = path[curPos][bestPath[curPos]]

    return bestPath
def secondPhos(self, mapNum, curPhos, direction = 3):
    """Rank candidate positions for the second phosphate of a chain using phosphate distance data

    Each density peak returned by getPeaks is scored as
        ln(interpolated phosphate-distance value) + SECOND_PHOS_DENSITY_WEIGHT * ln(densityScore)
    where densityScore adds element 3 of the first findSugar result to element 3 of the peak.

    ARGUMENTS:
        mapNum    - the molecule number of the Coot map to use
        curPhos   - the coordinates of the first phosphate
    OPTIONAL ARGUMENTS:
        direction - which direction to trace the chain: 3 implies 5'->3'
                                                        5 implies 3'->5'
                    defaults to 3 (5'->3')
                    any value other than 3 is treated as 5
    RETURNS:
        peakList  - a list of potential phosphate peaks, highest score first
        sugarList - a list of potential C1' locations for each phosphate peak, in the same order as peakList
    """

    scoredPeaks = []
    for candidate in self.getPeaks(mapNum, curPhos):
        phosDist = dist(curPhos, candidate)

        #skip any peak that is not at least an Angstrom away from the current phosphate position
        #this leaves out the peak corresponding to the current phosphate itself
        if not (phosDist >= 1):
            continue

        #do a sugar search between the two phosphates, with the argument order following the tracing direction
        if direction == 3:
            sugars = self.findSugar(mapNum, curPhos, candidate)
        else:
            #direction == 5
            sugars = self.findSugar(mapNum, candidate, curPhos)

        #NOTE(review): element 3 of a sugar location and of a peak is presumably its density value - confirm
        densityScore = sugars[0][3] + candidate[3]

        distTerm    = ln(self.__phosDistInterp.interp(phosDist))
        densityTerm = SECOND_PHOS_DENSITY_WEIGHT * ln(densityScore)

        scoredPeaks.append((candidate, distTerm + densityTerm, sugars))

    #best score first
    scoredPeaks.sort(key=lambda entry: entry[1], reverse=True)

    #hand back only the coordinates and sugar candidates, not the scores
    peakList  = [entry[0] for entry in scoredPeaks]
    sugarList = [entry[2] for entry in scoredPeaks]

    return (peakList, sugarList)
def rotamerHMM(predictedProbs):
    """Given rotamer likelihoods for all suites, predict the most likely rotamer string using a Hidden Markov Model

    The search is a Viterbi-style dynamic program carried out in log space: for every
    suite and rotamer it keeps the best log score of any path ending there, plus a
    back-pointer to the previous rotamer on that path, and then walks the back-pointers
    from the best final rotamer.

    ARGUMENTS:
        predictedProbs - the probability for each suite for each rotamer
                         formatted as a list of dictionaries
                         predictedProbs[suiteNum][rotamer] = probability
    RETURNS:
        bestPath       - a list of the most likely rotamer for each suite
                         (an empty list when predictedProbs is empty)
    """

    numSuites = len(predictedProbs)

    #with no suites there is nothing to decode, and indexing pathProbs[0] below would raise an IndexError
    if numSuites == 0:
        return []

    pathProbs = [{} for i in xrange(numSuites)] #the log probability of having followed a given path (the delta or phi array)
    path      = [{} for i in xrange(numSuites)] #the path followed for $pathProbs (the psi array)

    #initialize the pathProbs list: the first suite has no predecessor, so only its own likelihood counts
    for curRot in rotList:
        pathProbs[0][curRot] = ln(predictedProbs[0][curRot])

    for curPos in xrange(1, numSuites): #for each suite
        prevProbs = pathProbs[curPos - 1]
        for curRot in rotList: #for each rotamer
            #figure out what the best previous rotamer is for ending up at the current rotamer
            #NOTE(review): __transitionProb is added directly to log probabilities, so it presumably returns a log-scale value - confirm
            bestPrevRot = max(prevProbs, key=lambda prevRot: prevProbs[prevRot] + __transitionProb(prevRot, curRot))
            path[curPos][curRot] = bestPrevRot
            pathProbs[curPos][curRot] = prevProbs[bestPrevRot] + __transitionProb(bestPrevRot, curRot) + ln(predictedProbs[curPos][curRot])

    #initialize bestPath to the appropriate length
    bestPath = [None] * numSuites

    #figure out the best last position
    lastProbs = pathProbs[numSuites - 1]
    bestPath[numSuites - 1] = max(lastProbs, key=lastProbs.get)

    #follow the path back to figure out what the best path was
    for curPos in xrange(numSuites - 1, 0, -1):
        bestPath[curPos - 1] = path[curPos][bestPath[curPos]]

    return bestPath
def nextPhos(self, mapNum, curPhos, prevPhos, prevSugar, direction = 3):
    """Rank candidate positions for the next phosphate in a chain using phosphate distance and angle data

    Each density peak returned by getPeaks is scored as the sum of four terms:
        PHOS_DIST_WEIGHT * ln(interpolated value for the curPhos-peak distance)
        ln(interpolated value for the prevPhos-curPhos-peak angle)
        ln(interpolated value for the prevSugar-curPhos-sugar angle)
        DENSITY_WEIGHT * ln(densityScore)
    where the sugar is the first findSugar result and densityScore adds element 3 of
    that result to element 3 of the peak.

    ARGUMENTS:
        mapNum    - the molecule number of the Coot map to use
        curPhos   - the coordinates of the current phosphate
        prevPhos  - the coordinates of the previous phosphate
        prevSugar - the coordinates of the previous C1' atom
    OPTIONAL ARGUMENTS:
        direction - which direction to trace the chain: 3 implies 5'->3'
                                                        5 implies 3'->5'
                    defaults to 3 (5'->3')
                    any value other than 3 is treated as 5
    RETURNS:
        peakList  - a list of potential phosphate peaks, highest score first
        sugarList - a list of potential C1' locations for each phosphate peak, in the same order as peakList
    """

    scoredPeaks = []
    for candidate in self.getPeaks(mapNum, curPhos):
        phosDist = dist(curPhos, candidate)

        #skip any peak that is not at least an Angstrom away from the current phosphate position
        #this leaves out the peak corresponding to the current phosphate itself
        if not (phosDist >= 1):
            continue

        #do a sugar search between the two phosphates, with the argument order following the tracing direction
        if direction == 3:
            sugars = self.findSugar(mapNum, curPhos, candidate)
        else:
            sugars = self.findSugar(mapNum, candidate, curPhos)
        firstSugar = sugars[0]

        #NOTE(review): element 3 of a sugar location and of a peak is presumably its density value - confirm
        densityScore = firstSugar[3] + candidate[3]

        #the individual log-scale terms, summed in the order listed in the docstring
        distTerm       = PHOS_DIST_WEIGHT * ln(self.__phosDistInterp.interp(phosDist))
        phosAngleTerm  = ln(self.__phosAngleInterp.interp(angle(prevPhos, curPhos, candidate)))
        sugarAngleTerm = ln(self.__sugarPhosSugarAngleInterp.interp(angle(prevSugar, curPhos, firstSugar[0:3])))
        densityTerm    = DENSITY_WEIGHT * ln(densityScore)

        scoredPeaks.append((candidate, distTerm + phosAngleTerm + sugarAngleTerm + densityTerm, sugars))

    #best score first
    scoredPeaks.sort(key=lambda entry: entry[1], reverse=True)

    #hand back only the coordinates and sugar candidates, not the scores
    peakList  = [entry[0] for entry in scoredPeaks]
    sugarList = [entry[2] for entry in scoredPeaks]

    return (peakList, sugarList)