Exemple #1
0
def rotamerHMM(predictedProbs):
    """Given rotamer likelihoods for all suites, predict the most likely rotamer string using a Hidden Markov Model

    The best path is found with a Viterbi-style dynamic program: for every
    suite and rotamer the best-scoring predecessor is recorded, and the
    winning path is then recovered by walking those records backwards.
    Scores are summed, so ln() and __transitionProb() are presumably both
    log-scale values (TODO confirm against their definitions).

    ARGUMENTS:
        predictedProbs  - the probability for each suite for each rotamer formatted as a list of dictionaries
                          predictedProbs[suiteNum][rotamer] = probability
    RETURNS:
        bestPath        - a list of the most likely rotamer for each suite
                          (an empty list when predictedProbs is empty)
    """

    numSuites = len(predictedProbs)

    #with no suites there is no path to trace; without this check the
    #initialization below would raise an IndexError on pathProbs[0]
    if numSuites == 0:
        return []

    #the log probability of having followed a given path (the delta or phi array)
    pathProbs = [{} for i in range(numSuites)]
    #the path followed for $pathProbs (the psi array)
    path = [{} for i in range(numSuites)]

    #initialize the pathProbs list
    for curRot in rotList:
        pathProbs[0][curRot] = ln(predictedProbs[0][curRot])

    for curPos in range(1, numSuites):  #for each suite
        prevProbs = pathProbs[curPos - 1]

        for curRot in rotList:  #for each rotamer

            #score every possible previous rotamer once, so the transition
            #probability is not recalculated for the winning rotamer
            arrivalScores = {}
            for prevRot in prevProbs:
                arrivalScores[prevRot] = prevProbs[prevRot] + __transitionProb(prevRot, curRot)

            #figure out what the best previous rotamer is for ending up at the current rotamer
            #(iterating prevProbs keeps the original tie-breaking order)
            bestPrevRot = max(prevProbs, key=lambda prevRot: arrivalScores[prevRot])

            path[curPos][curRot] = bestPrevRot
            pathProbs[curPos][curRot] = arrivalScores[bestPrevRot] + ln(predictedProbs[curPos][curRot])

    #initialize bestPath to the appropriate length
    bestPath = [None] * numSuites

    #figure out the best last position
    lastProbs = pathProbs[numSuites - 1]
    bestPath[numSuites - 1] = max(lastProbs, key=lambda curRot: lastProbs[curRot])

    #follow the path back to figure out what the best path was
    for curPos in range(numSuites - 1, 0, -1):
        bestPath[curPos - 1] = path[curPos][bestPath[curPos]]

    return bestPath
Exemple #2
0
 def secondPhos(self, mapNum, curPhos, direction = 3):
     """Rank candidate locations for the second phosphate of a chain
     
     Every peak returned by getPeaks() that lies at least 1 unit away from
     curPhos is scored and the candidates are returned best-first.  The score
     is the sum of
         - ln of the interpolated phosphate-distance value for the
           curPhos-to-peak distance, and
         - SECOND_PHOS_DENSITY_WEIGHT * ln(density score), where the density
           score is element 3 of the first findSugar() result plus element 3
           of the peak (presumably their map density values - TODO confirm)
     
     ARGUMENTS:
         mapNum    - the molecule number of the Coot map to use
         curPhos   - the coordinates of the first phosphate
     OPTIONAL ARGUMENTS:
         direction - which direction to trace the chain: 3 implies 5'->3'
                                                         5 implies 3'->5'
                     defaults to 3 (5'->3')
                     any value other than 3 is treated as 5
     RETURNS:
         peakList  - a list of potential phosphate peaks, highest score first
         sugarList - a list of potential C1' locations for each phosphate peak,
                     in the same order as peakList
     """
     
     scored = []
     for candidate in self.getPeaks(mapNum, curPhos):
         
         #skip anything within an Angstrom of the current phosphate position,
         #which drops the peak belonging to the current phosphate itself
         if not dist(curPhos, candidate) >= 1:
             continue
         
         #the order of the two phosphates handed to the sugar search
         #depends on the tracing direction
         searchArgs = (curPhos, candidate) if direction == 3 else (candidate, curPhos)
         sugars = self.findSugar(mapNum, *searchArgs)
         
         density = sugars[0][3] + candidate[3]
         
         distTerm    = ln(self.__phosDistInterp.interp(dist(curPhos, candidate)))
         densityTerm = SECOND_PHOS_DENSITY_WEIGHT * ln(density)
         scored.append((candidate, distTerm + densityTerm, sugars))
     
     #best score first; the sort is stable, so ties keep their getPeaks() order
     ranked = sorted(scored, key = lambda entry: entry[1], reverse = True)
     
     #hand back the coordinates and sugar candidates only, not the scores
     return ([entry[0] for entry in ranked], [entry[2] for entry in ranked])
Exemple #3
0
 def secondPhos(self, mapNum, curPhos, direction = 3):
     """Rank candidate locations for the second phosphate of a chain
     
     Every peak returned by getPeaks() that lies at least 1 unit away from
     curPhos is scored and the candidates are returned best-first.  The score
     is the sum of
         - ln of the interpolated phosphate-distance value for the
           curPhos-to-peak distance, and
         - SECOND_PHOS_DENSITY_WEIGHT * ln(density score), where the density
           score is element 3 of the first findSugar() result plus element 3
           of the peak (presumably their map density values - TODO confirm)
     
     ARGUMENTS:
         mapNum    - the molecule number of the Coot map to use
         curPhos   - the coordinates of the first phosphate
     OPTIONAL ARGUMENTS:
         direction - which direction to trace the chain: 3 implies 5'->3'
                                                         5 implies 3'->5'
                     defaults to 3 (5'->3')
                     any value other than 3 is treated as 5
     RETURNS:
         peakList  - a list of potential phosphate peaks, highest score first
         sugarList - a list of potential C1' locations for each phosphate peak,
                     in the same order as peakList
     """
     
     scored = []
     for candidate in self.getPeaks(mapNum, curPhos):
         
         #skip anything within an Angstrom of the current phosphate position,
         #which drops the peak belonging to the current phosphate itself
         if not dist(curPhos, candidate) >= 1:
             continue
         
         #the order of the two phosphates handed to the sugar search
         #depends on the tracing direction
         searchArgs = (curPhos, candidate) if direction == 3 else (candidate, curPhos)
         sugars = self.findSugar(mapNum, *searchArgs)
         
         density = sugars[0][3] + candidate[3]
         
         distTerm    = ln(self.__phosDistInterp.interp(dist(curPhos, candidate)))
         densityTerm = SECOND_PHOS_DENSITY_WEIGHT * ln(density)
         scored.append((candidate, distTerm + densityTerm, sugars))
     
     #best score first; the sort is stable, so ties keep their getPeaks() order
     ranked = sorted(scored, key = lambda entry: entry[1], reverse = True)
     
     #hand back the coordinates and sugar candidates only, not the scores
     return ([entry[0] for entry in ranked], [entry[2] for entry in ranked])
Exemple #4
0
def rotamerHMM(predictedProbs):
    """Given rotamer likelihoods for all suites, predict the most likely rotamer string using a Hidden Markov Model
    
    The best path is found with a Viterbi-style dynamic program: for every
    suite and rotamer the best-scoring predecessor is recorded, and the
    winning path is then recovered by walking those records backwards.
    Scores are summed, so ln() and __transitionProb() are presumably both
    log-scale values (TODO confirm against their definitions).
    
    ARGUMENTS:
        predictedProbs  - the probability for each suite for each rotamer formatted as a list of dictionaries
                          predictedProbs[suiteNum][rotamer] = probability
    RETURNS:
        bestPath        - a list of the most likely rotamer for each suite
                          (an empty list when predictedProbs is empty)
    """
    
    numSuites = len(predictedProbs)
    
    #with no suites there is no path to trace; without this check the
    #initialization below would raise an IndexError on pathProbs[0]
    if numSuites == 0:
        return []
    
    pathProbs = [{} for i in range(numSuites)]  #the log probability of having followed a given path (the delta or phi array)
    path      = [{} for i in range(numSuites)]  #the path followed for $pathProbs (the psi array)
    
    #initialize the pathProbs list
    for curRot in rotList:
        pathProbs[0][curRot] = ln(predictedProbs[0][curRot])
    
    for curPos in range(1, numSuites):  #for each suite
        prevProbs = pathProbs[curPos-1]
        
        for curRot in rotList:          #for each rotamer
            
            #score every possible previous rotamer once, so the transition
            #probability is not recalculated for the winning rotamer
            arrivalScores = {}
            for prevRot in prevProbs:
                arrivalScores[prevRot] = prevProbs[prevRot] + __transitionProb(prevRot, curRot)
            
            #figure out what the best previous rotamer is for ending up at the current rotamer
            #(iterating prevProbs keeps the original tie-breaking order)
            bestPrevRot = max(prevProbs, key = lambda prevRot: arrivalScores[prevRot])
            
            path[curPos][curRot]      = bestPrevRot
            pathProbs[curPos][curRot] = arrivalScores[bestPrevRot] + ln(predictedProbs[curPos][curRot])
    
    #initialize bestPath to the appropriate length
    bestPath = [None] * numSuites
    
    #figure out the best last position
    lastProbs = pathProbs[numSuites-1]
    bestPath[numSuites-1] = max(lastProbs, key = lambda curRot: lastProbs[curRot])
    
    #follow the path back to figure out what the best path was
    for curPos in range(numSuites-1, 0, -1):
        bestPath[curPos-1] = path[curPos][bestPath[curPos]]
    
    return bestPath
Exemple #5
0
 def nextPhos(self, mapNum, curPhos, prevPhos, prevSugar, direction = 3):
     """Rank candidate locations for the next phosphate of a chain
     
     Every peak returned by getPeaks() that lies at least 1 unit away from
     curPhos is scored and the candidates are returned best-first.  The score
     is the sum of four terms:
         - PHOS_DIST_WEIGHT * ln of the interpolated phosphate-distance value
           for the curPhos-to-peak distance
         - ln of the interpolated value for the prevPhos-curPhos-peak angle
         - ln of the interpolated value for the angle between prevSugar,
           curPhos, and the first three elements of the first findSugar() result
         - DENSITY_WEIGHT * ln(density score), where the density score is
           element 3 of the first findSugar() result plus element 3 of the
           peak (presumably their map density values - TODO confirm)
     
     ARGUMENTS:
         mapNum    - the molecule number of the Coot map to use
         curPhos   - the coordinates of the current phosphate
         prevPhos  - the coordinates of the previous phosphate
         prevSugar - the coordinates of the previous C1' atom
     OPTIONAL ARGUMENTS:
         direction - which direction to trace the chain: 3 implies 5'->3'
                                                         5 implies 3'->5'
                     defaults to 3 (5'->3')
                     any value other than 3 is treated as 5
     RETURNS:
         peakList  - a list of potential phosphate peaks, highest score first
         sugarList - a list of potential C1' locations for each phosphate peak,
                     in the same order as peakList
     """
     
     scored = []
     for candidate in self.getPeaks(mapNum, curPhos):
         
         #skip anything within an Angstrom of the current phosphate position,
         #which drops the peak belonging to the current phosphate itself
         if not dist(curPhos, candidate) >= 1:
             continue
         
         #the order of the two phosphates handed to the sugar search
         #depends on the tracing direction
         searchArgs = (curPhos, candidate) if direction == 3 else (candidate, curPhos)
         sugars = self.findSugar(mapNum, *searchArgs)
         
         density = sugars[0][3] + candidate[3]
         
         #only the top-ranked sugar candidate (sugars[0]) contributes to the score
         distTerm       = PHOS_DIST_WEIGHT * ln(self.__phosDistInterp.interp(dist(curPhos, candidate)))
         phosAngleTerm  = ln(self.__phosAngleInterp.interp(angle(prevPhos, curPhos, candidate)))
         sugarAngleTerm = ln(self.__sugarPhosSugarAngleInterp.interp(angle(prevSugar, curPhos, sugars[0][0:3])))
         densityTerm    = DENSITY_WEIGHT * ln(density)
         
         total = distTerm + phosAngleTerm + sugarAngleTerm + densityTerm
         scored.append((candidate, total, sugars))
     
     #best score first; the sort is stable, so ties keep their getPeaks() order
     ranked = sorted(scored, key = lambda entry: entry[1], reverse = True)
     
     #hand back the coordinates and sugar candidates only, not the scores
     return ([entry[0] for entry in ranked], [entry[2] for entry in ranked])
Exemple #6
0
 def nextPhos(self, mapNum, curPhos, prevPhos, prevSugar, direction = 3):
     """Rank candidate locations for the next phosphate of a chain
     
     Every peak returned by getPeaks() that lies at least 1 unit away from
     curPhos is scored and the candidates are returned best-first.  The score
     is the sum of four terms:
         - PHOS_DIST_WEIGHT * ln of the interpolated phosphate-distance value
           for the curPhos-to-peak distance
         - ln of the interpolated value for the prevPhos-curPhos-peak angle
         - ln of the interpolated value for the angle between prevSugar,
           curPhos, and the first three elements of the first findSugar() result
         - DENSITY_WEIGHT * ln(density score), where the density score is
           element 3 of the first findSugar() result plus element 3 of the
           peak (presumably their map density values - TODO confirm)
     
     ARGUMENTS:
         mapNum    - the molecule number of the Coot map to use
         curPhos   - the coordinates of the current phosphate
         prevPhos  - the coordinates of the previous phosphate
         prevSugar - the coordinates of the previous C1' atom
     OPTIONAL ARGUMENTS:
         direction - which direction to trace the chain: 3 implies 5'->3'
                                                         5 implies 3'->5'
                     defaults to 3 (5'->3')
                     any value other than 3 is treated as 5
     RETURNS:
         peakList  - a list of potential phosphate peaks, highest score first
         sugarList - a list of potential C1' locations for each phosphate peak,
                     in the same order as peakList
     """
     
     scored = []
     for candidate in self.getPeaks(mapNum, curPhos):
         
         #skip anything within an Angstrom of the current phosphate position,
         #which drops the peak belonging to the current phosphate itself
         if not dist(curPhos, candidate) >= 1:
             continue
         
         #the order of the two phosphates handed to the sugar search
         #depends on the tracing direction
         searchArgs = (curPhos, candidate) if direction == 3 else (candidate, curPhos)
         sugars = self.findSugar(mapNum, *searchArgs)
         
         density = sugars[0][3] + candidate[3]
         
         #only the top-ranked sugar candidate (sugars[0]) contributes to the score
         distTerm       = PHOS_DIST_WEIGHT * ln(self.__phosDistInterp.interp(dist(curPhos, candidate)))
         phosAngleTerm  = ln(self.__phosAngleInterp.interp(angle(prevPhos, curPhos, candidate)))
         sugarAngleTerm = ln(self.__sugarPhosSugarAngleInterp.interp(angle(prevSugar, curPhos, sugars[0][0:3])))
         densityTerm    = DENSITY_WEIGHT * ln(density)
         
         total = distTerm + phosAngleTerm + sugarAngleTerm + densityTerm
         scored.append((candidate, total, sugars))
     
     #best score first; the sort is stable, so ties keep their getPeaks() order
     ranked = sorted(scored, key = lambda entry: entry[1], reverse = True)
     
     #hand back the coordinates and sugar candidates only, not the scores
     return ([entry[0] for entry in ranked], [entry[2] for entry in ranked])