Example #1
 def getClustersConnections(self):
   '''using the data in self.connections, count the number of clusters.'''
   clusters = unionfind2.unionFind()
   for connection in self.connections:
     node1, node2 = connection[0], connection[1]  # unpack
     clusters.union(node1, node2)  # really is that simple
   return clusters.toLists()
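None of these examples include the unionFind class itself; the calls above and in the examples below suggest an interface with find, union, different, getList, and toLists. The following is only a minimal sketch of that assumed interface, written here for reference; the real unionfind2 module may be implemented quite differently.

class unionFind(object):
  '''minimal sketch of the union-find interface assumed by these examples.'''
  def __init__(self):
    self.parent = {}  # item -> parent item; roots point to themselves

  def find(self, item):
    '''returns the root of the set containing item, adding item as a
    singleton if it has not been seen before.'''
    if item not in self.parent:
      self.parent[item] = item
    root = item
    while self.parent[root] != root:
      root = self.parent[root]
    while self.parent[item] != root:  # path compression
      self.parent[item], item = root, self.parent[item]
    return root

  def union(self, one, two):
    '''merges the sets containing one and two.'''
    self.parent[self.find(one)] = self.find(two)

  def different(self, one, two):
    '''true if one and two are currently in different sets.'''
    return self.find(one) != self.find(two)

  def getList(self, item):
    '''every item in the same set as item.'''
    root = self.find(item)
    return [other for other in list(self.parent) if self.find(other) == root]

  def toLists(self):
    '''all the sets, each as a list of its items.'''
    sets = {}
    for item in list(self.parent):
      sets.setdefault(self.find(item), []).append(item)
    return list(sets.values())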
Example #2
 def getRMSDclustersAll(self, rmsdCutoff=None, numClusters=1):
   '''uses the rmsdList to make clusters of conformations based on rmsd.
   goes until either the rmsdCutoff or numClusters is reached. using
   numClusters will make this run very slowly. uses ALL (complete) linkage,
   not single linkage.'''
   self.getRMSDtable() #make the table, or ensure it is made
   #self.rmsdList is a tuple of (rmsd, conf, conf)
   #self.rmsdTable is a dict of [conf][conf] -> rmsd
   clusters = unionfind2.unionFind()
   for xyzCount in xrange(len(self.atomXyz)):
     clusters.find(xyzCount) #init
   if rmsdCutoff is None:
     rmsdCutoff = self.rmsdList[-1][0] + 1.0 #make it never happen
   for rmsdTuple in self.rmsdList:
     if rmsdTuple[0] > rmsdCutoff:
       break #quit combining things!
     #have to do all linkage not just single.. oh my
     if clusters.different(rmsdTuple[1], rmsdTuple[2]): #otherwise already joined
       combine = True
       clusterOne = clusters.getList(rmsdTuple[1])
       clusterTwo = clusters.getList(rmsdTuple[2])
       #print clusterOne, clusterTwo,
       for clusterOneRep in clusterOne:
         for clusterTwoRep in clusterTwo:
           thisRMSD = self.rmsdTable[clusterOneRep][clusterTwoRep]
           #print thisRMSD, 
           if thisRMSD > rmsdTuple[0]: #means we can't combine yet
             combine = False
             break
         if not combine:
           break
       #print combine
       if combine:
         clusters.union(rmsdTuple[1], rmsdTuple[2])
   return clusters.toLists()
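To make the ALL (complete) linkage check above concrete: suppose three conformations have pairwise RMSDs rmsd(0,1)=1.0, rmsd(1,2)=1.5 and rmsd(0,2)=4.0, with a cutoff of 2.0 (values made up purely for illustration). Single linkage (Example #7 below) joins 0-1 and then 1-2, so all three conformations land in one cluster. The complete-linkage loop above joins 0-1, but when it reaches the (1.5, 1, 2) entry it looks up rmsd(0,2)=4.0 in self.rmsdTable, sets combine to False, and leaves [0, 1] and [2] as separate clusters.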
Example #3
 def _findConformations(self, atomBonds, xyzData):
   '''uses bonds and xyz data to figure out which sets of neighboring atoms
   move together, assigns each such set a conformation number, and records
   which input xyz lists each conformation corresponds to.'''
   #self.rigidComponent is the list of atom numbers for the rigid comp
   #self.atomsAssigned is the set of atom numbers for the rigid comp (@start)
   #self.atomsNotAssigned is the rest of the atom numbers
   self.confNums = [1] #rigid starts
   self.confAtoms = {} #maps to atom numbers
   self.confAtoms[1] = list(self.atomsAssigned)
   self.confInput = {} #maps to the input xyz lists
   self.confInput[1] = range(len(xyzData))
   confClusters = {}
   for atomNum in self.atomsNotAssigned:
     for listInputs in self.posClusterLists[atomNum]:
       tupleInputs = tuple(listInputs) #can't use lists as keys
       if tupleInputs not in confClusters:
         confClusters[tupleInputs] = unionFind()
       confClusters[tupleInputs].find(atomNum) #in case of singletons
       for otherNum, bondType in atomBonds[atomNum]:
         if listInputs in self.posClusterLists[otherNum]:
           confClusters[tupleInputs].union(atomNum, otherNum)
   for tupleInputs, clusters in confClusters.iteritems():
     for atomLists in clusters.toLists(): 
       #make a conf for each
       thisConf = self.confNums[-1] + 1
       self.confAtoms[thisConf] = atomLists
       self.confInput[thisConf] = tupleInputs
       self.confNums.append(thisConf)
Example #4
 def _countPositionsFewPoints(self, xyzData, tolerance):
   '''for a list of lists of xyz data, count the number of distinct positions
   each atom takes based on the tolerance and the distance. the squared
   tolerance is compared to the squared euclidean distance to decide whether
   two positions are the same. actually a clustering pass built on a
   unionfind data structure.'''
   self.posCount = []
   self.posClusters = [] #just save all the data since we made it
   self.posClusterLists = [] #just save all the data since we made it
   tolerance2 = tolerance ** 2. #square it since it is compared to squared distances
   for oneSet in xrange(len(xyzData[0])): #goes from 0 to atom count
     clusters = unionFind()
     xyzList = []
     for oneIndex in xrange(len(xyzData)): #0 to number of positions (mol2#s)
       clusters.find(oneIndex) #initiate each position
       xyzList.append(xyzData[oneIndex][oneSet])
     for oneIndex in xrange(len(xyzData)): #0 to positions
       oneXyz = xyzList[oneIndex]
       for twoIndex in xrange(oneIndex+1, len(xyzData)): #oneIndex to positions
         if geometry_basic.distL2Squared3(oneXyz, xyzList[twoIndex]) \
                                                                  < tolerance2:
           clusters.union(oneIndex, twoIndex)
     tempLists = clusters.toLists()
     self.posCount.append(len(tempLists))
     self.posClusters.append(clusters)
     self.posClusterLists.append(tempLists)
Example #5
 def _countPositions(self, xyzData, tolerance, verbose=False):
   '''for a list of lists of xyz data, count the number of distinct positions
   each atom takes based on the tolerance and the distance. the squared
   tolerance is compared to the squared euclidean distance to decide whether
   two positions are the same. actually a clustering pass built on a
   unionfind data structure.'''
   self.posCount = []
   self.posClusters = [] #just save all the data since we made it
   self.posClusterLists = [] #just save all the data since we made it
   tolerance2 = tolerance ** 2. #square it since it is compared to squared distances
   for oneSet in xrange(len(xyzData[0])): #goes from 0 to atom count
     if verbose:
       print oneSet, " atom positions being calculated"
     clusters = unionFind()
     xyzList = []
     for oneIndex in xrange(len(xyzData)): #0 to number of positions (mol2#s)
       clusters.find(oneIndex) #initiate each position
       xyzList.append(xyzData[oneIndex][oneSet])
     bucket = buckets.Bucket3d(xyzList, tolerance) #constructor to make fast
     bucket.getWithinCluster(clusters)
     #for pointA, pointB in bucket.getWithin(clusters):
     #  clusters.union(pointA, pointB)
     tempLists = clusters.toLists()
     self.posCount.append(len(tempLists))
     self.posClusters.append(clusters)
     self.posClusterLists.append(tempLists)
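buckets.Bucket3d is not shown in these examples; from the way it is called it appears to bin the points spatially so that only nearby pairs are compared, replacing the all-pairs loop of Example #4. The function below is only a sketch of that idea, not the real Bucket3d API; the name unionWithinTolerance and the grid details are invented for illustration.

import itertools

def unionWithinTolerance(xyzList, tolerance, clusters):
  '''unions every pair of points in xyzList closer than tolerance, hashing
  points into grid cells of side tolerance so only points in the same or
  adjacent cells are actually compared.'''
  tolerance2 = tolerance ** 2.
  grid = {}  # cell tuple -> list of point indices
  for index, (x, y, z) in enumerate(xyzList):
    cell = (int(x // tolerance), int(y // tolerance), int(z // tolerance))
    grid.setdefault(cell, []).append(index)
  for (cx, cy, cz), members in grid.items():
    for dx, dy, dz in itertools.product((-1, 0, 1), repeat=3):
      others = grid.get((cx + dx, cy + dy, cz + dz), [])
      for one in members:
        for two in others:
          if one < two:  # check each pair only once
            oneXyz, twoXyz = xyzList[one], xyzList[two]
            dist2 = sum((a - b) ** 2 for a, b in zip(oneXyz, twoXyz))
            if dist2 < tolerance2:
              clusters.union(one, two)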
Example #6
def findBiggestDisjointSets(pointList, triList, pointNeighborList):
  '''slightly improved code-- well 15 seconds faster on small stuff'''
  pointSetUF = unionfind2.unionFind()
  for nhbrPointsList in pointNeighborList:
    #union this point with each of its listed neighbors
    startPt = nhbrPointsList[0]
    for otherPt in nhbrPointsList[2:]:
      pointSetUF.union(startPt, otherPt)
  pointSets = pointSetUF.toLists()
  #remove points + tris not in the biggest disjoint set (cavities)
  largest, size = 0, 0
  for index in xrange(len(pointSets)):
    if len(pointSets[index]) > size:
      largest, size = index, len(pointSets[index])
  allowedPoints = pointSets[largest]
  #figured it out, make sets
  allPoints, cavPoints = set(), set()
  for point in pointList:
    if int(point[0]) in allowedPoints:
      allPoints.update([int(point[0])])
    else:
      cavPoints.update([int(point[0])])
  allTris, cavTris = set(), set()
  for tri in triList:
    if int(tri[1]) in allPoints:  # any triangle point is okay
      allTris.update([int(tri[0])])
    else:
      cavTris.update([int(tri[0])])
  #print len(allPoints), len(pointList)
  #print len(allTris), len(triList)
  return allPoints, allTris, cavPoints, cavTris
Example #7
 def getRMSDclusters(self, rmsdCutoff=None, numClusters=1):
   '''uses the rmsdList to make clusters of conformations based on rmsd.
   goes until either the rmsdCutoff or numClusters is reached. using
   numClusters will make this run very slowly.
   uses single linkage to join clusters.'''
   self.getRMSDtable() #make the table, or ensure it is made
   #self.rmsdList is a tuple of (rmsd, conf, conf)
   clusters = unionfind2.unionFind()
   for xyzCount in xrange(len(self.atomXyz)):
     clusters.find(xyzCount) # initialize all these to singleton clusters
   if rmsdCutoff is None:
     rmsdCutoff = self.rmsdList[-1][0] + 1.0 #make it never happen
   for rmsdTuple in self.rmsdList:
     if rmsdTuple[0] > rmsdCutoff:
       break #quit combining things!
     clusters.union(rmsdTuple[1], rmsdTuple[2])
   return clusters.toLists()
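A hypothetical call for the two RMSD clustering variants, assuming they live on a conformation-ensemble object (the name ensemble is made up for illustration):

singleLinkLists = ensemble.getRMSDclusters(rmsdCutoff=2.0)
completeLinkLists = ensemble.getRMSDclustersAll(rmsdCutoff=2.0)
print len(singleLinkLists), "single linkage clusters"
print len(completeLinkLists), "complete linkage clusters"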
Example #8
 def clusterAtoms(self, distanceCutoff=2.0):
   '''breaks the atoms into distinct clusters based on a distance cutoff'''
   ligandClusters = unionfind2.unionFind()
   cutoffSquared = distanceCutoff ** 2. #faster comparisons
   for index,coord in enumerate(self.coords):
     for index2,coord2 in enumerate(self.coords):
       if index2 > index: #only do comparisons once each
         distBetweenSquared = geometry.distL2Squared(coord, coord2)
         if distBetweenSquared <= cutoffSquared:
           ligandClusters.union(index, index2)
   clusteredLists = ligandClusters.toLists()
   newPdbs = [] #list of pdbData objects to return
   for oneCluster in clusteredLists:
     newPdb = self.copy()
     markedForRemoval = []
     for index in xrange(len(self.coords)):
       if index not in oneCluster:
         markedForRemoval.append(index)
     for index in markedForRemoval:
       newPdb.removeLine(newPdb.atomToRaw[index])
     newPdbs.append(newPdb)
   return newPdbs
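A hypothetical call, assuming clusterAtoms lives on a pdbData-like object (the name ligandPdb is made up for illustration); each spatially separate group of atoms comes back as its own copy of the structure:

separatePdbs = ligandPdb.clusterAtoms(distanceCutoff=2.0)
print len(separatePdbs), "spatially separate atom groups found"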
Example #9
def findBiggestDisjointSetsBreakCavities(pointList, triList, pointNeighborList):
  '''breaks out each cavity separately. doesn't return tris, just points'''
  pointSetUF = unionfind2.unionFind()
  for nhbrPointsList in pointNeighborList:
    #union this point with each of its listed neighbors
    startPt = nhbrPointsList[0]
    for otherPt in nhbrPointsList[2:]:
      pointSetUF.union(startPt, otherPt)
  pointSets = pointSetUF.toLists()
  #remove points + tris not in the biggest disjoint set (cavities)
  largest, size = 0, 0
  for index in xrange(len(pointSets)):
    if len(pointSets[index]) > size:
      largest, size = index, len(pointSets[index])
  allowedPoints = pointSets[largest]
  #figured it out, make sets
  allPoints, cavPoints = set(allowedPoints), set()
  pointSets.remove(allowedPoints)
  for cavPtSet in pointSets:
    cavPoints.update(cavPtSet)
  #print len(allPoints), len(pointList)
  return allPoints, cavPoints, pointSets
Example #10
 def _findRigidComponent(self, atomBonds):
   '''uses bond and position count information to find the largest set of
   atoms that don't move. this is the rigid component, set into
   self.rigidComponent. also finds the complement of the atom numbers and the
   rigid component and sets it into self.atomsNotAssigned for use later'''
   clusters = unionFind()
   for atomNum in xrange(len(self.posCount)):
     if 1 == self.posCount[atomNum]:
       for otherNum, bondType in atomBonds[atomNum]:
         if 1 == self.posCount[otherNum]:
           clusters.union(atomNum, otherNum)
   maxSize = 0
   maxCluster = None
   clusterLists = clusters.toLists()
   for clusterList in clusterLists:
     if len(clusterList) > maxSize:
       maxSize = len(clusterList)
       maxCluster = clusterList
   self.rigidComponent = maxCluster
   self.atomsAssigned = set(self.rigidComponent)
   self.atomsNotAssigned = set()
   for atomNum in xrange(len(self.posCount)):
     if atomNum not in self.rigidComponent:
       self.atomsNotAssigned.add(atomNum)
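The loop above that picks the biggest cluster is equivalent to taking the longest list returned by toLists(); a one-line version of the same selection (assuming clusterLists is non-empty):

  maxCluster = max(clusterLists, key=len)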
Example #11
 def addSearchConnections(
     self, totalThreshold, remove=False, mst=False,
     maxConnCount=100000000000, lineMst=False,
     startNode=None, endNode=None, clusterOutput=False):
   '''adds the connections to self.connections if they meet the requirements'''
   tempConns = self.matchList[:]  # copy so sorting below leaves matchList alone
   self.connections = []
   if clusterOutput:
     clusters = unionfind2.unionFind()
     overlapFunction = self.tmDataList[0].compareResidueIdentityMultipleNodes
     overlapCache = {}
     treeCountCache = {}
   for tmData in self.tmDataList:
     self.tmToSubgraph[tmData].resetKeepers()
   if mst:  # init this data structure
     mstUF = unionfind2.unionFind()
   if lineMst:  # init this data structure
     mstUF = unionfind2.unionFind()
     connsLimit2 = {}
     if startNode is not None and endNode is not None:  # limit endpoints
       connsLimit2[startNode] = [endNode]   # for lineMstEnds given hints
   tempConns.sort(key=operator.itemgetter(4))  # best first
   for aMatch in tempConns:
     tmData, tmData2, node1, node2, totalScore = aMatch  # unpack
     mstOkay = (not mst) and (not lineMst)  # iff both false, everything is ok
     if mst:  # do checks for mst
       if mstUF.different(node1, node2):  # calls find on node1+2 to init them
         mstOkay = True
         mstUF.union(node1, node2)
     elif lineMst:  # if okay to mst might check for linemst
       if mstUF.different(node1, node2):  # calls find on node1+2 to init them
         if (node1 not in connsLimit2 or len(connsLimit2[node1]) == 1) and \
             (node2 not in connsLimit2 or len(connsLimit2[node2]) == 1):
           #only now we know it is completely okay
           if node1 not in connsLimit2:
             connsLimit2[node1] = []
           connsLimit2[node1].append(node2)
           if node2 not in connsLimit2:
             connsLimit2[node2] = []
           connsLimit2[node2].append(node1)
           mstOkay = True
           mstUF.union(node1, node2)
     if mstOkay:  # means either everything is fine or not mst
       if totalScore < totalThreshold and len(self.connections) < maxConnCount:
         subGraph1 = self.tmToSubgraph[tmData]
         subGraph2 = self.tmToSubgraph[tmData2]
         newConn = [
             subGraph1.nodeToName[node1],
             subGraph2.nodeToName[node2], totalScore, node1, node2]
         subGraph1.keepers[newConn[0]] = True
         subGraph2.keepers[newConn[1]] = True
         self.connections.append(newConn)
         if clusterOutput:
           clusters.union(node1, node2)  # really is that simple
           clustLists = clusters.toLists()
           for aCluster in clustLists:
             aCluster.sort()
             tupleCluster = tuple(aCluster)
             if tupleCluster not in overlapCache:
               aOverlap = overlapFunction(aCluster)
               overlapCache[tupleCluster] = aOverlap
             else:
               aOverlap = overlapCache[tupleCluster]
             if tupleCluster not in treeCountCache:
               treeSet = set()
               for node in aCluster:
                 treeSet.add(node.tree)
               treeCount = len(treeSet)
               treeCountCache[tupleCluster] = treeCount
             else:
               treeCount = treeCountCache[tupleCluster]
             if aOverlap >= 0.0 or len(clustLists) < 5:
               print len(self.connections), len(clustLists),
               print "len:", len(aCluster), "over:", aOverlap,
               print "count:", treeCount,
               print outputDrawStr(aCluster[0].tree.inputFileName, aCluster[0])
   if remove:  # sometimes may not want to do this here
     for subGraph in self.tmToSubgraph.values():
       subGraph.removeNonKeepers()
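The mst branch above is the standard Kruskal acceptance test: take the edges best-first and keep an edge only when its endpoints are still in different sets. A standalone sketch of that pattern using the same unionfind2.unionFind calls (the function name kruskalEdges is made up for illustration):

def kruskalEdges(edges):
  '''edges is a list of (score, node1, node2) tuples; returns the edges kept,
  which form a minimum spanning forest when the scores are costs.'''
  keptEdges = []
  treeUF = unionfind2.unionFind()
  for score, node1, node2 in sorted(edges):
    if treeUF.different(node1, node2):  # endpoints not connected yet
      treeUF.union(node1, node2)
      keptEdges.append((score, node1, node2))
  return keptEdges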