def analyzePdbB(filenameList=False):
  residues = {}  # a dict of dicts where the sub-dicts are keyed on atom name
  eachRes = {}  # dict keyed on RESNAME-RESNUM
  if filenameList:
    for filename in filenameList:
      pdbD = pdb.pdbData(filename)
      for index, resName in enumerate(pdbD.resNames):
        if pdbD.radii[index] > 0.:
          if resName not in residues:
            residues[resName] = {}  # init sub-dict
          if string.strip(pdbD.atoms[index]) not in residues[resName]:
            residues[resName][string.strip(pdbD.atoms[index])] = []
          residues[resName][string.strip(pdbD.atoms[index])].append(
              pdbD.factors[index][1])
          resNum = pdbD.resNums[index]
          longName = str(resName) + str(resNum)
          if longName not in eachRes:
            eachRes[longName] = {}  # init sub-dict
          if string.strip(pdbD.atoms[index]) not in eachRes[longName]:
            eachRes[longName][string.strip(pdbD.atoms[index])] = []
          eachRes[longName][string.strip(pdbD.atoms[index])].append(
              pdbD.factors[index][1])
    #residues now contains all the b-factor (travelin) data
  makeResidueReport(residues)
  makeResidueReport(
      eachRes, outputFilename="individual.res.bfactor", runGraphs=False)
  makeAtomReport(residues)
  makeAtomReport(
      eachRes, outputFilename="individual.atom.bfactor", runGraphs=False)
  makeHistogramReport(residues)
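# Minimal usage sketch (hypothetical list file and filenames): each input pdb
# is assumed to be a travel-in-depth annotated file whose second b-factor
# field holds the depth values that analyzePdbB aggregates into reports.
def exampleAnalyzePdbB(listFileName):
  '''illustrative wrapper, not part of the original module: reads one pdb
  filename per line and runs analyzePdbB on them'''
  names = []
  for line in open(listFileName, 'r'):
    name = string.strip(line)
    if name:
      names.append(name)
  analyzePdbB(names)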
def getJustMembranePdb(inputFileName, outputFileName):
  pdbBarriers = pdb.pdbData(inputFileName)
  #get the barriers read in and defined
  barrierAtomList = [[], []]
  for index, resName in enumerate(pdbBarriers.resNames):
    if resName == "DUM":
      if pdbBarriers.atoms[index][0] == "O":
        barrierAtomList[0].append(pdbBarriers.coords[index])
      elif pdbBarriers.atoms[index][0] == "N":
        barrierAtomList[1].append(pdbBarriers.coords[index])
  barrierZ = [barrierAtomList[0][0][2], barrierAtomList[1][0][2]]
  barrierZ.sort()
  barrierSep = geometry.distL2(barrierAtomList[0][0], barrierAtomList[1][0])
  zCoord = barrierZ[1]
  goodResChain = []
  for index, thisResNum in enumerate(pdbBarriers.resNums):
    chain = pdbBarriers.chains[index]
    resChain = str(thisResNum) + str(chain)
    if resChain not in goodResChain:
      #otherwise don't need to check, already in
      zTest = pdbBarriers.coords[index][2]
      if abs(zTest) <= zCoord:
        goodResChain.append(resChain)
  newPdb = pdbBarriers.getListResiduesChains(goodResChain)
  newPdb.write(outputFileName)
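# Usage sketch (hypothetical prefix): the input is assumed to be an OPM-style
# pdb with DUM dummy atoms marking the membrane planes, the same "planes_"
# convention used by checkPathBarriers below.
def exampleTrimToMembrane(prefix):
  '''illustrative wrapper only'''
  getJustMembranePdb("planes_" + prefix + ".pdb", prefix + ".membrane.pdb")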
def repairPointPdbRecord(tstD=None, tstFileName=False):
  '''checks the POINT_PDB_RECORD and repairs it if it holds no real data
  (i.e. every point maps to the same atom)'''
  same = True
  if tstD is None:  # hasn't been read in already
    tstD = tstdata.tstData(
        tstFileName, necessaryKeys=tstdata.tstData.necessaryKeysForPocket)
  lastPdbNum = tstD.dict['POINT_PDB_RECORD'][0][1]-1
  for pointPdb in tstD.dict['POINT_PDB_RECORD']:
    pdbNum = pointPdb[1]-1
    if pdbNum != lastPdbNum:
      same = False
  if same:  # needs repair; otherwise nothing more to do
    pdbD = pdb.pdbData()
    for line in tstD.dict['PDB_RECORD']:
      pdbD.processLine(line)
    ptCoords = {}
    for pointXyz in tstD.dict['POINT_XYZ']:
      ptCoords[pointXyz[0]] = tuple(pointXyz[1:])
    coordToNearbyAtoms = pdbD.getNearbyAtoms(ptCoords.values())
    newPointPdbRec = []
    for pointPdb in tstD.dict['POINT_PDB_RECORD']:
      atomFound = coordToNearbyAtoms[ptCoords[pointPdb[0]]][0]
      newPointPdbRec.append([pointPdb[0], atomFound])
    #replace old record with new record
    tstD.dict['POINT_PDB_RECORD'] = newPointPdbRec
    if tstFileName:
      tstFile = open(tstFileName, 'a')  # append into file
      tstdata.writeEntryIntegers(
          tstD.dict['POINT_PDB_RECORD'], "POINT_PDB_RECORD",
          "END POINT_PDB_RECORD", tstFile)
      tstFile.close()
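# Usage sketch mirroring how tstPocketMap (below) calls this: read the tst
# data, then repair in place; passing the filename as well makes the repaired
# record get appended to the tst file on disk. The filename is hypothetical.
def exampleRepairRecord(tstFileName):
  '''illustrative wrapper only'''
  tstD = tstdata.tstData(
      tstFileName, necessaryKeys=tstdata.tstData.necessaryKeysForPocket)
  repairPointPdbRecord(tstD, tstFileName)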
def calcRMSDprintTable(pdbFileNames):
  pdbDatas = []
  for pdbFileName in pdbFileNames:
    pdbDatas.append(pdb.pdbData(pdbFileName))
  matrix = []
  for index1 in xrange(len(pdbDatas)):
    tempRow = []
    for index2 in xrange(len(pdbDatas)):
      if index1 > index2:
        rmsd = pdbDatas[index1].calcRMSD(pdbDatas[index2], alphas=True)
        tempRow.append(rmsd)
    matrix.append(tempRow)
  print "matrix",
  for index1 in xrange(len(pdbDatas)):
    print pdbFileNames[index1],
  print " "  # title row done
  for index1 in xrange(len(pdbDatas)):
    print pdbFileNames[index1],
    for index2 in xrange(len(pdbDatas)):
      if index1 == index2:  # always 0
        print 0.,
      elif index1 > index2:
        print matrix[index1][index2],
      else:
        print matrix[index2][index1],
    print " "  # this row done
def readTri(triName):
  '''reads a tri file, returns the data inside'''
  pdbD = pdb.pdbData()
  triLineToNum = {}
  triPoints, oneTri = [], []
  triFile = open(triName, 'r')
  try:
    inPdb, inTri = False, False
    for line in triFile:
      if line.startswith('PDB_RECORD'):
        inPdb = True
      elif line.startswith('END PDB_RECORD'):
        inPdb = False
      elif line.startswith('TRIANGLE_XYZ'):
        inTri = True
      elif line.startswith('END TRIANGLE_XYZ'):
        inTri = False
      else:  # actually do some processing of lines
        if inPdb:
          pdbD.processLine(line)
        elif inTri:
          strippedLine = string.strip(line)
          if strippedLine not in triLineToNum:
            triLineToNum[strippedLine] = len(triLineToNum) + 1
          triNum = triLineToNum[strippedLine]
          oneTri.append(triNum)
          if len(oneTri) == 3:
            triPoints.append(oneTri)
            oneTri = []
  except StopIteration:  # EOF
    pass
  return pdbD, triLineToNum, triPoints
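# Usage sketch (hypothetical filename): readTri returns the embedded pdb
# record, a map from point lines to 1-based point numbers, and the triangles
# as triples of those point numbers.
def exampleReadTri(triName):
  '''illustrative wrapper only'''
  pdbD, pointNumbers, triangles = readTri(triName)
  print len(pointNumbers), "points,", len(triangles), "triangles"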
def pdbMostOccupied(pdbFileName, outName=None, exceptions=[]):
  pdbEntry = pdb.pdbData(pdbFileName)
  newPdb = pdbEntry.copy()
  #print newPdb.altChars.count('A'), newPdb.altChars.count('B')
  newPdb.selectMostOccupied(exceptions=exceptions)
  #print newPdb.altChars.count('A'), newPdb.altChars.count('B')
  newPdb.write(outName)
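# Usage sketch (hypothetical filenames): writes a copy of the structure that
# keeps only the most occupied alternate conformation of each atom.
def exampleMostOccupied():
  '''illustrative wrapper only'''
  pdbMostOccupied("input.pdb", outName="input.mostocc.pdb")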
def tstTravelDepthMesh(
    tstFileName, phiFileName, ligandFileName=None, cavities=False,
    threshold="auto"):
  '''sets up a normal travel depth mesh run, calls tstTravelDepthMeshRun.
  if cavities is set, it means we want the (new) travel depth of those too'''
  distanceName = 'traveldepth'
  tstD = tstdata.tstData(tstFileName)  # read the file into the data structure
  phiData = phi(phiFileName)  # read in the phimap if possible
  phiGridSpacing = 1. / phiData.scale
  phiTravelDepthGrid, phiTravelDepth, meshData = tstTravelDepthMeshRun(
      tstD, phiData, tstFileName, cavities=cavities,
      threshold=threshold)  # modifies tstD in place
  if ligandFileName is not None:
    ligand = pdb.pdbData(ligandFileName)
    ligandXYZR = ligand.getHeavyAtomXYZRadius()
    betweenList = meshData.getBetweenNodes()
    surfaceList = meshData.getSurfaceNodes()
    nodeWithinSet = meshData.getWithinNodesNoInside(ligandXYZR)
    #print len(nodeWithinSet), len(ligandXYZR)
    tracebackSet = meshData.getTracebackSet(nodeWithinSet, distanceName)
    #print len(tracebackSet)
    minW, maxW, meanW = meshData.getMinMaxMeanNodes(nodeWithinSet, distanceName)
    minT, maxT, meanT = meshData.getMinMaxMeanNodes(tracebackSet, distanceName)
    minB, maxB, meanB = meshData.getMinMaxMeanNodes(betweenList, distanceName)
    minS, maxS, meanS = meshData.getMinMaxMeanNodes(surfaceList, distanceName)
    #print all on one line, header printed earlier
    print minW, maxW, meanW,
    print minT, maxT, meanT,
    print minB, maxB, meanB,
    print minS, maxS, meanS,
    volumeWithin = len(nodeWithinSet) * phiGridSpacing**3.
    volumeTrace = len(tracebackSet) * phiGridSpacing**3.
    print volumeWithin, volumeTrace  # newline wanted here so no comma
    #print phiGridSpacing
    listWithin, listTrace = [], []
    for node in nodeWithinSet:
      listWithin.append(node.getXYZ())
    for node in tracebackSet:
      listTrace.append(node.getXYZ())
    tstdebug.pointDebug(listWithin, filename=ligandFileName+".within.py")
    tstdebug.pointDebug(listTrace, filename=ligandFileName+".trace.py")
  #tstdebug.debugGridCountVals(phiTravelDepthGrid)
  #transform grid to actual travel distance
  phiTravelDepth.write(tstFileName+".travel.phi")
  #write data to file
  tstFile = open(tstFileName, 'a')
  tstFile.write("DEPTH_TRAVEL_DIST\n")
  for line in tstD.dict['DEPTH_TRAVEL_DIST']:
    lineOut = "%8d" % line[0]
    for count in xrange(1, len(line)):
      lineOut += "%+9.4f " % line[count]
    noPlusLine = string.replace(lineOut, "+", " ")
    tstFile.write(noPlusLine)
    tstFile.write("\n")
  tstFile.write("END DEPTH_TRAVEL_DIST\n")
  tstFile.close()
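# Usage sketch (hypothetical prefix and filenames): runs travel depth on a
# tst/phi pair; when a ligand pdb is supplied, depth statistics and volumes
# are also printed for the nodes the ligand occupies and their tracebacks.
def exampleTravelDepth(prefix):
  '''illustrative wrapper only'''
  tstTravelDepthMesh(
      prefix + ".nocav.tst", prefix + ".phi",
      ligandFileName=prefix + ".ligand.pdb")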
def getClosestToAverage(fileName, skipAlign=True):
  '''takes a pdb file of an nmr structure, superposes all models, finds the
  euclidean average, then picks the original model closest to that average
  as the one to use'''
  if not skipAlign:
    import superimpose  # superposition function, calls fortran code
  pdbD = pdb.pdbData(fileName)
  modelNums = pdbD.getModelNumbers()
  firstModel = False
  alignedModels = []
  for modelNum in modelNums:
    newPdb = pdbD.getOneModel(modelNum)
    outputFileName = fileName[:-4] + ".model." + str(modelNum) + ".pdb"
    newPdb.write(outputFileName)
    if not firstModel:
      firstModel = outputFileName  # don't align first
    else:
      if not skipAlign:
        alignFileName = "align." + outputFileName
        superimpose.superposition(firstModel, outputFileName, alignFileName)
        alignedModels.append(alignFileName)
      else:
        alignedModels.append(outputFileName)
  firstModelPdbD = pdb.pdbData(firstModel)
  averageFileName = fileName[:-4] + ".average.pdb"
  averagePdbD = firstModelPdbD.getAverageCoords(alignedModels)
  averagePdbD.write(averageFileName)
  bestModelName = firstModel
  bestRMSD = averagePdbD.calcRMSDfile(firstModel)
  for alignModel in alignedModels:
    otherRMSD = averagePdbD.calcRMSDfile(alignModel)
    if otherRMSD < bestRMSD:
      bestRMSD = otherRMSD
      bestModelName = alignModel
  #print bestRMSD, alignModel
  bestFileName = fileName[:-4] + ".best.pdb"
  modelNumber = bestModelName.replace(".pdb", "").replace(
      "align."+fileName[:-4]+".model.", "").replace(fileName[:-4]+".model.", "")
  pdbD = pdb.pdbData(fileName)
  newPdb = pdbD.getOneModel(int(modelNumber))
  newPdb.write(bestFileName)  # writes all other file info not just coords
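# Usage sketch (hypothetical filename): writes each NMR model, the coordinate
# average, and prefix.best.pdb for the model closest to that average;
# skipAlign=True avoids the fortran-based superposition step.
def exampleClosestToAverage():
  '''illustrative wrapper only'''
  getClosestToAverage("nmr_ensemble.pdb", skipAlign=True)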
def analyzePdbBCompareFiles(firstFilename, compareFilename):
  residues = ({}, {})  # a pair of dicts of dicts; sub-dicts are keyed on atom name
  pdbs = ([], [])  # each list is a pdb, which has a dictionary of residues
  for lindex, listFilename in enumerate((firstFilename, compareFilename)):
    filenameList = []
    if listFilename:
      listFile = open(listFilename, 'r')
      try:
        for line in listFile:
          if len(line) == 6:
            filenameList.append(
                line[0:4] + "." + line[4] + ".nocav.tst.mesh.travelin.pdb")
          if len(line) == 5:  # analyze non-domains
            filenameList.append(line[0:4] + ".nocav.tst.mesh.travelin.pdb")
      except StopIteration:
        pass
      listFile.close()
    if filenameList:
      for filename in filenameList:
        pdbD = pdb.pdbData(filename)
        tempRes = {}
        for index, resName in enumerate(pdbD.resNames):
          if pdbD.radii[index] > 0.:
            if resName not in tempRes:
              tempRes[resName] = {}
            if resName not in residues[lindex]:
              residues[lindex][resName] = {}  # init sub-dict
            if string.strip(pdbD.atoms[index]) not in residues[lindex][resName]:
              residues[lindex][resName][string.strip(pdbD.atoms[index])] = []
            if string.strip(pdbD.atoms[index]) not in tempRes[resName]:
              tempRes[resName][string.strip(pdbD.atoms[index])] = []
            #if pdbD.factors[index][1] > 30:
            #  #print pdbD.factors[index][1], filename #debugging
            #print pdbD.atoms[index],
            residues[lindex][resName][string.strip(pdbD.atoms[index])].append(
                pdbD.factors[index][1])
            tempRes[resName][string.strip(pdbD.atoms[index])].append(
                pdbD.factors[index][1])
        pdbs[lindex].append(tempRes)
      #residues now contains all the b-factor (travelin) data
  #need to change after this line to call new methods
  listFilename = firstFilename + "." + compareFilename
  analyzePdbB.makeCompareResidueReport(
      residues, listFilename + ".residue.bfactor")
  #do alternate pvalue tests and by file comparisons with pdbs lists
  corrAll, corrBeta = analyzePdbB.makeCompareResidueReportAlternate(
      pdbs, listFilename + ".residue.alt.bfactor")
  #now do a p-value comparison that is corrected for the overall depth diffs
  analyzePdbB.makeCompareResidueReportAlternate(
      pdbs, listFilename + ".residue.alt.corr.bfactor",
      correctionAll=corrAll, correctionBeta=corrBeta)
  analyzePdbB.makeMeanPerProteinReport(
      pdbs, outName=listFilename + ".meansbypdb.txt")
def analyzePdbBfromFile(listFilename, buriedThresh=2.):
  residues = {}  # a dict of dicts where the sub-dicts are keyed on atom name
  buriedResidues, surfaceResidues = {}, {}
  filenameList = []
  if listFilename:
    listFile = open(listFilename, 'r')
    try:
      for line in listFile:
        if len(line) == 6:
          filenameList.append(
              line[0:4] + "." + line[4] + ".nocav.tst.mesh.travelin.pdb")
    except StopIteration:
      pass
    listFile.close()
  if filenameList:
    for filename in filenameList:
      pdbD = pdb.pdbData(filename)
      for index, resName in enumerate(pdbD.resNames):
        if pdbD.radii[index] > 0.:
          atomName = string.strip(pdbD.atoms[index])
          bfactors = pdbD.getFactorsByResidueChain(
              pdbD.resNums[index], pdbD.chains[index])
          if buriedThresh > min(bfactors):
            if resName not in surfaceResidues:
              surfaceResidues[resName] = {}  # init sub-dict
            if atomName not in surfaceResidues[resName]:
              surfaceResidues[resName][atomName] = []
            surfaceResidues[resName][atomName].append(pdbD.factors[index][1])
          else:
            if resName not in buriedResidues:
              buriedResidues[resName] = {}  # init sub-dict
            if atomName not in buriedResidues[resName]:
              buriedResidues[resName][atomName] = []
            buriedResidues[resName][atomName].append(pdbD.factors[index][1])
          #add it to this either way
          if resName not in residues:
            residues[resName] = {}  # init sub-dict
          if atomName not in residues[resName]:
            residues[resName][atomName] = []
          residues[resName][atomName].append(pdbD.factors[index][1])
  #residues now contains all the b-factor (travelin) data
  analyzePdbB.makeResidueReport(
      residues, listFilename + ".residue.bfactor",
      maxY=6, maxYBeta=6)  # hardcoded for now...
  analyzePdbB.makeAtomReport(residues, listFilename + ".atom.bfactor")
  analyzePdbB.makeAtomReport(
      surfaceResidues, listFilename + ".atom.bfactor.surface")
  analyzePdbB.makeAtomReport(
      buriedResidues, listFilename + ".atom.bfactor.buried")
  analyzePdbB.makeHistogramReport(residues, listFilename + ".histogram.bfactor")
def readSphPathRadius(sphFileName):
  result, radii = [], []
  sphData = pdb.pdbData(sphFileName)     # sph is basically PDB format
  seenZeroYet = False  # stupid hack for sph file
  for index, factors in enumerate(sphData.factors):  # if second factor
    resNum = sphData.resNums[index]
    if factors[1] > 0.0:                  # is non-zero, then part of path
      if resNum > 0:
        result.append(sphData.coords[index])
        radii.append(sphData.factors[index][0])
      elif resNum == 0 and not seenZeroYet:
        result.append(sphData.coords[index])
        radii.append(sphData.factors[index][0])
        seenZeroYet = True
  return result, radii
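# Usage sketch (hypothetical filename): a .sph file is pdb-like; the points
# that belong to the path come back as coordinates plus matching radii.
def exampleReadSph(sphFileName):
  '''illustrative wrapper only'''
  points, radii = readSphPathRadius(sphFileName)
  print len(points), "path points, largest radius", max(radii)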
def outputNearbyResidues(pointPath, outName, pdbRawData, nearbyDistance):
  '''outputs 2 different files. 1. residues near the minimum radius
  2. residues near the entire path.
  near is defined as radius + nearbyDistance'''
  pdbD = pdb.pdbData()
  pdbD.processLines(pdbRawData)
  minRadiusPoint = pointPath[0]
  for pathPt in pointPath:
    if pathPt[0] < minRadiusPoint[0]:
      minRadiusPoint = pathPt
  residuesNearMin = getNearbyResidues([minRadiusPoint], pdbD, nearbyDistance)
  pdbNearMin = pdbD.getListResiduesChains(residuesNearMin)
  pdbNearMin.write(outName + ".residues.pathmin.pdb")
  residuesNearPath = getNearbyResidues(pointPath, pdbD, nearbyDistance)
  pdbNearPath = pdbD.getListResiduesChains(residuesNearPath)
  pdbNearPath.write(outName + ".residues.path.pdb")
  #that's it... files written; return residue lists in case the caller needs them
  return residuesNearMin, residuesNearPath
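# Usage sketch: pointPath is assumed to be a list of (radius, x, y, z) tuples
# such as comparePaths.readCGOPathWithRadius returns, and pdbRawLines a list
# of raw PDB_RECORD lines; the names and 4.0 Angstrom cutoff are assumptions.
def exampleNearbyResidues(pointPath, pdbRawLines):
  '''illustrative wrapper only'''
  nearMin, nearPath = outputNearbyResidues(pointPath, "pore1", pdbRawLines, 4.0)
  print len(nearMin), "residues near the narrowest point,", len(nearPath), \
      "near the whole path"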
def tstTravelSurfInsideOld(tstFileName, phiFileName=False):
  '''does the old algorithm of just computing the shortest distance to any
  surface point from any atom by going through both lists the hard way'''
  tstD = tstdata.tstData(tstFileName)  # read the file into the data structure
  #do the biggest disjoint set of tris/points stuff
  allPoints, allTris, cavPoints, cavTris = cavity.assumeNoCavities(
      tstD.dict['POINT_XYZ'], tstD.dict['TRIANGLE_POINT'],
      tstD.dict['POINT_NEIGHBOR'])
  pointXyz = tstD.dict['POINT_XYZ']
  pdbD = pdb.pdbData()
  for line in tstD.dict['PDB_RECORD']:
    pdbD.processLine(line)
  atomTravelInDepths = []
  for coord in pdbD.coords:
    minDist = geometry.distL2(coord, pointXyz[allPoints[0]-1][1:])
    #set to first
    for point in allPoints[1:]:
      thisDist = geometry.distL2(coord, pointXyz[point-1][1:])
      minDist = min(minDist, thisDist)
    atomTravelInDepths.append(minDist)
  #make a pdb file with the bfactor replaced
  for index, atomTID in enumerate(atomTravelInDepths):
    pdbD.updateFactors(index, (pdbD.factors[index][0], atomTID))
  pdbD.write(tstFileName+".old.atomdepth.pdb")
  #also add record to tstdata
  atomTIDRecord = []
  for index, atomTID in enumerate(atomTravelInDepths):
    atomTIDRecord.append([index+1, atomTID])
  tstD.dict['ATOM_DEPTH_OLD'] = atomTIDRecord
  #write data into tst file
  tstFile = open(tstFileName, 'a')
  tstFile.write("ATOM_TRAVEL_IN\n")
  for line in tstD.dict['ATOM_DEPTH_OLD']:
    lineOut = "%8d" % line[0]
    for count in xrange(1, len(line)):
      lineOut += "%+9.4f " % line[count]
    noPlusLine = string.replace(lineOut, "+", " ")
    tstFile.write(noPlusLine)
    tstFile.write("\n")
  tstFile.write("END ATOM_DEPTH_OLD\n")
  tstFile.close()
def calculateCharges(tstD, chargeD):
  '''actually does charge assignment, returns 2 lists'''
  pdbD = pdb.pdbData()
  for line in tstD.dict['PDB_RECORD']:
    pdbD.processLine(line)
  pdbD.assignCharges(chargeD)
  #for index in xrange(len(pdbD.atoms)): #used to debug assignments
  #  print pdbD.atoms[index], pdbD.resNames[index], pdbD.charges[index]
  chargeXyz, hydroXyz = [], []  # new tst record list of [number, charge]
  for pointPdb in tstD.dict['POINT_PDB_RECORD']:
    pdbNum = pointPdb[1] - 1
    tempCharge = pdbD.charges[pdbNum]
    tempHydroCharge = pdbD.hydroCharges[pdbNum]
    if tempCharge is None:  # warn user that charges didn't get assigned
      print "warning: charge is not assigned for " + pdbD.atoms[pdbNum] + \
          " " + pdbD.resNames[pdbNum]
      print "charge is set to ZERO for now"
      tempCharge = 0.
      tempHydroCharge = 0.
    chargeXyz.append([pointPdb[0], tempCharge])
    hydroXyz.append([pointPdb[0], tempHydroCharge])
  return chargeXyz, hydroXyz
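# Usage sketch mirroring how tstPocketMap (below) uses this function: assign
# charges from the charge module's default parameter set and stash the two
# per-point lists back into the tst data structure.
def exampleAssignCharges(tstD):
  '''illustrative wrapper only'''
  chargeXyz, hydroXyz = calculateCharges(tstD, charge.charge())
  tstD.dict['CHARGE_XYZ'] = chargeXyz
  tstD.dict['HYDROPHOBIC_XYZ'] = hydroXyz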
def analyzePdbBCompareFiles(firstFilename):
  pdbs = {}  # each list is a pdb, which has a dictionary of residues
  filenameList = []
  if firstFilename:
    listFile = open(firstFilename, 'r')
    try:
      for line in listFile:
        if len(line) == 6:
          filenameList.append(
              line[0:4] + "." + line[4] + ".nocav.tst.mesh.travelin.pdb")
        elif len(line) == 5:  # analyze non-domains
          filenameList.append(line[0:4] + ".nocav.tst.mesh.travelin.pdb")
        elif len(line) > 8:
          filenameList.append(line[:-1])
        else:  # yeah just allow anything
          filenameList.append(line[:-1])
    except StopIteration:
      pass
    listFile.close()
  if filenameList:
    for filename in filenameList:
      pdbs[filename] = []
      pdbD = pdb.pdbData(filename)
      tempRes = {}
      for index, resName in enumerate(pdbD.resNames):
        if pdbD.radii[index] > 0.:
          if resName not in tempRes:
            tempRes[resName] = {}
          if string.strip(pdbD.atoms[index]) not in tempRes[resName]:
            tempRes[resName][string.strip(pdbD.atoms[index])] = []
          tempRes[resName][string.strip(pdbD.atoms[index])].append(
              pdbD.factors[index][1])
      pdbs[filename] = tempRes
      #residues now contains all the b-factor (travelin) data
  #need to change after this line to call new methods
  listFilename = firstFilename
  analyzePdbB.makeMeanPerProteinReport(
      pdbs, outName=listFilename + ".meansbypdb.txt")
def checkPathBarriers(prefix):
  tstName = prefix + ".nocav.tst"
  findHolesName = tstName + ".findholes.log"
  findHolesFile = open(findHolesName, 'r')
  findHolesLines = findHolesFile.readlines()
  findHolesFile.close()
  HolesName = tstName + ".membranehole.log"  # holds all the output
  goodHolesName = tstName + ".good.membranehole.log"  # just the 1 1 0 1 1
  sideHolesName = tstName + ".side.membranehole.log"  # just the * * 1 * *
  badHolesName = tstName + ".bad.membranehole.log"  # all others
  pdbWithBarriersFileName = "planes_" + prefix + ".pdb"
  pdbBarriers = pdb.pdbData(pdbWithBarriersFileName)
  #get the barriers read in and defined
  barrierAtomList = [[], []]
  for index, resName in enumerate(pdbBarriers.resNames):
    if resName == "DUM":
      if pdbBarriers.atoms[index][0] == "O":
        barrierAtomList[0].append(pdbBarriers.coords[index])
      elif pdbBarriers.atoms[index][0] == "N":
        barrierAtomList[1].append(pdbBarriers.coords[index])
  barrierZ = [barrierAtomList[0][0][2], barrierAtomList[1][0][2]]
  barrierZ.sort()
  barrierSep = geometry.distL2(barrierAtomList[0][0], barrierAtomList[1][0])
  #barrier is just Z coordinate
  #setup for main loop over paths
  poreSuffix = ".pore.py"
  logFile = open(HolesName, 'w')
  goodLogFile = open(goodHolesName, 'w')
  sideLogFile = open(sideHolesName, 'w')
  badLogFile = open(badHolesName, 'w')
  #the following 5 things are calculated and written for each path, headers
  #the 6th, barrier separation, is really the same for each structure
  logFile.write("endsBeyond1count barrier1count endsBetweenCount ")
  logFile.write("barrier2count endsBeyond2count barrierSeparation\n")
  goodLogFile.write("prefix ")
  goodLogFile.write(string.strip(findHolesLines[0]) + " ")
  goodLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
  goodLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
  sideLogFile.write("prefix ")
  sideLogFile.write(string.strip(findHolesLines[0]) + " ")
  sideLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
  sideLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
  badLogFile.write("prefix ")
  badLogFile.write(string.strip(findHolesLines[0]) + " ")
  badLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
  badLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
  holeNumber = 1
  poreFile = tstName + "." + str(holeNumber) + poreSuffix
  print poreFile
  paths = []
  sides = []
  while os.path.exists(poreFile):
    path = comparePaths.readCGOPath(poreFile)
    paths.append(path)
    intersections = [0, 0]
    for index, barrier in enumerate(barrierZ):
      intersections[index] = countCrossingsZ(path, barrier)
    ends = [0, 0, 0]
    for endPoint in [path[0], path[-1]]:
      endPointZ = endPoint[2]
      if endPointZ < barrierZ[0] and endPointZ < barrierZ[1]:
        ends[0] += 1
      elif endPointZ >= barrierZ[0] and endPointZ <= barrierZ[1]:
        ends[1] += 1
      elif endPointZ > barrierZ[0] and endPointZ > barrierZ[1]:
        ends[2] += 1
    outputThisTime = str(ends[0]) + " " + str(intersections[0]) + " " + \
        str(ends[1]) + " " + str(intersections[1]) + " " + \
        str(ends[2]) + " " + str(barrierSep) + " "
    logFile.write(outputThisTime)
    logFile.write("\n")
    if ends[0] + ends[1] + ends[2] != 2:
      print "problems sorting out the ends"
    if ends[0] == 1 and ends[2] == 1 and intersections == [1, 1]:
      # it is 'good'
      goodLogFile.write(prefix + " ")
      goodLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
      goodLogFile.write(outputThisTime + "\n")
    elif ends[1] == 2:
      sides.append(len(paths) - 1)
      sideLogFile.write(prefix + " ")
      sideLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
      sideLogFile.write(outputThisTime + "\n")
    else:
      badLogFile.write(prefix + " ")
      badLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
      badLogFile.write(outputThisTime + "\n")
    #and that is it for this path
    holeNumber += 1     # get set up for next pass
    poreFile = tstName + "." + str(holeNumber) + poreSuffix
  print sides
  logFile.close()
  goodLogFile.close()
  sideLogFile.close()
  badLogFile.close()
def tstPocketMap(
    tstFileName, phiFileName, tstD=None, ligandFileName=None,
    nearbyDistance=0., appendTst=True, doPCA=True):
  '''pocket mapping algorithm: finds all pockets on the entire surface, puts
  them into tree and graph data structures, and writes various outputs'''
  print "read tst file"
  if tstD is None:  # hasn't been read in already
    tstD = tstdata.tstData(
        tstFileName, necessaryKeys=tstdata.tstData.necessaryKeysForPocket)
  print "repairing nearby points if necessary"
  repairPointPdbRecord(tstD, tstFileName)
  print "calculating charges"
  chargeXyz, hydroXyz = calculateCharges(tstD, charge.charge())
  print "calculating curvatures"
  edgeCurv, ptCurv, ptWeighCurv = tstCurvature.tstEdgeCurvature(
      tstD.dict['TRIANGLE_POINT'], tstD.dict['POINT_XYZ'],
      tstD.dict['POINT_TRIANGLE'], tstD.dict['POINT_NEIGHBOR'])
  tstD.dict['POINT_CURVATURE_EDGE'] = ptWeighCurv
  tstD.dict['CHARGE_XYZ'] = chargeXyz
  tstD.dict['HYDROPHOBIC_XYZ'] = hydroXyz
  print "read in phi file"
  phiData = phi(phiFileName)  # read in the phimap
  print "making mesh data structure"
  meshData, gridData = meshConstruct(
      tstD, phiData, tstFileName, threshold="auto", cavities=True)
  meshData.setPtHydro(tstD.dict['HYDROPHOBIC_XYZ'])
  meshData.setPtCurvature(tstD.dict['POINT_CURVATURE_EDGE'])
  gridSize = 1.0/phiData.scale
  tstPdbRecord = tstD.dict['PDB_RECORD']
  meshData.setSurfaceArea(tstD.dict['TRIANGLE_POINT'])
  del tstD, phiData, gridData  # not needed, reclaim memory
  pdbD = pdb.pdbData()
  pdbD.processLines(tstPdbRecord)
  pointAtomList = meshData.calculateNearbyAtoms(pdbD, nearbyDistance)
  meshData.setVolume(gridSize)
  print "calculating travel depth"
  meshData.calculateTravelDistance("traveldepth", [0], [2, 3, 5])
  pointTravelDepth = meshData.getSurfaceTravelDistance("traveldepth")
  if ligandFileName is not None:  # if there is a ligand, read it
    ligand = pdb.pdbData(ligandFileName)
    ligandXYZR = ligand.getHeavyAtomXYZRadius()
    nodeWithinSet = meshData.getWithinNodesNoInside(ligandXYZR)
    bestIU = 0.  # intersection / union, 1 is perfect
    #print nodeWithinSet, len(nodeWithinSet)
  print "pocket mapping starting"
  if ligandFileName is not None and len(nodeWithinSet) > 0:
    outFileName = ligandFileName
    #tstdebug.nodeDebug(nodeWithinSet, \
    #              filename = tstFileName+".within.ligand.py")
    localMaxima, borders, tm3tree, surfNodeToLeaf = meshData.pocketMapping(
        'traveldepth',  [2, 3, 5], pointAtomList, pdbD,
        outName=outFileName + ".", groupName='group',
        ligandNodes=nodeWithinSet, doPCA=doPCA)
  else:
    outFileName = tstFileName
    localMaxima, borders, tm3tree, surfNodeToLeaf = meshData.pocketMapping(
        'traveldepth',  [2, 3, 5], pointAtomList, pdbD,
        outName=outFileName + ".", groupName='group', doPCA=doPCA)
  #print len(localMaxima), len(borders), tm3tree, len(surfNodeToLeaf)
  #tstdebug.nodeDebug(localMaxima, \
  #              filename=tstFileName+".localmaxima.pocketmap.py")
  #tstdebug.nodeDebug(borders, \
  #              filename=tstFileName+".borders.pocketmap.py")
  #tstdebug.nodeDebug(meshData.getSurfaceNodes(), \
  #              filename=tstFileName+".groups.pocketmap.py", name='group')
  tm3tree.write(outFileName + ".tree.tm3")
  #tm3tree.writeTNV(tstFileName + ".tree.tnv")
  #doesn't seem to import into treemap correctly
  if appendTst:  # turn off sometimes since appends to tst file
    print "appending data to tst file"
    surfNodes = meshData.getSurfaceNodes()
    pointLeafList = []
    for aNode in surfNodes:
      if aNode not in surfNodeToLeaf:
        print aNode, aNode.distances
        leafNum = 0  # made up and wrong for testing
      else:
        leafNum = surfNodeToLeaf[aNode]
      pointLeafList.append([aNode, int(leafNum)])
    #print pointLeafList
    leafToGroup = tm3tree.getLeafToGroup()
    leafGroupList = []
    leafKeyMax = max(leafToGroup.keys())
    for leaf in xrange(leafKeyMax):
      tempList = [leaf + 1]
      try:
        tempList.extend(leafToGroup[leaf + 1])
      except KeyError:
        pass  # means leaf doesn't exist
      leafGroupList.append(tempList)
    #print leafGroupList
    tstFile = open(tstFileName, 'a')
    tstdata.writeEntryIntegers(
        pointLeafList, "POINT_LEAF LIST", "END POINT_LEAF", tstFile)
    tstdata.writeEntryIntegers(
        leafGroupList, "LEAF_GROUP LIST", "END LEAF_GROUP", tstFile)
    tstdata.writeEntryIntegers(
        pointAtomList, "POINT_NEARBY_ATOM LIST", "END POINT_NEARBY_ATOM",
        tstFile)
    #also write curvature and charge data here
    tstdata.writeEntrySingleFloat(
        ptWeighCurv, "POINT_CURVATURE_EDGE LIST", "END POINT_CURVATURE_EDGE",
        tstFile)
    tstdata.writeEntrySingleFloat(
        chargeXyz, "CHARGE_XYZ", "END CHARGE_XYZ", tstFile)
    tstdata.writeEntrySingleFloat(
        hydroXyz, "HYDROPHOBIC_XYZ", "END HYDROPHOBIC_XYZ", tstFile)
    #write data to file
    tstFile.write("DEPTH_TRAVEL_DIST\n")
    for line in pointTravelDepth:
      lineOut = "%8d" % line[0]
      for count in xrange(1, len(line)):
        lineOut += "%+9.4f " % line[count]
      noPlusLine = string.replace(lineOut, "+", " ")
      tstFile.write(noPlusLine)
      tstFile.write("\n")
    tstFile.write("END DEPTH_TRAVEL_DIST\n")
    tstFile.close()
  print "pocket mapping complete"
import glob
import os
import geometry
import pdb
import string

size = 256  # should be enough for anybody
databaseDir = 'database'
pdbLocation = 'pdbs'
radius = 1.8  # radius of atoms, estimate
try:
  os.mkdir(databaseDir)
except OSError:
  pass  # directory exists, fine

for onePdb in glob.iglob(os.path.join(pdbLocation, '*.pdb')):  # every PDB
  pdbD = pdb.pdbData(onePdb)
  pdbCode = string.split(os.path.split(onePdb)[-1], '.')[0]
  points = pdbD.coords
  try:
    os.mkdir(os.path.join(databaseDir, pdbCode))
  except OSError:
    pass  # directory exists, fine
  for vector in xrange(3):
    for thetaTen in xrange(0, 62, 3):
      vecX, vecY, vecZ = 0., 0., 0.
      if vector == 0:
        vecX = 1.
      elif vector == 1:
        vecY = 1.
      elif vector == 2:
        vecZ = 1.
def tstTravelSurfInsideMesh(tstFileName, phiFileName, threshold="auto"):
  '''calculates the burial depth'''
  print "reading in tst and phi files"
  tstD = tstdata.tstData(
      tstFileName,
      necessaryKeys=tstdata.tstData.necessaryKeysForMesh + ['PDB_RECORD'])
  phiData = phi(phiFileName)  # read in the phimap if possible
  if 'CONVEX_HULL_TRI_POINT_LIST' not in tstD.dict.keys():
    print "Run tstConvexHull.py on this tst data file first."
    sys.exit(1)
  #these sets are useful to construct
  convexHullPoints = set()
  for record in tstD.dict['CONVEX_HULL_TRI_POINT_LIST']:
    convexHullPoints.update(record[1:])
  maxPhi = phiData.getMaxValues()
  if threshold == "auto" and maxPhi == 1.0:
    threshold = 0.6
  if threshold == "auto" and maxPhi == 10.0:
    threshold = 6.0
  gridD, mins, maxs = grid.makeTrimmedGridFromPhi(
      phiData, tstD.dict['POINT_XYZ'], convexHullPoints, threshold, 0, -2, 2)
  gridSize = 1.0 / phiData.scale
  del phiData  # no longer needed in this function, so delete this reference
  #do the biggest disjoint set of tris/points stuff
  allPoints, allTris, cavPoints, cavTris = cavity.assumeNoCavities(
      tstD.dict['POINT_XYZ'], tstD.dict['TRIANGLE_POINT'],
      tstD.dict['POINT_NEIGHBOR'])
  #here is where code is mesh-specific
  print "setting up mesh data structure"
  meshData = mesh.mesh(
      gridD, tstD.dict['POINT_XYZ'], tstD.dict['POINT_NEIGHBOR'],
      gridSize, -2, 0, "X")  # no between
  print "calculating burial depth"
  meshData.calculateTravelDistance("travelin", [3], [1])
  gridTravelInDepth = meshData.getGridTravelDistance(gridD, "travelin")
  #tstdebug.debugGridCountVals(gridTravelInDepth)
  print "writing phi file output"
  phiDataOut = phi()
  phiDataOut.createFromGrid(
      gridTravelInDepth, gridSize, toplabel="travel depth surf-in")
  phiDataOut.write(tstFileName+".mesh.travel.in.phi")
  print "writing pdb file output"
  pdbD = pdb.pdbData()
  for line in tstD.dict['PDB_RECORD']:
    pdbD.processLine(line)
  atomTravelInDepths = grid.assignAtomDepths(
      gridTravelInDepth, gridSize, mins, maxs, pdbD)
  #make a pdb file with the bfactor replaced
  for index, atomTID in enumerate(atomTravelInDepths):
    pdbD.updateFactors(index, (pdbD.factors[index][0], atomTID))
  pdbD.write(tstFileName+".mesh.travelin.pdb")
  #also add record to tstD
  atomTIDRecord = []
  for index, atomTID in enumerate(atomTravelInDepths):
    atomTIDRecord.append([index + 1, atomTID])
  print "updating tst file"
  tstD.dict['ATOM_TRAVEL_IN'] = atomTIDRecord
  #write data into tst file
  tstFile = open(tstFileName, 'a')
  tstFile.write("ATOM_TRAVEL_IN\n")
  for line in tstD.dict['ATOM_TRAVEL_IN']:
    lineOut = "%8d" % line[0]
    for count in xrange(1, len(line)):
      lineOut += "%+9.4f " % line[count]
    noPlusLine = string.replace(lineOut, "+", " ")
    tstFile.write(noPlusLine)
    tstFile.write("\n")
  tstFile.write("END ATOM_TRAVEL_IN\n")
  tstFile.close()
  print "burial depth done"
#!/usr/bin/env python
#ryan g. coleman [email protected]

#outputs all NMR models as single pdb files

#grab system, string, regular expression, and operating system modules
import sys
import string
import re
import os
import math
import pdb  # for chain sorting ease

if -1 != string.find(sys.argv[0], "outputAllNMRpdb.py"):
  fileName = sys.argv[1]
  pdbD = pdb.pdbData(fileName)
  modelNums = pdbD.getModelNumbers()
  for modelNum in modelNums:
    newPdb = pdbD.getOneModel(modelNum)
    outputFileName = sys.argv[1][:-4] + "." + str(modelNum) + ".pdb"
    newPdb.write(outputFileName)
def tstTravelSurfInside(tstFileName, phiFileName=False):
  tstD = tstdata.tstData(tstFileName)  # read the file into the data structure
  phiData = phi(phiFileName)   # read in the phimap if possible
  if 'CONVEX_HULL_TRI_POINT_LIST' not in tstD.dict.keys():
    print "Run tstConvexHull.py on this tst data file first."
    sys.exit(1)
  #these sets are useful to construct
  convexHullPoints = set()
  for record in tstD.dict['CONVEX_HULL_TRI_POINT_LIST']:
    convexHullPoints.update(record[1:])
  gridD, mins, maxs = grid.makeTrimmedGridFromPhi(
      phiData, tstD.dict['POINT_XYZ'], convexHullPoints, 0.6, 0, -2.0, 2)
  gridSize = 1.0/phiData.scale
  del phiData  # no longer needed in this function, so delete this reference
  #do the biggest disjoint set of tris/points stuff
  allPoints, allTris, cavPoints, cavTris = cavity.assumeNoCavities(
      tstD.dict['POINT_XYZ'], tstD.dict['TRIANGLE_POINT'],
      tstD.dict['POINT_NEIGHBOR'])
  #here's the (relatively simple) surface travel distance calculation finally
  #  assign following encoding -1 = outside ch, 0 = on border,
  #   pos ints = dist from border, -2 = far inside ms,
  #   other neg ints = -(dist)-3
  #whole algorithm wrapped into big function...
  extraEdges, surfaceEdgeBoxes = grid.findLongSurfEdges(
      tstD.dict['POINT_XYZ'], tstD.dict['POINT_NEIGHBOR'], gridSize, mins, maxs)
  for surfaceEdgeBox in surfaceEdgeBoxes.keys():
    x, y, z = gridD[surfaceEdgeBox[0]][surfaceEdgeBox[1]][surfaceEdgeBox[2]][1:]
    gridD[surfaceEdgeBox[0]][surfaceEdgeBox[1]][surfaceEdgeBox[2]] = (
        -1., x, y, z)
  pointTravelDist, traceback, volumePointDepths = \
      travelDistNoMesh.calcTravelDist(
          gridD, tstD.dict['POINT_XYZ'], gridSize, mins, maxs, allPoints,
          extraEdges, surfaceEdgeBoxes, tstFileName)
  #transform grid to actual travel distance
  maxTD = grid.finalizeGridTravelDist(gridD, gridSize)
  phiDataOut = phi()
  phiDataOut.createFromGrid(gridD, gridSize, toplabel="travel depth surf-in")
  phiDataOut.write(tstFileName+".travel.in.phi")
  pdbD = pdb.pdbData()
  for line in tstD.dict['PDB_RECORD']:
    pdbD.processLine(line)
  atomTravelInDepths = grid.assignAtomDepths(gridD, gridSize, mins, maxs, pdbD)
  #make a pdb file with the bfactor replaced
  for index, atomTID in enumerate(atomTravelInDepths):
    pdbD.updateFactors(index, (pdbD.factors[index][0], atomTID))
  pdbD.write(tstFileName+".travelin.pdb")
  #also add record to tstdata
  atomTIDRecord = []
  for index, atomTID in enumerate(atomTravelInDepths):
    atomTIDRecord.append([index+1, atomTID])
  tstD.dict['ATOM_TRAVEL_IN'] = atomTIDRecord
  #write data into tst file
  tstFile = open(tstFileName, 'a')
  tstFile.write("ATOM_TRAVEL_IN\n")
  for line in tstD.dict['ATOM_TRAVEL_IN']:
    lineOut = "%8d" % line[0]
    for count in xrange(1, len(line)):
      lineOut += "%+9.4f " % line[count]
    noPlusLine = string.replace(lineOut, "+", " ")
    tstFile.write(noPlusLine)
    tstFile.write("\n")
  tstFile.write("END ATOM_TRAVEL_IN\n")
  tstFile.close()
def copyCode(pdbCode):
  fileName = grab_pdb.getCode(pdbCode)
  pdbD = pdb.pdbData(fileName)
  pdbD.write(pdbCode + ".-.pdb")
def getCodeChain(pdbCode, chain):
  fileName = grab_pdb.getCode(pdbCode)
  pdbD = pdb.pdbData(fileName)
  newPdbD = pdbD.getOneChain(chain)
  newPdbD.write(pdbCode + "." + chain + ".pdb")
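# Usage sketch (hypothetical code and chain): both helpers rely on
# grab_pdb.getCode to download the entry before rewriting it.
def exampleFetchChain():
  '''illustrative wrapper only'''
  getCodeChain("1abc", "A")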
def makeTst(
    pdbFileName, gridSpacing, pathTo="$TDHOME/bin/", whichSrf="tri",
    probeSize=False, radScaleIn=False):
  '''
  figures out how to create proper grid spacing, calls fortran programs
  '''
  pathTo = os.path.expandvars(pathTo)
  if "tri" == whichSrf:  # pick which method to use, default
    srf = trisrf
    probe = triprobe
  elif "mesh" == whichSrf:  # alternate, better for tunnels
    srf = meshsrf
    probe = meshprobe
  if probeSize:  # means use non-default value...
    probe = probeSize
  if radScaleIn:  # same, use non-default value
    radScaleUse = float(radScaleIn)
  else:
    radScaleUse = radscale
  pdbEntry = pdb.pdbData(pdbFileName)
  rootNameTemp = string.replace(pdbFileName, ".pdb", "")
  rootName = string.replace(rootNameTemp, ".PDB", "")
  tstFileName = rootName + ".tst"
  triFileName = rootName + ".tri"
  phiFileName = rootName + ".phi"
  mins = list(pdbEntry.coords[0])
  maxs = list(pdbEntry.coords[0])
  atoms = pdbEntry.atoms
  for dimension in range(3):
    for index, coord in enumerate(pdbEntry.coords):
      mins[dimension] = min(
          mins[dimension], coord[dimension] -
          (pdb.radiiDefault[atoms[index][0]]*radScaleUse))
      maxs[dimension] = max(
          maxs[dimension], coord[dimension] +
          (pdb.radiiDefault[atoms[index][0]]*radScaleUse))
  difference = 0
  for dimension in range(3):
    difference = max(difference, maxs[dimension] - mins[dimension])
  length = difference + 2. * probe
  gridScale = 1. / float(gridSpacing)
  possibleGridSizes = srf.keys()
  possibleGridSizes.sort()
  for possibleGridSize in possibleGridSizes:
    percentFill = gridScale * 100 * length / (possibleGridSize - 1)
    if percentFill < 99:
      break   # keep these settings, they are good enough
    if possibleGridSizes[-1] == possibleGridSize:
      print "no grid size big enough, either make new version of tst or " + \
          "adjust grid size parameter"
      sys.exit(1)
  srfExecutable = pathTo + srf[possibleGridSize]
  if not os.path.exists(srfExecutable):
    print "the surface preparation executable does not exist at: ", \
        srfExecutable
    exit(1)
  if "tri" == whichSrf:
    execString = srfExecutable + "  " + \
        pdbFileName + " " + str(contour)   # run trisrf
  elif "mesh" == whichSrf:
    execString = srfExecutable + "  " + \
        pdbFileName + " " + str(probe) + " " + str(radScaleUse)  # run meshsrf
  #print percentFill, execString
  try:
    os.unlink("trisrf.tri")
  except OSError:
    pass  # this is okay, just making sure it is deleted
  trisrfProc = os.popen4(execString)
  if "tri" == whichSrf:  # pick which method to use, default
    trisrfProc[0].write(str(percentFill) + "\n33\n")
  elif "mesh" == whichSrf:  # alternate, better for tunnels
    trisrfProc[0].write(str(percentFill) + "\n")
  trisrfProc[0].flush()
  trisrfProc[0].close()
  finishedRunningSrf = trisrfProc[1].read()
  log = open(tstFileName + ".log", 'w')
  log.write(finishedRunningSrf)
  if "tri" == whichSrf:  # pick which method to use, default
    try:
      os.rename("trisrf.tri", triFileName)
      os.rename("trisrf.phi", phiFileName)
    except OSError:  # actual problem
      print "trisrf did not make .tri file, check logs"
      log.close()
      return False
  elif "mesh" == whichSrf:  # alternate, better for tunnels
    try:
      os.rename("meshsrfA.tri", triFileName)
      os.rename("meshsrfA.phi", phiFileName)
    except OSError:  # actual problem
      print "meshsrf did not make .tri file, check logs"
      log.close()
      return False
  if not os.path.exists(pathTo + gen):
    print "the surface generation executable does not exist at: " + pathTo + \
        gen
    exit(1)
  trigenProc = os.popen4(pathTo + gen + " " + triFileName + " " + tstFileName)
  trigenProc[0].flush()
  trigenProc[0].close()
  finishedRunningGen = trigenProc[1].read()
  try:
    os.unlink("trilinel.dat")
    os.unlink("triline.usr")
    os.unlink("trinext.dat")
    os.unlink("trisrf.pdb")
    os.unlink("trisrf.rec")
    os.unlink("trisrf.usr")
    os.unlink("fort.10")
    os.unlink("trigen.py")
    os.unlink("triline.py")
  except OSError:
    pass  # again, just cleaning up junk files
  try:
    os.unlink("mesh.pdb")
    os.unlink("meshline.usr")
    os.unlink("meshlinel.dat")
    os.unlink("meshtri.dat")
    os.unlink("fort.10")
    os.unlink("trigen.py")
    os.unlink("triline.py")
  except OSError:
    pass  # again, just cleaning up junk files
  log.write(finishedRunningGen)
  log.close()
  return True  # indicates success
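# Usage sketch (hypothetical filename and spacing): builds the .tri/.phi/.tst
# files at 0.5 Angstrom grid spacing with the default "tri" surface; makeTst
# returns True on success and False if the fortran steps failed.
def exampleMakeTst():
  '''illustrative wrapper only'''
  return makeTst("structure.pdb", 0.5)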
#!/usr/bin/env python

#reads in what are assumed to be ligand pdb files
#for each one, cluster based on some threshold, break into distinct clusters,
#write each cluster

import string
import sys
import pdb

if -1 != string.find(sys.argv[0], "clusterLigands.py"):
  filenames = sys.argv[1:]
  for filename in filenames:
    pdbD = pdb.pdbData(filename)
    clusters = pdbD.clusterAtoms(distanceCutoff=5.0)
    #print filename, len(clusters) #debug
    width = len(str(len(clusters)))
    for clusterIndex, cluster in enumerate(clusters):
      outputNum = string.zfill(clusterIndex, width)
      outputName = outputNum + "_" + filename
      cluster.write(outputName)
#!/usr/bin/env python

import sys
import pdb

OK_RESIDUES = set()
OK_RESIDUES.update(pdb.aminoAcid3Codes)
OK_RESIDUES.update(pdb.keepPolarH.keys())
OK_RESIDUES.remove('HEM')  # Only keep ions and waters in addition

p = pdb.pdbData(sys.argv[1], ignoreWaters=False)
mode = sys.argv[3]

for idx, line in enumerate(p.rawData):
    p.rawData[idx] = line[:79]

if mode == 'final':
#    p.replaceHETATMwithATOM()
    p.removeApolarHydrogen()
    p.write(sys.argv[2]+'.H')
    p = pdb.pdbData(sys.argv[2]+'.H', ignoreWaters=False)
    p.renameHistidines()
    p.renameCysteines()
    

for idx, resName in enumerate(p.resNames):
    if resName not in OK_RESIDUES:
        p.removeLine(idx)

for idx, alt in enumerate(p.altChars):
    if alt not in (' ', 'A'):
        p.removeLine(idx)  # drop non-primary alternate conformations, as above
#!/usr/bin/env python

#ryan g. coleman [email protected] [email protected]
#kim sharp lab http://crystal.med.upenn.edu
#finds all inter-atom distances

import string
import sys
import geometry
import pdb

if -1 != string.find(sys.argv[0], "pdbDistances.py"):
  try:
    for pdbName in sys.argv[1:]:
      pdbD = pdb.pdbData(pdbName)
      outputName = pdbName.replace("pdb", "").replace(".", "")
      longestDist, meanDist = geometry.longestAndMeanDist(
          pdbD.getHeavyAtomXYZ())
      print outputName, "\t", longestDist, "\t", meanDist
  except IndexError:
    print "pdbDistances.py pdbName [list of more pdbs]"
    print "outputs to standard out"
    sys.exit(1)
#!/usr/bin/env python

# reads in a list of codes, finds pdb + ligand files, outputs
# residues near each ligand in the pdb

import string
import sys
import pdb
import glob

if -1 != string.find(sys.argv[0], "getResiduesNearbyLigands.py"):
    prefixes = sys.argv[1:]
    for prefix in prefixes:
        files = glob.glob("*" + prefix + "*pdb")
        mainPdb, ligandPdbs = False, []
        for filename in files:
            if -1 == filename.find("ligand"):  # is main
                mainPdb = filename
            elif -1 == filename.find("nearby"):  # is not output from previous run
                ligandPdbs.append(filename)
        for ligandName in ligandPdbs:
            ligandPdbD = pdb.pdbData(ligandName)
            mainPdbD = pdb.pdbData(mainPdb)
            nearbyPdb = mainPdbD.getNearbyResidues(ligandPdbD.coords, 5.0)
            nearbyPdb.write("nearby_" + ligandName)
            nearbyPdb = pdb.pdbData("nearby_" + ligandName)
            justResString = pdb.turnListIntoString(nearbyPdb.getResidueNamesChains())
            outFile = open("nearby_" + ligandName + ".res", "w")
            outFile.write(justResString)
            outFile.close()
def checkPathBarriers(prefix):
  tstName = prefix + ".nocav.tst"
  findHolesName = tstName + ".findholes.log"
  findHolesFile = open(findHolesName, 'r')
  findHolesLines = findHolesFile.readlines()
  findHolesFile.close()
  HolesName = tstName + ".sideshole.log"  # holds all the output
  goodHolesName = tstName + ".good.sideshole.log"  # just the 1 1 0 1 1
  sideHolesName = tstName + ".side.sideshole.log"  # just the * * 1 * *
  badHolesName = tstName + ".bad.sideshole.log"  # all others
  pdbWithBarriersFileName = "planes_" + prefix + ".pdb"
  pdbBarriers = pdb.pdbData(pdbWithBarriersFileName)
  #get the barriers read in and defined
  barrierAtomList = [[], []]
  for index, resName in enumerate(pdbBarriers.resNames):
    if resName == "DUM":
      if pdbBarriers.atoms[index][0] == "O":
        barrierAtomList[0].append(pdbBarriers.coords[index])
      elif pdbBarriers.atoms[index][0] == "N":
        barrierAtomList[1].append(pdbBarriers.coords[index])
  barrierZ = [barrierAtomList[0][0][2], barrierAtomList[1][0][2]]
  barrierZ.sort()
  barrierSep = geometry.distL2(barrierAtomList[0][0], barrierAtomList[1][0])
  #barrier is just Z coordinate
  #setup for main loop over paths
  poreSuffix = ".pore.py"
  logFile = open(HolesName, 'w')
  goodLogFile = open(goodHolesName, 'w')
  sideLogFile = open(sideHolesName, 'w')
  badLogFile = open(badHolesName, 'w')
  #the following 5 things are calculated and written for each path, headers
  #the 6th, barrier separation, is really the same for each structure
  logFile.write("endsBeyond1count barrier1count endsBetweenCount ")
  logFile.write("barrier2count endsBeyond2count barrierSeparation\n")
  goodLogFile.write("prefix ")
  goodLogFile.write(string.strip(findHolesLines[0]) + " ")
  goodLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
  goodLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
  sideLogFile.write("prefix ")
  sideLogFile.write(string.strip(findHolesLines[0]) + " ")
  sideLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
  sideLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
  badLogFile.write("prefix ")
  badLogFile.write(string.strip(findHolesLines[0]) + " ")
  badLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
  badLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
  holeNumber = 1
  poreFile = tstName + "." + str(holeNumber) + poreSuffix
  print poreFile
  paths = []
  sides, goods = [], []
  endsToPaths = {}
  pathsToEnds = {}
  while os.path.exists(poreFile):
    path = comparePaths.readCGOPath(poreFile)
    pathRad = comparePaths.readCGOPathWithRadius(poreFile)
    paths.append(pathRad)
    pathNum = len(paths) - 1
    for end in string.split(findHolesLines[holeNumber])[1:3]:
      if pathNum not in pathsToEnds:
        pathsToEnds[pathNum] = []
      pathsToEnds[pathNum].append(end)
      if end not in endsToPaths:
        endsToPaths[end] = []
      endsToPaths[end].append(pathNum)
    intersections = [0, 0]
    for index, barrier in enumerate(barrierZ):
      intersections[index] = countCrossingsZ(path, barrier)
    ends = [0, 0, 0]
    for endPoint in [path[0], path[-1]]:
      endPointZ = endPoint[2]
      if endPointZ < barrierZ[0] and endPointZ < barrierZ[1]:
        ends[0] += 1
      elif endPointZ >= barrierZ[0] and endPointZ <= barrierZ[1]:
        ends[1] += 1
      elif endPointZ > barrierZ[0] and endPointZ > barrierZ[1]:
        ends[2] += 1
    outputThisTime = str(ends[0]) + " " + str(intersections[0]) + " " + \
        str(ends[1]) + " " + str(intersections[1]) + " " + \
        str(ends[2]) + " " + str(barrierSep) + " "
    logFile.write(outputThisTime)
    logFile.write("\n")
    if ends[0] + ends[1] + ends[2] != 2:
      print "problems sorting out the ends"
    if ends[0] == 1 and ends[2] == 1 and intersections == [1, 1]:
      #it is 'good'
      goods.append(pathNum)
      goodLogFile.write(prefix + " ")
      goodLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
      goodLogFile.write(outputThisTime + "\n")
    elif ends[1] == 1:
      sides.append(pathNum)
      sideLogFile.write(prefix + " ")
      sideLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
      sideLogFile.write(outputThisTime + "\n")
    else:
      badLogFile.write(prefix + " ")
      badLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
      badLogFile.write(outputThisTime + "\n")
    #and that is it for this path
    holeNumber += 1     # get set up for next pass
    poreFile = tstName + "." + str(holeNumber) + poreSuffix
  logFile.close()
  goodLogFile.close()
  sideLogFile.close()
  badLogFile.close()
  #next lines are for debugging the new data structures
  '''
  print sides
  print goods
  print endsToPaths
  print pathsToEnds
  '''
  #now want to find side branches of good paths
  branches = 0
  branchSuffix = ".branch.py"
  branchFile = tstName + "." + str(branches) + branchSuffix
  branchLog = open(tstName + ".branchholes.log", 'w')
  branchLog.write(string.strip(findHolesLines[0]) + "\n")
  for side in sides:
    foundGoods = []
    for sideEnd in pathsToEnds[side]:
      for good in goods:
        for goodEnd in pathsToEnds[good]:
          if goodEnd == sideEnd:
            foundGoods.append(good)
    if len(foundGoods) > 0:
      branchedPath = paths[side]  # start with whole path
      for good in foundGoods:  # remove physiological intersecting paths
        branchedPath = pathsModule.subtractPaths(branchedPath, paths[good])
      if len(branchedPath) > 0:  # has to have some length remaining
        branches += 1
        branchFile = tstName + "." + str(branches) + branchSuffix
        print branches, side, foundGoods
        tstdebug.debugSetGridSpheres(
            branchedPath, 0.5, branchFile, radius=True,
            mainColor=(0.01, 0.9, 0.35))
        branchLog.write(str(branches) + " ")
        branchLog.write(str(pathsToEnds[side][0]) + " ")
        branchLog.write(str(pathsToEnds[side][1]) + " ")
        branchLog.write("- ")  # dummy, not real
        branchLog.write("0. 0. 0. 0. 0. 0. 0. 0. 0. 0. \n")
  branchLog.close()
  addFoundHoleStats.redoFindholes(
      prefix, nearbyDistance=4., logExt=".branchholes.log",
      poreSuffix=".branch.py", nearbyName=".branch")
      resNumStr = goodNode.attributes[columnNums[9]]
      newPdb = pdbData.getListResiduesChains(resNumStr)
      #open the output file
      outFile = open(
          str(paramCount) + "." + str(goodCount) + ".xtal-lig.pdb", 'w')
      outFile.write("REMARK parameter set: " + str(paramCount) + "\n")
      for name, value in params[paramCount].iteritems():
        outFile.write("REMARK paramater " + name + ": " + str(value) + "\n")
      outFile.write("REMARK pocket number: " + str(goodCount) + "\n")
      #write a bunch of remarks about all the attributes
      for colCount in xrange(len(columnNames)):
        outFile.write(
            "REMARK " + str(columnNames[colCount]) + ": " +
            str(goodNode.attributes[columnNums[colCount]]) + "\n")
      newPdb.outputLines(outFile)  # write the actual PDB file
      outFile.close()

if -1 != string.find(sys.argv[0], "tm3pickerPdb.py"):
  pdbData = pdb.pdbData(sys.argv[1])
  tmData = tm3.tmTreeFromFile(sys.argv[2])
  paramsNew = None
  if len(sys.argv) > 8:  # user wants to declare an extra set of parameters
    paramsNew = {}
    paramNames = [
        "volMin", "volMax", "apolarMin",
        "polarMin", "apolarFractionMax", "polarFractionMax"]
    for count in xrange(3, 9):
      paramsNew[paramNames[count - 3]] = float(sys.argv[count])
    print "user declare pocket picker parameters:", paramsNew
  pickPocketsPdb(pdbData, tmData, paramsNew)