def analyzePdbB(filenameList=False):
    '''Gather per-atom second-factor values from PDB files, then write reports.

    filenameList: iterable of PDB file names, or a false value for "none".

    Two nested tables of the form {key: {atomName: [values]}} are filled from
    every atom whose radius is positive; the stored value is factors[i][1].
    One table is keyed on the residue name, the other on residue name followed
    by residue number.  Both tables are handed to the report functions.
    '''
    residues = {}  # resName -> atomName -> list of values
    eachRes = {}  # resName+resNum -> atomName -> list of values
    if filenameList:
        for filename in filenameList:
            pdbD = pdb.pdbData(filename)
            for index, resName in enumerate(pdbD.resNames):
                if not pdbD.radii[index] > 0.:
                    continue  # only atoms with a positive radius are counted
                atomName = pdbD.atoms[index].strip()
                value = pdbD.factors[index][1]
                residues.setdefault(
                    resName, {}).setdefault(atomName, []).append(value)
                longName = str(resName) + str(pdbD.resNums[index])
                eachRes.setdefault(
                    longName, {}).setdefault(atomName, []).append(value)
    # both tables are complete at this point, produce every report
    makeResidueReport(residues)
    makeResidueReport(
        eachRes, outputFilename="individual.res.bfactor", runGraphs=False)
    makeAtomReport(residues)
    makeAtomReport(
        eachRes, outputFilename="individual.atom.bfactor", runGraphs=False)
    makeHistogramReport(residues)
def getJustMembranePdb(inputFileName, outputFileName):
    '''Write a PDB holding only the residues that lie within the barrier z.

    inputFileName: PDB file that also contains "DUM" residues whose atoms
        starting with "O" and "N" mark the two barriers.
    outputFileName: where the reduced PDB is written.

    A residue+chain is kept as soon as one of its atoms has
    abs(z) <= the larger of the two barrier z coordinates (taken from the
    first "O" and first "N" barrier atom).  Raises IndexError when either
    kind of barrier atom is missing from the input.
    NOTE(review): only the larger z is used, together with abs(); this
    presumably assumes barriers symmetric about z = 0 - confirm.
    '''
    pdbBarriers = pdb.pdbData(inputFileName)
    # collect barrier atom coordinates: [0] holds "O..." atoms, [1] "N..."
    barrierAtomList = [[], []]
    for index, resName in enumerate(pdbBarriers.resNames):
        if resName == "DUM":
            firstChar = pdbBarriers.atoms[index][0]
            if firstChar == "O":
                barrierAtomList[0].append(pdbBarriers.coords[index])
            elif firstChar == "N":
                barrierAtomList[1].append(pdbBarriers.coords[index])
    zCoord = max(barrierAtomList[0][0][2], barrierAtomList[1][0][2])
    goodResChain = []  # kept in first-seen order for getListResiduesChains
    alreadyGood = set()  # same content, gives constant-time membership tests
    for index, thisResNum in enumerate(pdbBarriers.resNums):
        resChain = str(thisResNum) + str(pdbBarriers.chains[index])
        if resChain in alreadyGood:
            continue  # residue already accepted, no need to test this atom
        if abs(pdbBarriers.coords[index][2]) <= zCoord:
            alreadyGood.add(resChain)
            goodResChain.append(resChain)
    newPdb = pdbBarriers.getListResiduesChains(goodResChain)
    newPdb.write(outputFileName)
def repairPointPdbRecord(tstD=None, tstFileName=False):
    '''checks and repairs pointpdbrecord if it has no data in it

    tstD: already-read tst data object; when None it is read from
        tstFileName with the keys needed for pocket mapping.
    tstFileName: tst file name; when true and a repair was made, the new
        record is appended to this file.

    The record counts as "no data" when every entry carries the same atom
    number.  In that case each point is reassigned to the first atom returned
    for its coordinate by pdbData.getNearbyAtoms, and
    tstD.dict['POINT_PDB_RECORD'] is replaced in place.  Returns None.
    '''
    if tstD is None:  # hasn't been read in already
        tstD = tstdata.tstData(
            tstFileName, necessaryKeys=tstdata.tstData.necessaryKeysForPocket)
    pointPdbRecord = tstD.dict['POINT_PDB_RECORD']
    firstAtomNum = pointPdbRecord[0][1]
    for pointPdb in pointPdbRecord:
        if pointPdb[1] != firstAtomNum:
            return  # record holds real data, nothing to repair
    # every point refers to the same atom: rebuild from the coordinates
    pdbD = pdb.pdbData()
    for line in tstD.dict['PDB_RECORD']:
        pdbD.processLine(line)
    ptCoords = {}
    for pointXyz in tstD.dict['POINT_XYZ']:
        ptCoords[pointXyz[0]] = tuple(pointXyz[1:])
    coordToNearbyAtoms = pdbD.getNearbyAtoms(ptCoords.values())
    newPointPdbRec = []
    for pointPdb in pointPdbRecord:
        atomFound = coordToNearbyAtoms[ptCoords[pointPdb[0]]][0]
        newPointPdbRec.append([pointPdb[0], atomFound])
    tstD.dict['POINT_PDB_RECORD'] = newPointPdbRec  # replace old record
    if tstFileName:
        tstFile = open(tstFileName, 'a')  # append into file
        try:
            tstdata.writeEntryIntegers(
                tstD.dict['POINT_PDB_RECORD'], "POINT_PDB_RECORD",
                "END POINT_PDB_RECORD", tstFile)
        finally:
            tstFile.close()  # close even if the write fails
def calcRMSDprintTable(pdbFileNames): pdbDatas = [] for pdbFileName in pdbFileNames: pdbDatas.append(pdb.pdbData(pdbFileName)) matrix = [] for index1 in xrange(len(pdbDatas)): tempRow = [] for index2 in xrange(len(pdbDatas)): if index1 > index2: rmsd = pdbDatas[index1].calcRMSD(pdbDatas[index2], alphas=True) tempRow.append(rmsd) matrix.append(tempRow) print "matrix", for index1 in xrange(len(pdbDatas)): print pdbFileNames[index1], print " " # title row done for index1 in xrange(len(pdbDatas)): print pdbFileNames[index1], for index2 in xrange(len(pdbDatas)): if index1 == index2: # always 0 print 0., elif index1 > index2: print matrix[index1][index2], else: print matrix[index2][index1], print " " # this row done
def readTri(triName):
    '''reads a tri file, returns the data inside

    triName: name of the file to read.

    Lines between PDB_RECORD / END PDB_RECORD are fed to a pdbData object.
    Lines between TRIANGLE_XYZ / END TRIANGLE_XYZ are treated as points: each
    distinct stripped line gets the next number (starting at 1) and every
    three consecutive lines form one triangle.

    Returns (pdbD, triLineToNum, triPoints) where triLineToNum maps the
    stripped point line to its number and triPoints is a list of
    3-element lists of point numbers.  A trailing incomplete triangle is
    dropped.
    '''
    pdbD = pdb.pdbData()
    triLineToNum = {}
    triPoints, oneTri = [], []
    inPdb, inTri = False, False
    triFile = open(triName, 'r')
    try:
        for line in triFile:
            if line.startswith('PDB_RECORD'):
                inPdb = True
            elif line.startswith('END PDB_RECORD'):
                inPdb = False
            elif line.startswith('TRIANGLE_XYZ'):
                inTri = True
            elif line.startswith('END TRIANGLE_XYZ'):
                inTri = False
            elif inPdb:
                pdbD.processLine(line)
            elif inTri:
                strippedLine = line.strip()
                # the default is computed before insertion, so a new line
                # receives the next unused number
                triNum = triLineToNum.setdefault(
                    strippedLine, len(triLineToNum) + 1)
                oneTri.append(triNum)
                if len(oneTri) == 3:
                    triPoints.append(oneTri)
                    oneTri = []
    finally:
        triFile.close()  # the handle used to be left open
    return pdbD, triLineToNum, triPoints
def pdbMostOccupied(pdbFileName, outName=None, exceptions=None):
    '''Write a copy of a PDB after selectMostOccupied has been applied.

    pdbFileName: PDB file to read.
    outName: passed unchanged to the copy's write method.
    exceptions: passed as the exceptions keyword of selectMostOccupied;
        None (the default) means an empty list.  A fresh list is created per
        call so that nothing is shared between calls.
    '''
    if exceptions is None:
        exceptions = []
    newPdb = pdb.pdbData(pdbFileName).copy()
    newPdb.selectMostOccupied(exceptions=exceptions)
    newPdb.write(outName)
def tstTravelDepthMesh( tstFileName, phiFileName, ligandFileName=None, cavities=False, threshold="auto"): '''sets up a normal travel depth mesh run, calls tstTravelDepthMeshRun. if cavities is set, it means we want the (new) travel depth of those too''' distanceName = 'traveldepth' tstD = tstdata.tstData(tstFileName) # read the file into the data structure phiData = phi(phiFileName) # read in the phimap if possible phiGridSpacing = 1. / phiData.scale phiTravelDepthGrid, phiTravelDepth, meshData = tstTravelDepthMeshRun( tstD, phiData, tstFileName, cavities=cavities, threshold=threshold) # modifies tstD in place if ligandFileName is not None: ligand = pdb.pdbData(ligandFileName) ligandXYZR = ligand.getHeavyAtomXYZRadius() betweenList = meshData.getBetweenNodes() surfaceList = meshData.getSurfaceNodes() nodeWithinSet = meshData.getWithinNodesNoInside(ligandXYZR) #print len(nodeWithinSet), len(ligandXYZR) tracebackSet = meshData.getTracebackSet(nodeWithinSet, distanceName) #print len(tracebackSet) minW, maxW, meanW = meshData.getMinMaxMeanNodes(nodeWithinSet, distanceName) minT, maxT, meanT = meshData.getMinMaxMeanNodes(tracebackSet, distanceName) minB, maxB, meanB = meshData.getMinMaxMeanNodes(betweenList, distanceName) minS, maxS, meanS = meshData.getMinMaxMeanNodes(surfaceList, distanceName) #print all on one line, header printed earlier print minW, maxW, meanW, print minT, maxT, meanT, print minB, maxB, meanB, print minS, maxS, meanS, volumeWithin = len(nodeWithinSet) * phiGridSpacing**3. volumeTrace = len(tracebackSet) * phiGridSpacing**3. 
print volumeWithin, volumeTrace # newline wanted here so no comma #print phiGridSpacing listWithin, listTrace = [], [] for node in nodeWithinSet: listWithin.append(node.getXYZ()) for node in tracebackSet: listTrace.append(node.getXYZ()) tstdebug.pointDebug(listWithin, filename=ligandFileName+".within.py") tstdebug.pointDebug(listTrace, filename=ligandFileName+".trace.py") #tstdebug.debugGridCountVals(phiTravelDepthGrid) #transform grid to actual travel distance phiTravelDepth.write(tstFileName+".travel.phi") #write data to file tstFile = open(tstFileName, 'a') tstFile.write("DEPTH_TRAVEL_DIST\n") for line in tstD.dict['DEPTH_TRAVEL_DIST']: lineOut = "%8d" % line[0] for count in xrange(1, len(line)): lineOut += "%+9.4f " % line[count] noPlusLine = string.replace(lineOut, "+", " ") tstFile.write(noPlusLine) tstFile.write("\n") tstFile.write("END DEPTH_TRAVEL_DIST\n") tstFile.close()
def getClosestToAverage(fileName, skipAlign=True):
    '''takes a pdb file of an nmr structure, superposes all structures, finds
    euclidean average, then finds closest original model to the average,
    then picks it out as the one to use

    fileName: multi-model PDB file; the last 4 characters are taken to be
        the extension and removed to build the output names.
    skipAlign: when False the superimpose module is imported and every model
        but the first is superposed onto the first before averaging.

    Files written: <base>.model.<n>.pdb for every model, align.<that name>
    when aligning, <base>.average.pdb and <base>.best.pdb.

    The model number of each written file is remembered in a dict instead
    of being parsed back out of the file name, so base names containing
    ".pdb" or "align." no longer break the final lookup.
    '''
    if not skipAlign:
        import superimpose  # superposition function, calls fortran code
    baseName = fileName[:-4]
    pdbD = pdb.pdbData(fileName)
    modelNumByFile = {}  # written file name -> model number
    firstModel = False
    alignedModels = []
    for modelNum in pdbD.getModelNumbers():
        newPdb = pdbD.getOneModel(modelNum)
        outputFileName = baseName + ".model." + str(modelNum) + ".pdb"
        newPdb.write(outputFileName)
        modelNumByFile[outputFileName] = modelNum
        if not firstModel:
            firstModel = outputFileName  # don't align first
        elif skipAlign:
            alignedModels.append(outputFileName)
        else:
            alignFileName = "align." + outputFileName
            superimpose.superposition(
                firstModel, outputFileName, alignFileName)
            modelNumByFile[alignFileName] = modelNum
            alignedModels.append(alignFileName)
    firstModelPdbD = pdb.pdbData(firstModel)
    averagePdbD = firstModelPdbD.getAverageCoords(alignedModels)
    averagePdbD.write(baseName + ".average.pdb")
    # the first model wins ties because only strictly smaller RMSDs replace
    bestModelName = firstModel
    bestRMSD = averagePdbD.calcRMSDfile(firstModel)
    for alignModel in alignedModels:
        otherRMSD = averagePdbD.calcRMSDfile(alignModel)
        if otherRMSD < bestRMSD:
            bestRMSD = otherRMSD
            bestModelName = alignModel
    # re-read the input so the best model keeps all other file info
    pdbD = pdb.pdbData(fileName)
    newPdb = pdbD.getOneModel(int(modelNumByFile[bestModelName]))
    newPdb.write(baseName + ".best.pdb")
def analyzePdbBCompareFiles(firstFilename, compareFilename):
    '''Compare per-atom second-factor values between two lists of PDB codes.

    firstFilename, compareFilename: text files with one code per line.  A
    6-character line (newline included) yields
    "<4 chars>.<5th char>.nocav.tst.mesh.travelin.pdb", a 5-character line
    yields "<4 chars>.nocav.tst.mesh.travelin.pdb"; other lines are ignored.

    For each list the values factors[i][1] of atoms with positive radius are
    pooled per residue name and atom name, and additionally kept per file.
    The comparison reports of the analyzePdbB module are then written using
    "<firstFilename>.<compareFilename>" as output prefix.
    '''
    pdbSuffix = ".nocav.tst.mesh.travelin.pdb"
    residues = ({}, {})  # per list: resName -> atomName -> values
    pdbs = ([], [])  # per list: one resName -> atomName -> values per file
    for lindex, codeListName in enumerate((firstFilename, compareFilename)):
        filenameList = []
        if codeListName:
            listFile = open(codeListName, 'r')
            for line in listFile:
                lineLength = len(line)
                if lineLength == 6:
                    filenameList.append(
                        line[0:4] + "." + line[4] + pdbSuffix)
                elif lineLength == 5:  # analyze non-domains
                    filenameList.append(line[0:4] + pdbSuffix)
            listFile.close()
        pooled = residues[lindex]
        for filename in filenameList:
            pdbD = pdb.pdbData(filename)
            tempRes = {}
            for index, resName in enumerate(pdbD.resNames):
                if not pdbD.radii[index] > 0.:
                    continue
                atomName = pdbD.atoms[index].strip()
                value = pdbD.factors[index][1]
                pooled.setdefault(
                    resName, {}).setdefault(atomName, []).append(value)
                tempRes.setdefault(
                    resName, {}).setdefault(atomName, []).append(value)
            pdbs[lindex].append(tempRes)
    # all data gathered, write the comparison reports
    outPrefix = firstFilename + "." + compareFilename
    analyzePdbB.makeCompareResidueReport(
        residues, outPrefix + ".residue.bfactor")
    # alternate pvalue tests and by-file comparisons use the per-file data
    corrAll, corrBeta = analyzePdbB.makeCompareResidueReportAlternate(
        pdbs, outPrefix + ".residue.alt.bfactor")
    # same comparison again, corrected for the overall depth differences
    analyzePdbB.makeCompareResidueReportAlternate(
        pdbs, outPrefix + ".residue.alt.corr.bfactor",
        correctionAll=corrAll, correctionBeta=corrBeta)
    analyzePdbB.makeMeanPerProteinReport(
        pdbs, outName=outPrefix + ".meansbypdb.txt")
def analyzePdbBfromFile(listFilename, buriedThresh=2.):
    '''Pool per-atom second-factor values for a list of codes, write reports.

    listFilename: text file with one code per line; only 6-character lines
        (newline included) are used and mapped to
        "<4 chars>.<5th char>.nocav.tst.mesh.travelin.pdb".
    buriedThresh: an atom goes to the "surface" table when the minimum of
        getFactorsByResidueChain for its residue and chain is below this
        value, otherwise to the "buried" table.

    Every atom with positive radius is also added to the overall table.
    Reports are written with listFilename as output prefix.
    '''
    residues = {}  # resName -> atomName -> values, all atoms
    buriedResidues, surfaceResidues = {}, {}
    filenameList = []
    if listFilename:
        listFile = open(listFilename, 'r')
        for line in listFile:
            if len(line) == 6:
                filenameList.append(
                    line[0:4] + "." + line[4] +
                    ".nocav.tst.mesh.travelin.pdb")
        listFile.close()
    for filename in filenameList:
        pdbD = pdb.pdbData(filename)
        for index, resName in enumerate(pdbD.resNames):
            if not pdbD.radii[index] > 0.:
                continue
            atomName = pdbD.atoms[index].strip()
            bfactors = pdbD.getFactorsByResidueChain(
                pdbD.resNums[index], pdbD.chains[index])
            if buriedThresh > min(bfactors):
                classTable = surfaceResidues
            else:
                classTable = buriedResidues
            # the value goes to its class table and to the overall table
            for table in (classTable, residues):
                table.setdefault(resName, {}).setdefault(
                    atomName, []).append(pdbD.factors[index][1])
    # all data gathered, write the reports
    analyzePdbB.makeResidueReport(
        residues, listFilename + ".residue.bfactor",
        maxY=6, maxYBeta=6)  # hardcoded for now...
    analyzePdbB.makeAtomReport(residues, listFilename + ".atom.bfactor")
    analyzePdbB.makeAtomReport(
        surfaceResidues, listFilename + ".atom.bfactor.surface")
    analyzePdbB.makeAtomReport(
        buriedResidues, listFilename + ".atom.bfactor.buried")
    analyzePdbB.makeHistogramReport(
        residues, listFilename + ".histogram.bfactor")
def readSphPathRadius(sphFileName):
    '''Read the path entries of a sph file (read with the PDB reader).

    An entry is part of the path when its second factor is positive and its
    residue number is positive; of the entries with residue number 0 only
    the first one (with positive second factor) is taken.

    Returns (coords, radii): the coordinates of the selected entries and
    their first factor, in file order.
    '''
    sphData = pdb.pdbData(sphFileName)  # sph is basically PDB format
    pathCoords, pathRadii = [], []
    zeroTaken = False  # only the first residue-number-0 entry is used
    for index, factors in enumerate(sphData.factors):
        resNum = sphData.resNums[index]
        if not factors[1] > 0.0:
            continue  # second factor not positive: not part of the path
        if resNum > 0:
            wanted = True
        elif resNum == 0 and not zeroTaken:
            wanted = True
            zeroTaken = True
        else:
            wanted = False
        if wanted:
            pathCoords.append(sphData.coords[index])
            pathRadii.append(factors[0])
    return pathCoords, pathRadii
def outputNearbyResidues(pointPath, outName, pdbRawData, nearbyDistance):
    '''outputs 2 different files. 1. residues near the minimum radius
    2. residues near the entire path. near is defined as radius +
    nearbyDistance

    pointPath: sequence of path points; element 0 of each point is the value
        that is minimized (presumably the radius - confirm with the caller).
    outName: prefix of the two output files, ".residues.pathmin.pdb" and
        ".residues.path.pdb".
    pdbRawData: lines handed to pdbData.processLines.
    nearbyDistance: passed through to getNearbyResidues.

    Returns (residuesNearMin, residuesNearPath).
    '''
    pdbD = pdb.pdbData()
    pdbD.processLines(pdbRawData)
    # first point with the smallest leading value
    narrowest = pointPath[0]
    for candidate in pointPath:
        if candidate[0] < narrowest[0]:
            narrowest = candidate
    found = []
    for points, suffix in (
            ([narrowest], ".residues.pathmin.pdb"),
            (pointPath, ".residues.path.pdb")):
        nearResidues = getNearbyResidues(points, pdbD, nearbyDistance)
        pdbD.getListResiduesChains(nearResidues).write(outName + suffix)
        found.append(nearResidues)
    # files are written, hand back the residue lists as well
    return found[0], found[1]
def tstTravelSurfInsideOld(tstFileName, phiFileName=False):
    '''does the old algorithm of just computing the shortest distance to any
    surface point from any atom by going through both lists the hard way

    tstFileName: tst file to read; the ATOM_DEPTH_OLD record is appended.
    phiFileName: unused, kept so the signature matches the other variants.

    Writes <tstFileName>.old.atomdepth.pdb, a PDB whose second factor is the
    computed distance.  The appended record now opens with "ATOM_DEPTH_OLD"
    so that it matches its "END ATOM_DEPTH_OLD" terminator and the key used
    in tstD.dict (the opening line used to read "ATOM_TRAVEL_IN").
    '''
    tstD = tstdata.tstData(tstFileName)  # read the file into the structure
    # do the biggest disjoint set of tris/points stuff
    allPoints, allTris, cavPoints, cavTris = cavity.assumeNoCavities(
        tstD.dict['POINT_XYZ'], tstD.dict['TRIANGLE_POINT'],
        tstD.dict['POINT_NEIGHBOR'])
    pointXyz = tstD.dict['POINT_XYZ']
    pdbD = pdb.pdbData()
    for line in tstD.dict['PDB_RECORD']:
        pdbD.processLine(line)
    # point numbers are 1-based, element 0 of each POINT_XYZ row is the
    # number; slice once here instead of once per atom
    surfaceCoords = [pointXyz[point - 1][1:] for point in allPoints]
    atomTravelInDepths = []
    for coord in pdbD.coords:
        atomTravelInDepths.append(min(
            geometry.distL2(coord, surfaceCoord)
            for surfaceCoord in surfaceCoords))
    # make a pdb file with the bfactor replaced
    for index, atomTID in enumerate(atomTravelInDepths):
        pdbD.updateFactors(index, (pdbD.factors[index][0], atomTID))
    pdbD.write(tstFileName+".old.atomdepth.pdb")
    # also add record to tstdata, atom numbers are 1-based
    tstD.dict['ATOM_DEPTH_OLD'] = [
        [index + 1, atomTID]
        for index, atomTID in enumerate(atomTravelInDepths)]
    # write data into tst file
    tstFile = open(tstFileName, 'a')
    try:
        tstFile.write("ATOM_DEPTH_OLD\n")
        for line in tstD.dict['ATOM_DEPTH_OLD']:
            lineOut = "%8d" % line[0]
            lineOut += "".join("%+9.4f " % value for value in line[1:])
            # sign column is forced, then the plus is blanked for alignment
            tstFile.write(lineOut.replace("+", " "))
            tstFile.write("\n")
        tstFile.write("END ATOM_DEPTH_OLD\n")
    finally:
        tstFile.close()  # close even if formatting or writing fails
def calculateCharges(tstD, chargeD):
    '''actually does charge assignment, returns 2 lists

    tstD: tst data object; its PDB_RECORD lines are parsed and its
        POINT_PDB_RECORD maps point number -> 1-based atom number.
    chargeD: passed to pdbData.assignCharges.

    Returns (chargeXyz, hydroXyz), each a list of [pointNumber, value] taken
    from the atom's charge and hydroCharge.  When an atom's charge is None a
    warning is printed and both values are set to 0.
    '''
    pdbD = pdb.pdbData()
    for recordLine in tstD.dict['PDB_RECORD']:
        pdbD.processLine(recordLine)
    pdbD.assignCharges(chargeD)
    chargeXyz, hydroXyz = [], []  # new tst record lists of [number, charge]
    for pointPdb in tstD.dict['POINT_PDB_RECORD']:
        atomIndex = pointPdb[1] - 1
        atomCharge = pdbD.charges[atomIndex]
        atomHydro = pdbD.hydroCharges[atomIndex]
        if atomCharge is None:  # warn user that charges didn't get assigned
            print("warning: charge is not assigned for " +
                  pdbD.atoms[atomIndex] + " " + pdbD.resNames[atomIndex])
            print("charge is set to ZERO for now")
            atomCharge, atomHydro = 0., 0.
        chargeXyz.append([pointPdb[0], atomCharge])
        hydroXyz.append([pointPdb[0], atomHydro])
    return chargeXyz, hydroXyz
def analyzePdbBCompareFiles(firstFilename):
    '''Write the mean-per-protein report for the files named in a list file.

    firstFilename: text file with one entry per line.  A 6-character line
        (newline included) becomes
        "<4 chars>.<5th char>.nocav.tst.mesh.travelin.pdb", a 5-character
        line becomes "<4 chars>.nocav.tst.mesh.travelin.pdb", and any other
        line is used as a file name with its last character removed.

    For every file the values factors[i][1] of atoms with positive radius
    are grouped as {resName: {atomName: [values]}}; the dict of these tables
    keyed on file name goes to analyzePdbB.makeMeanPerProteinReport, output
    name "<firstFilename>.meansbypdb.txt".
    '''
    pdbSuffix = ".nocav.tst.mesh.travelin.pdb"
    filenameList = []
    if firstFilename:
        listFile = open(firstFilename, 'r')
        for line in listFile:
            lineLength = len(line)
            if lineLength == 6:
                pdbName = line[0:4] + "." + line[4] + pdbSuffix
            elif lineLength == 5:  # analyze non-domains
                pdbName = line[0:4] + pdbSuffix
            else:  # yeah just allow anything
                pdbName = line[:-1]
            filenameList.append(pdbName)
        listFile.close()
    pdbs = {}  # file name -> resName -> atomName -> values
    for filename in filenameList:
        pdbD = pdb.pdbData(filename)
        perFile = {}
        for index, resName in enumerate(pdbD.resNames):
            if pdbD.radii[index] > 0.:
                perFile.setdefault(resName, {}).setdefault(
                    pdbD.atoms[index].strip(), []).append(
                        pdbD.factors[index][1])
        pdbs[filename] = perFile
    # all data gathered, write the report
    analyzePdbB.makeMeanPerProteinReport(
        pdbs, outName=firstFilename + ".meansbypdb.txt")
def checkPathBarriers(prefix):
    '''Classify every pore path of a structure by how it meets two barriers.

    prefix: structure prefix.  Files read: "<prefix>.nocav.tst.findholes.log",
        "planes_<prefix>.pdb" (DUM residues, atoms starting with "O" / "N"
        mark the two barriers, only their z coordinate is used) and
        "<prefix>.nocav.tst.<n>.pore.py" for n = 1, 2, ... while they exist.

    For each path the number of path ends below, between and above the two
    barrier z values and the crossing count of each barrier (countCrossingsZ)
    are written to "<tst>.membranehole.log".  The same line, preceded by the
    prefix and the matching findholes line, goes to exactly one of the
    ".good.", ".side." or ".bad." logs:
      good: one end beyond each barrier and each barrier crossed once
      side: both ends between the barriers
      bad: everything else
    Line n of the findholes log is assumed to describe pore n (line 0 being
    the header); IndexError results if the log is shorter than that.
    All log files are closed even when an error interrupts the run.
    '''
    tstName = prefix + ".nocav.tst"
    findHolesFile = open(tstName + ".findholes.log", 'r')
    try:
        findHolesLines = findHolesFile.readlines()
    finally:
        findHolesFile.close()
    logNames = (
        tstName + ".membranehole.log",  # holds all the output
        tstName + ".good.membranehole.log",  # just the 1 1 0 1 1
        tstName + ".side.membranehole.log",  # just the * * 1 * *
        tstName + ".bad.membranehole.log")  # all others
    pdbBarriers = pdb.pdbData("planes_" + prefix + ".pdb")
    # get the barriers read in and defined
    barrierAtomList = [[], []]
    for index, resName in enumerate(pdbBarriers.resNames):
        if resName == "DUM":
            firstChar = pdbBarriers.atoms[index][0]
            if firstChar == "O":
                barrierAtomList[0].append(pdbBarriers.coords[index])
            elif firstChar == "N":
                barrierAtomList[1].append(pdbBarriers.coords[index])
    # barrier is just Z coordinate, lower one first
    barrierZ = sorted([barrierAtomList[0][0][2], barrierAtomList[1][0][2]])
    barrierSep = geometry.distL2(barrierAtomList[0][0], barrierAtomList[1][0])
    # five values are computed per path; the sixth, barrier separation, is
    # really the same for each structure
    columnNames = (
        "endsBeyond1count barrier1count endsBetweenCount "
        "barrier2count endsBeyond2count barrierSeparation\n")
    poreSuffix = ".pore.py"
    openLogs = []
    try:
        for logName in logNames:
            openLogs.append(open(logName, 'w'))
        logFile, goodLogFile, sideLogFile, badLogFile = openLogs
        logFile.write(columnNames)
        findHolesHeader = findHolesLines[0].strip()
        for classifiedLog in (goodLogFile, sideLogFile, badLogFile):
            classifiedLog.write(
                "prefix " + findHolesHeader + " " + columnNames)
        holeNumber = 1
        poreFile = tstName + "." + str(holeNumber) + poreSuffix
        print(poreFile)
        paths = []
        sides = []  # indices into paths of the "side" paths
        while os.path.exists(poreFile):
            path = comparePaths.readCGOPath(poreFile)
            paths.append(path)
            intersections = [
                countCrossingsZ(path, barrier) for barrier in barrierZ]
            ends = [0, 0, 0]  # below lower / between / above upper barrier
            for endPoint in (path[0], path[-1]):
                endPointZ = endPoint[2]
                if endPointZ < barrierZ[0]:
                    ends[0] += 1
                elif endPointZ <= barrierZ[1]:
                    ends[1] += 1
                elif endPointZ > barrierZ[1]:  # false only for NaN
                    ends[2] += 1
            outputThisTime = " ".join(str(value) for value in (
                ends[0], intersections[0], ends[1], intersections[1],
                ends[2], barrierSep)) + " "
            logFile.write(outputThisTime)
            logFile.write("\n")
            if ends[0] + ends[1] + ends[2] != 2:
                print("problems sorting out the ends")
            if ends[0] == 1 and ends[2] == 1 and intersections == [1, 1]:
                classifiedLog = goodLogFile  # it is 'good'
            elif ends[1] == 2:
                sides.append(len(paths) - 1)
                classifiedLog = sideLogFile
            else:
                classifiedLog = badLogFile
            classifiedLog.write(
                prefix + " " + findHolesLines[holeNumber].strip() + " " +
                outputThisTime + "\n")
            # and that is it for this path, get set up for next pass
            holeNumber += 1
            poreFile = tstName + "." + str(holeNumber) + poreSuffix
        print(sides)
    finally:
        for openLog in openLogs:
            openLog.close()
def tstPocketMap(
        tstFileName, phiFileName, tstD=None, ligandFileName=None,
        nearbyDistance=0., appendTst=True, doPCA=True):
    '''pocket mapping algorithm, finds all pockets on entire surface, puts in
    tree and graph data structure, various outputs

    tstFileName: tst file; read when tstD is None, used as output prefix when
        no ligand nodes are found, and appended to when appendTst is true.
    phiFileName: phi map file to read.
    tstD: optional already-read tst data object.
    ligandFileName: optional ligand PDB; when it yields at least one node
        from getWithinNodesNoInside those nodes are passed to pocketMapping
        and the ligand file name becomes the output prefix.
    nearbyDistance: passed to meshData.calculateNearbyAtoms.
    appendTst: when true, POINT_LEAF, LEAF_GROUP, POINT_NEARBY_ATOM,
        curvature, charge, hydrophobic and DEPTH_TRAVEL_DIST records are
        appended to the tst file.
    doPCA: passed through to meshData.pocketMapping.

    Always writes <prefix>.tree.tm3.  Progress messages are printed.
    Returns None.
    '''
    print "read tst file"
    if tstD is None:  # hasn't been read in already
        tstD = tstdata.tstData(
            tstFileName, necessaryKeys=tstdata.tstData.necessaryKeysForPocket)
    print "repairing nearby points if necessary"
    repairPointPdbRecord(tstD, tstFileName)
    print "calculating charges"
    chargeXyz, hydroXyz = calculateCharges(tstD, charge.charge())
    print "calculating curvatures"
    # only the third returned value (ptWeighCurv) is used below
    edgeCurv, ptCurv, ptWeighCurv = tstCurvature.tstEdgeCurvature(
        tstD.dict['TRIANGLE_POINT'], tstD.dict['POINT_XYZ'],
        tstD.dict['POINT_TRIANGLE'], tstD.dict['POINT_NEIGHBOR'])
    tstD.dict['POINT_CURVATURE_EDGE'] = ptWeighCurv
    tstD.dict['CHARGE_XYZ'] = chargeXyz
    tstD.dict['HYDROPHOBIC_XYZ'] = hydroXyz
    print "read in phi file"
    phiData = phi(phiFileName)  # read in the phimap
    print "making mesh data structure"
    meshData, gridData = meshConstruct(
        tstD, phiData, tstFileName, threshold="auto", cavities=True)
    meshData.setPtHydro(tstD.dict['HYDROPHOBIC_XYZ'])
    meshData.setPtCurvature(tstD.dict['POINT_CURVATURE_EDGE'])
    # everything still needed from tstD / phiData is copied out before the
    # del below
    gridSize = 1.0/phiData.scale
    tstPdbRecord = tstD.dict['PDB_RECORD']
    meshData.setSurfaceArea(tstD.dict['TRIANGLE_POINT'])
    del tstD, phiData, gridData  # not needed, reclaim memory
    pdbD = pdb.pdbData()
    pdbD.processLines(tstPdbRecord)
    pointAtomList = meshData.calculateNearbyAtoms(pdbD, nearbyDistance)
    meshData.setVolume(gridSize)
    print "calculating travel depth"
    # NOTE(review): the meaning of the code lists [0] and [2, 3, 5] is
    # defined by the mesh class, not visible here
    meshData.calculateTravelDistance("traveldepth", [0], [2, 3, 5])
    pointTravelDepth = meshData.getSurfaceTravelDistance("traveldepth")
    if ligandFileName is not None:  # if there is a ligand, read it
        ligand = pdb.pdbData(ligandFileName)
        ligandXYZR = ligand.getHeavyAtomXYZRadius()
        nodeWithinSet = meshData.getWithinNodesNoInside(ligandXYZR)
        bestIU = 0.  # intersection / union, 1 is perfect (not used below)
        #print nodeWithinSet, len(nodeWithinSet)
    print "pocket mapping starting"
    # nodeWithinSet only exists when a ligand was given; the "and" keeps it
    # from being evaluated otherwise
    if ligandFileName is not None and len(nodeWithinSet) > 0:
        outFileName = ligandFileName
        #tstdebug.nodeDebug(nodeWithinSet, \
        #                   filename = tstFileName+".within.ligand.py")
        localMaxima, borders, tm3tree, surfNodeToLeaf = meshData.pocketMapping(
            'traveldepth', [2, 3, 5], pointAtomList, pdbD,
            outName=outFileName + ".", groupName='group',
            ligandNodes=nodeWithinSet, doPCA=doPCA)
    else:
        outFileName = tstFileName
        localMaxima, borders, tm3tree, surfNodeToLeaf = meshData.pocketMapping(
            'traveldepth', [2, 3, 5], pointAtomList, pdbD,
            outName=outFileName + ".", groupName='group', doPCA=doPCA)
    #print len(localMaxima), len(borders), tm3tree, len(surfNodeToLeaf)
    #tstdebug.nodeDebug(localMaxima, \
    #                   filename=tstFileName+".localmaxima.pocketmap.py")
    #tstdebug.nodeDebug(borders, \
    #                   filename=tstFileName+".borders.pocketmap.py")
    #tstdebug.nodeDebug(meshData.getSurfaceNodes(), \
    #                   filename=tstFileName+".groups.pocketmap.py", name='group')
    tm3tree.write(outFileName + ".tree.tm3")
    #tm3tree.writeTNV(tstFileName + ".tree.tnv")
    #doesn't seem to import into treemap correctly
    if appendTst:  # turn off sometimes since appends to tst file
        print "appending data to tst file"
        surfNodes = meshData.getSurfaceNodes()
        pointLeafList = []
        for aNode in surfNodes:
            if aNode not in surfNodeToLeaf:
                print aNode, aNode.distances
                leafNum = 0  # made up and wrong for testing
            else:
                leafNum = surfNodeToLeaf[aNode]
            pointLeafList.append([aNode, int(leafNum)])
        #print pointLeafList
        leafToGroup = tm3tree.getLeafToGroup()
        leafGroupList = []
        # one row per leaf number 1..max key; leaves missing from the
        # mapping get a row holding just their number
        leafKeyMax = max(leafToGroup.keys())
        for leaf in xrange(leafKeyMax):
            tempList = [leaf + 1]
            try:
                tempList.extend(leafToGroup[leaf + 1])
            except KeyError:
                pass  # means leaf doesn't exist
            leafGroupList.append(tempList)
        #print leafGroupList
        # NOTE(review): the file is not closed if one of the writes below
        # raises
        tstFile = open(tstFileName, 'a')
        tstdata.writeEntryIntegers(
            pointLeafList, "POINT_LEAF LIST", "END POINT_LEAF", tstFile)
        tstdata.writeEntryIntegers(
            leafGroupList, "LEAF_GROUP LIST", "END LEAF_GROUP", tstFile)
        tstdata.writeEntryIntegers(
            pointAtomList, "POINT_NEARBY_ATOM LIST", "END POINT_NEARBY_ATOM",
            tstFile)
        #also write curvature and charge data here
        tstdata.writeEntrySingleFloat(
            ptWeighCurv, "POINT_CURVATURE_EDGE LIST",
            "END POINT_CURVATURE_EDGE", tstFile)
        tstdata.writeEntrySingleFloat(
            chargeXyz, "CHARGE_XYZ", "END CHARGE_XYZ", tstFile)
        tstdata.writeEntrySingleFloat(
            hydroXyz, "HYDROPHOBIC_XYZ", "END HYDROPHOBIC_XYZ", tstFile)
        #write data to file
        tstFile.write("DEPTH_TRAVEL_DIST\n")
        for line in pointTravelDepth:
            lineOut = "%8d" % line[0]
            for count in xrange(1, len(line)):
                lineOut += "%+9.4f " % line[count]
            # the forced plus sign is blanked so the columns line up
            noPlusLine = string.replace(lineOut, "+", " ")
            tstFile.write(noPlusLine)
            tstFile.write("\n")
        tstFile.write("END DEPTH_TRAVEL_DIST\n")
        tstFile.close()
    print "pocket mapping complete"
# Script fragment: creates one sub-directory of databaseDir per PDB file
# found in pdbLocation and loops over three axis vectors and a range of
# angle steps.
# NOTE(review): the visible code ends right after the axis vector is set, so
# the remainder of the inner loop is presumably missing from this view.
# NOTE(review): pdb.pdbData is used below but no "import pdb" is visible.
import glob
import os
import geometry
import string
size = 256  # should be enough for anybody
databaseDir = 'database'
pdbLocation = 'pdbs'
radius = 1.8  # radius of atoms, estimate
try:
    os.mkdir(databaseDir)
except OSError:
    pass  # directory exists, fine
for onePdb in glob.iglob(os.path.join(pdbLocation, '*.pdb')):  # every PDB
    pdbD = pdb.pdbData(onePdb)
    # file name without directory, cut at the first "."
    pdbCode = string.split(os.path.split(onePdb)[-1], '.')[0]
    points = pdbD.coords
    try:
        os.mkdir(os.path.join(databaseDir, pdbCode))
    except OSError:
        pass  # directory exists, fine
    for vector in xrange(3):
        # thetaTen takes the values 0, 3, ..., 60
        for thetaTen in xrange(0, 62, 3):
            # unit vector along x, y or z depending on vector
            vecX, vecY, vecZ = 0., 0., 0.
            if vector == 0:
                vecX = 1.
            elif vector == 1:
                vecY = 1.
            elif vector == 2:
                vecZ = 1.
def tstTravelSurfInsideMesh(tstFileName, phiFileName, threshold="auto"):
    '''calculates the burial depth

    tstFileName: tst file to read; must already contain the
        CONVEX_HULL_TRI_POINT_LIST record (otherwise a hint is printed and
        the process exits with status 1).  The ATOM_TRAVEL_IN record is
        appended to it.
    phiFileName: phi map file to read.
    threshold: grid threshold; "auto" becomes 0.6 when the phi maximum is
        1.0 and 6.0 when it is 10.0.
        NOTE(review): for any other maximum "auto" is passed on unchanged.

    Files written: <tst>.mesh.travel.in.phi and <tst>.mesh.travelin.pdb (a
    PDB whose second factor is the per-atom depth).  Returns None.
    '''
    print("reading in tst and phi files")
    tstD = tstdata.tstData(
        tstFileName,
        necessaryKeys=tstdata.tstData.necessaryKeysForMesh + ['PDB_RECORD'])
    phiData = phi(phiFileName)  # read in the phimap if possible
    if 'CONVEX_HULL_TRI_POINT_LIST' not in tstD.dict:
        print("Run tstConvexHull.py on this tst data file first.")
        sys.exit(1)
    # set of all point numbers that occur in a convex hull triangle
    convexHullPoints = set()
    for record in tstD.dict['CONVEX_HULL_TRI_POINT_LIST']:
        convexHullPoints.update(record[1:])
    maxPhi = phiData.getMaxValues()
    if threshold == "auto" and maxPhi == 1.0:
        threshold = 0.6
    if threshold == "auto" and maxPhi == 10.0:
        threshold = 6.0
    gridD, mins, maxs = grid.makeTrimmedGridFromPhi(
        phiData, tstD.dict['POINT_XYZ'], convexHullPoints, threshold,
        0, -2, 2)
    gridSize = 1.0 / phiData.scale
    del phiData  # no longer needed in this function, drop the reference
    # do the biggest disjoint set of tris/points stuff
    # NOTE(review): none of the four results is used below
    allPoints, allTris, cavPoints, cavTris = cavity.assumeNoCavities(
        tstD.dict['POINT_XYZ'], tstD.dict['TRIANGLE_POINT'],
        tstD.dict['POINT_NEIGHBOR'])
    # here is where code is mesh-specific
    print("setting up mesh data structure")
    meshData = mesh.mesh(
        gridD, tstD.dict['POINT_XYZ'], tstD.dict['POINT_NEIGHBOR'],
        gridSize, -2, 0, "X")  # no between
    print("calculating burial depth")
    meshData.calculateTravelDistance("travelin", [3], [1])
    gridTravelInDepth = meshData.getGridTravelDistance(gridD, "travelin")
    print("writing phi file output")
    phiDataOut = phi()
    phiDataOut.createFromGrid(
        gridTravelInDepth, gridSize, toplabel="travel depth surf-in")
    phiDataOut.write(tstFileName+".mesh.travel.in.phi")
    print("writing pdb file output")
    pdbD = pdb.pdbData()
    for line in tstD.dict['PDB_RECORD']:
        pdbD.processLine(line)
    atomTravelInDepths = grid.assignAtomDepths(
        gridTravelInDepth, gridSize, mins, maxs, pdbD)
    # make a pdb file with the bfactor replaced
    for index, atomTID in enumerate(atomTravelInDepths):
        pdbD.updateFactors(index, (pdbD.factors[index][0], atomTID))
    pdbD.write(tstFileName+".mesh.travelin.pdb")
    print("updating tst file")
    # also add record to tstD, atom numbers are 1-based
    tstD.dict['ATOM_TRAVEL_IN'] = [
        [index + 1, atomTID]
        for index, atomTID in enumerate(atomTravelInDepths)]
    # write data into tst file
    tstFile = open(tstFileName, 'a')
    try:
        tstFile.write("ATOM_TRAVEL_IN\n")
        for line in tstD.dict['ATOM_TRAVEL_IN']:
            lineOut = "%8d" % line[0]
            lineOut += "".join("%+9.4f " % value for value in line[1:])
            # sign column is forced, then the plus is blanked for alignment
            tstFile.write(lineOut.replace("+", " "))
            tstFile.write("\n")
        tstFile.write("END ATOM_TRAVEL_IN\n")
    finally:
        tstFile.close()  # close even if formatting or writing fails
    print("burial depth done")
#!/usr/bin/env python
#ryan g. coleman [email protected]
#outputs all NMR models as single pdb files

#grab system, string, regular expression, and operating system modules
import sys
import string
import re
import os
import math
import pdb  # for chain sorting ease


def main(argv):
    '''Write every model of the PDB file named in argv[1] to its own file.

    argv: argument list in sys.argv layout (program name first).

    Output names are the input name without its last 4 characters, followed
    by ".<modelNumber>.pdb".  Returns 0 on success and 1, after printing a
    usage line to stderr, when no input file was given.
    '''
    if len(argv) < 2:
        sys.stderr.write("usage: outputAllNMRpdb.py file.pdb\n")
        return 1
    fileName = argv[1]
    pdbD = pdb.pdbData(fileName)
    for modelNum in pdbD.getModelNumbers():
        newPdb = pdbD.getOneModel(modelNum)
        newPdb.write(fileName[:-4] + "." + str(modelNum) + ".pdb")
    return 0


# only run when started as this script, not when imported
if "outputAllNMRpdb.py" in sys.argv[0]:
    exitStatus = main(sys.argv)
    if exitStatus:
        sys.exit(exitStatus)
def tstTravelSurfInside(tstFileName, phiFileName=False): tstD = tstdata.tstData(tstFileName) # read the file into the data structure phiData = phi(phiFileName) # read in the phimap if possible if 'CONVEX_HULL_TRI_POINT_LIST' not in tstD.dict.keys(): print "Run tstConvexHull.py on this tst data file first." sys.exit(1) #these sets are useful to construct convexHullPoints = set() for record in tstD.dict['CONVEX_HULL_TRI_POINT_LIST']: convexHullPoints.update(record[1:]) gridD, mins, maxs = grid.makeTrimmedGridFromPhi( phiData, tstD.dict['POINT_XYZ'], convexHullPoints, 0.6, 0, -2.0, 2) gridSize = 1.0/phiData.scale del phiData # no longer needed in this function, so delete this reference #do the biggest disjoint set of tris/points stuff allPoints, allTris, cavPoints, cavTris = cavity.assumeNoCavities( tstD.dict['POINT_XYZ'], tstD.dict['TRIANGLE_POINT'], tstD.dict['POINT_NEIGHBOR']) #here's the (relatively simple) surface travel distance calculation finally # assign following encoding -1 = outside ch, 0 = on border, # pos ints = dist from border, -2 = far inside ms, # other neg ints = -(dist)-3 #whole algorithm wrapped into big function... 
extraEdges, surfaceEdgeBoxes = grid.findLongSurfEdges( tstD.dict['POINT_XYZ'], tstD.dict['POINT_NEIGHBOR'], gridSize, mins, maxs) for surfaceEdgeBox in surfaceEdgeBoxes.keys(): x, y, z = gridD[surfaceEdgeBox[0]][surfaceEdgeBox[1]][surfaceEdgeBox[2]][1:] gridD[surfaceEdgeBox[0]][surfaceEdgeBox[1]][surfaceEdgeBox[2]] = ( -1., x, y, z) pointTravelDist, traceback, volumePointDepths = \ travelDistNoMesh.calcTravelDist( gridD, tstD.dict['POINT_XYZ'], gridSize, mins, maxs, allPoints, extraEdges, surfaceEdgeBoxes, tstFileName) #transform grid to actual travel distance maxTD = grid.finalizeGridTravelDist(gridD, gridSize) phiDataOut = phi() phiDataOut.createFromGrid(gridD, gridSize, toplabel="travel depth surf-in") phiDataOut.write(tstFileName+".travel.in.phi") pdbD = pdb.pdbData() for line in tstD.dict['PDB_RECORD']: pdbD.processLine(line) atomTravelInDepths = grid.assignAtomDepths(gridD, gridSize, mins, maxs, pdbD) #make a pdb file with the bfactor replaced for index, atomTID in enumerate(atomTravelInDepths): pdbD.updateFactors(index, (pdbD.factors[index][0], atomTID)) pdbD.write(tstFileName+".travelin.pdb") #also add record to tstdata atomTIDRecord = [] for index, atomTID in enumerate(atomTravelInDepths): atomTIDRecord.append([index+1, atomTID]) tstD.dict['ATOM_TRAVEL_IN'] = atomTIDRecord #write data into tst file tstFile = open(tstFileName, 'a') tstFile.write("ATOM_TRAVEL_IN\n") for line in tstD.dict['ATOM_TRAVEL_IN']: lineOut = "%8d" % line[0] for count in xrange(1, len(line)): lineOut += "%+9.4f " % line[count] noPlusLine = string.replace(lineOut, "+", " ") tstFile.write(noPlusLine) tstFile.write("\n") tstFile.write("END ATOM_TRAVEL_IN\n") tstFile.close()
def copyCode(pdbCode):
    '''reads the pdb file grab_pdb.getCode gives for pdbCode and writes it
    back out as pdbCode + ".-.pdb"'''
    sourceName = grab_pdb.getCode(pdbCode)
    pdb.pdbData(sourceName).write(pdbCode + ".-.pdb")
def getCodeChain(pdbCode, chain):
    '''reads the pdb file grab_pdb.getCode gives for pdbCode and writes just
    the one chain out as pdbCode + "." + chain + ".pdb"'''
    wholePdb = pdb.pdbData(grab_pdb.getCode(pdbCode))
    wholePdb.getOneChain(chain).write(pdbCode + "." + chain + ".pdb")
def makeTst( pdbFileName, gridSpacing, pathTo="$TDHOME/bin/", whichSrf="tri", probeSize=False, radScaleIn=False): ''' figures out how to create proper grid spacing, calls fortran programs ''' pathTo = os.path.expandvars(pathTo) if "tri" == whichSrf: # pick which method to use, default srf = trisrf probe = triprobe elif "mesh" == whichSrf: # alternate, better for tunnels srf = meshsrf probe = meshprobe if probeSize: # means use non-default value... probe = probeSize if radScaleIn: # same, use non-default value radScaleUse = float(radScaleIn) else: radScaleUse = radscale pdbEntry = pdb.pdbData(pdbFileName) rootNameTemp = string.replace(pdbFileName, ".pdb", "") rootName = string.replace(rootNameTemp, ".PDB", "") tstFileName = rootName + ".tst" triFileName = rootName + ".tri" phiFileName = rootName + ".phi" mins, maxs = [xVal for xVal in pdbEntry.coords[0]], [ xVal for xVal in pdbEntry.coords[0]] atoms = pdbEntry.atoms for dimension in range(3): for index, coord in enumerate(pdbEntry.coords): mins[dimension] = min( mins[dimension], coord[dimension] - (pdb.radiiDefault[atoms[index][0]]*radScaleUse)) maxs[dimension] = max( maxs[dimension], coord[dimension] + (pdb.radiiDefault[atoms[index][0]]*radScaleUse)) difference = 0 for dimension in range(3): difference = max(difference, maxs[dimension] - mins[dimension]) length = difference + 2. * probe gridScale = 1. 
/ float(gridSpacing) possibleGridSizes = srf.keys() possibleGridSizes.sort() for possibleGridSize in possibleGridSizes: percentFill = gridScale * 100 * length / (possibleGridSize - 1) if percentFill < 99: break # keep these settings, they are good enough if possibleGridSizes[-1] == possibleGridSize: print "no grid size big enough, either make new version of tst or " + \ "adjust grid size parameter" sys.exit(1) srfExecutable = pathTo + srf[possibleGridSize] if not os.path.exists(srfExecutable): print "the surface preparation executable does not exist at: ", \ srfExecutable exit(1) if "tri" == whichSrf: execString = srfExecutable + " " + \ pdbFileName + " " + str(contour) # run trisrf elif "mesh" == whichSrf: execString = srfExecutable + " " + \ pdbFileName + " " + str(probe) + " " + str(radScaleUse) # run meshsrf #print percentFill, execString try: os.unlink("trisrf.tri") except OSError: pass # this is okay, just making sure it is deleted trisrfProc = os.popen4(execString) if "tri" == whichSrf: # pick which method to use, default trisrfProc[0].write(str(percentFill) + "\n33\n") elif "mesh" == whichSrf: # alternate, better for tunnels trisrfProc[0].write(str(percentFill) + "\n") trisrfProc[0].flush() trisrfProc[0].close() finishedRunningSrf = trisrfProc[1].read() log = open(tstFileName + ".log", 'w') log.write(finishedRunningSrf) if "tri" == whichSrf: # pick which method to use, default try: os.rename("trisrf.tri", triFileName) os.rename("trisrf.phi", phiFileName) except OSError: # actual problem print "trisrf did not make .tri file, check logs" log.close() return False elif "mesh" == whichSrf: # alternate, better for tunnels try: os.rename("meshsrfA.tri", triFileName) os.rename("meshsrfA.phi", phiFileName) except OSError: # actual problem print "meshsrf did not make .tri file, check logs" log.close() return False if not os.path.exists(pathTo + gen): print "the surface generation executable does not exist at: " + pathTo + \ gen exit(1) trigenProc = os.popen4(pathTo + 
gen + " " + triFileName + " " + tstFileName) trigenProc[0].flush() trigenProc[0].close() finishedRunningGen = trigenProc[1].read() try: os.unlink("trilinel.dat") os.unlink("triline.usr") os.unlink("trinext.dat") os.unlink("trisrf.pdb") os.unlink("trisrf.rec") os.unlink("trisrf.usr") os.unlink("fort.10") os.unlink("trigen.py") os.unlink("triline.py") except OSError: pass # again, just cleaning up junk files try: os.unlink("mesh.pdb") os.unlink("meshline.usr") os.unlink("meshlinel.dat") os.unlink("meshtri.dat") os.unlink("fort.10") os.unlink("trigen.py") os.unlink("triline.py") except OSError: pass # again, just cleaning up junk files log.write(finishedRunningGen) log.close() return True # indicates success
#!/usr/bin/env python

#reads in what are assumed to be ligand pdb files
#for each one, cluster based on some threshold, break into distinct clusters,
#write each cluster

import string
import sys
import pdb

#only runs when invoked as the clusterLigands.py script
if string.find(sys.argv[0], "clusterLigands.py") != -1:
    for filename in sys.argv[1:]:
        clusters = pdb.pdbData(filename).clusterAtoms(distanceCutoff=5.0)
        #print filename, len(clusters) #debug
        #zero-pad the cluster index so every output prefix has equal width
        width = len(str(len(clusters)))
        for clusterIndex, cluster in enumerate(clusters):
            outputName = string.zfill(clusterIndex, width) + "_" + filename
            cluster.write(outputName)
#!/usr/bin/env python import sys import pdb OK_RESIDUES = set() OK_RESIDUES.update(pdb.aminoAcid3Codes) OK_RESIDUES.update(pdb.keepPolarH.keys()) OK_RESIDUES.remove('HEM') # Only keep ions and waters in addition p = pdb.pdbData(sys.argv[1], ignoreWaters=False) mode = sys.argv[3] for idx, line in enumerate(p.rawData): p.rawData[idx] = line[:79] if mode == 'final': # p.replaceHETATMwithATOM() p.removeApolarHydrogen() p.write(sys.argv[2]+'.H') p = pdb.pdbData(sys.argv[2]+'.H', ignoreWaters=False) p.renameHistidines() p.renameCysteines() for idx, resName in enumerate(p.resNames): if resName not in OK_RESIDUES: p.removeLine(idx) for idx, alt in enumerate(p.altChars): if alt not in (' ', 'A'):
#!/usr/bin/env python #ryan g. coleman [email protected] [email protected] #kim sharp lab http://crystal.med.upenn.edu #finds all inter-atom distances import string import sys import geometry import pdb if -1 != string.find(sys.argv[0], "pdbDistances.py"): try: for pdbName in sys.argv[1:]: pdbD = pdb.pdbData(pdbName) outputName = pdbName.replace("pdb", "").replace(".", "") longestDist, meanDist = geometry.longestAndMeanDist( pdbD.getHeavyAtomXYZ()) print outputName, "\t", longestDist, "\t", meanDist except IndexError: print "pdbDistances.py pdbName [list of more pdbs]" print "outputs to standard out" sys.exit(1)
#!/usr/bin/env python

# reads in a list of codes, finds pdb + ligand files, outputs
# residues nearby each ligand in the pdb

import string
import sys
import pdb
import glob

#only runs when invoked as the getResiduesNearbyLigands.py script
if string.find(sys.argv[0], "getResiduesNearbyLigands.py") != -1:
    for prefix in sys.argv[1:]:
        mainPdb, ligandPdbs = False, []
        #sort the matching files into the main pdb and the ligand pdbs
        for filename in glob.glob("*" + prefix + "*pdb"):
            if "ligand" not in filename:  # is main
                mainPdb = filename
            elif "nearby" not in filename:  # is not output from previous run
                ligandPdbs.append(filename)
        for ligandName in ligandPdbs:
            nearbyName = "nearby_" + ligandName
            ligandPdbD = pdb.pdbData(ligandName)
            mainPdbD = pdb.pdbData(mainPdb)
            #residues of the main pdb found with the ligand coordinates
            #and a cutoff of 5.0
            nearbyPdb = mainPdbD.getNearbyResidues(ligandPdbD.coords, 5.0)
            nearbyPdb.write(nearbyName)
            #read the file just written back in before listing residues
            nearbyPdb = pdb.pdbData(nearbyName)
            justResString = pdb.turnListIntoString(
                nearbyPdb.getResidueNamesChains())
            outFile = open(nearbyName + ".res", "w")
            outFile.write(justResString)
            outFile.close()
def checkPathBarriers(prefix):
    '''sorts the pore paths of one structure by how they cross two barriers

    Input files (all named from prefix):
      prefix + ".nocav.tst.findholes.log"   one header line, then one line
                                            per path
      "planes_" + prefix + ".pdb"           pdb holding the barrier atoms
                                            (residue DUM, atom names that
                                            start with O and with N)
      prefix + ".nocav.tst.N.pore.py"       path N, for N = 1, 2, ... until
                                            the file does not exist
    Output files (tstName = prefix + ".nocav.tst"):
      tstName + ".sideshole.log"            counts for every path
      tstName + ".good.sideshole.log"       paths with one end beyond each
                                            barrier and one crossing of each
      tstName + ".side.sideshole.log"       paths with exactly one end
                                            between the barriers
      tstName + ".bad.sideshole.log"        everything else
      tstName + ".branchholes.log" and
      tstName + ".N.branch.py"              side paths that share an end
                                            with a good path, after the good
                                            paths are subtracted from them
    Finishes by calling addFoundHoleStats.redoFindholes on the branch log.
    '''
    tstName = prefix + ".nocav.tst"
    findHolesName = tstName + ".findholes.log"
    findHolesFile = open(findHolesName, 'r')
    findHolesLines = findHolesFile.readlines()
    findHolesFile.close()
    HolesName = tstName + ".sideshole.log"  # holds all the output
    goodHolesName = tstName + ".good.sideshole.log"  # just the 1 1 0 1 1
    sideHolesName = tstName + ".side.sideshole.log"  # just the * * 1 * *
    badHolesName = tstName + ".bad.sideshole.log"  # all others
    pdbWithBarriersFileName = "planes_" + prefix + ".pdb"
    pdbBarriers = pdb.pdbData(pdbWithBarriersFileName)
    #get the barriers read in and defined
    #list 0 collects the DUM atoms whose name starts with O, list 1 those
    #whose name starts with N
    barrierAtomList = [[], []]
    for index, resName in enumerate(pdbBarriers.resNames):
        if resName == "DUM":
            if pdbBarriers.atoms[index][0] == "O":
                barrierAtomList[0].append(pdbBarriers.coords[index])
            elif pdbBarriers.atoms[index][0] == "N":
                barrierAtomList[1].append(pdbBarriers.coords[index])
    #only the first atom of each barrier is used; after the sort
    #barrierZ[0] <= barrierZ[1]
    barrierZ = [barrierAtomList[0][0][2], barrierAtomList[1][0][2]]
    barrierZ.sort()
    barrierSep = geometry.distL2(barrierAtomList[0][0], barrierAtomList[1][0])
    #barrier is just Z coordinate
    #setup for main loop over paths
    poreSuffix = ".pore.py"
    logFile = open(HolesName, 'w')
    goodLogFile = open(goodHolesName, 'w')
    sideLogFile = open(sideHolesName, 'w')
    badLogFile = open(badHolesName, 'w')
    #the following 5 things are calculated and written for each path, headers
    #the 6th, barrier separation, is really the same for each structure
    logFile.write("endsBeyond1count barrier1count endsBetweenCount ")
    logFile.write("barrier2count endsBeyond2count barrierSeparation\n")
    #the three sorted logs also carry the prefix and the findholes header
    goodLogFile.write("prefix ")
    goodLogFile.write(string.strip(findHolesLines[0]) + " ")
    goodLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
    goodLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
    sideLogFile.write("prefix ")
    sideLogFile.write(string.strip(findHolesLines[0]) + " ")
    sideLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
    sideLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
    badLogFile.write("prefix ")
    badLogFile.write(string.strip(findHolesLines[0]) + " ")
    badLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
    badLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
    holeNumber = 1
    poreFile = tstName + "." + str(holeNumber) + poreSuffix
    print poreFile
    paths = []  # paths read with radius, indexed by pathNum
    sides, goods = [], []  # pathNums sorted into the side and good classes
    endsToPaths = {}  # end label -> list of pathNums that have that end
    pathsToEnds = {}  # pathNum -> list of its end labels
    while os.path.exists(poreFile):
        path = comparePaths.readCGOPath(poreFile)
        pathRad = comparePaths.readCGOPathWithRadius(poreFile)
        paths.append(pathRad)
        pathNum = len(paths) - 1
        #the 2nd and 3rd whitespace-separated fields of this path's
        #findholes line are taken as the labels of its two ends
        for end in string.split(findHolesLines[holeNumber])[1:3]:
            if pathNum not in pathsToEnds:
                pathsToEnds[pathNum] = []
            pathsToEnds[pathNum].append(end)
            if end not in endsToPaths:
                endsToPaths[end] = []
            endsToPaths[end].append(pathNum)
        #one crossing count per barrier
        #NOTE(review): countCrossingsZ is defined elsewhere, presumably it
        #counts how often the path crosses the given z value -- confirm
        intersections = [0, 0]
        for index, barrier in enumerate(barrierZ):
            intersections[index] = countCrossingsZ(path, barrier)
        #where the two path ends sit: below both barriers, between them
        #(inclusive), above both
        ends = [0, 0, 0]
        for endPoint in [path[0], path[-1]]:
            endPointZ = endPoint[2]
            if endPointZ < barrierZ[0] and endPointZ < barrierZ[1]:
                ends[0] += 1
            elif endPointZ >= barrierZ[0] and endPointZ <= barrierZ[1]:
                ends[1] += 1
            elif endPointZ > barrierZ[0] and endPointZ > barrierZ[1]:
                ends[2] += 1
        outputThisTime = str(ends[0]) + " " + str(intersections[0]) + " " + \
            str(ends[1]) + " " + str(intersections[1]) + " " + \
            str(ends[2]) + " " + str(barrierSep) + " "
        logFile.write(outputThisTime)
        logFile.write("\n")
        if ends[0] + ends[1] + ends[2] != 2:
            print "problems sorting out the ends"
        if ends[0] == 1 and ends[2] == 1 and intersections == [1, 1]:
            #it is 'good'
            goods.append(pathNum)
            goodLogFile.write(prefix + " ")
            goodLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
            goodLogFile.write(outputThisTime + "\n")
        elif ends[1] == 1:
            sides.append(pathNum)
            sideLogFile.write(prefix + " ")
            sideLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
            sideLogFile.write(outputThisTime + "\n")
        else:
            badLogFile.write(prefix + " ")
            badLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
            badLogFile.write(outputThisTime + "\n")
        #and that is it for this path
        holeNumber += 1  # get set up for next pass
        poreFile = tstName + "." + str(holeNumber) + poreSuffix
    logFile.close()
    goodLogFile.close()
    sideLogFile.close()
    badLogFile.close()
    #next lines are for debugging the new data structures
    '''
    print sides
    print goods
    print endsToPaths
    print pathsToEnds
    '''
    #now want to find side branches of good paths
    branches = 0
    branchSuffix = ".branch.py"
    branchFile = tstName + "." + str(branches) + branchSuffix
    branchLog = open(tstName + ".branchholes.log", 'w')
    branchLog.write(string.strip(findHolesLines[0]) + "\n")
    for side in sides:
        #good paths that share an end label with this side path; a good
        #path is listed once per matching pair of ends
        foundGoods = []
        for sideEnd in pathsToEnds[side]:
            for good in goods:
                for goodEnd in pathsToEnds[good]:
                    if goodEnd == sideEnd:
                        foundGoods.append(good)
        if len(foundGoods) > 0:
            branchedPath = paths[side]  # start with whole path
            for good in foundGoods:  # remove physiological intersecting paths
                branchedPath = pathsModule.subtractPaths(branchedPath, paths[good])
            if len(branchedPath) > 0:  # has to have some length remaining
                branches += 1  # so branch files are numbered from 1
                branchFile = tstName + "." + str(branches) + branchSuffix
                print branches, side, foundGoods
                tstdebug.debugSetGridSpheres(
                    branchedPath, 0.5, branchFile, radius=True,
                    mainColor=(0.01, 0.9, 0.35))
                #log line: branch number, the two end labels of the side
                #path, then placeholder columns
                branchLog.write(str(branches) + " ")
                branchLog.write(str(pathsToEnds[side][0]) + " ")
                branchLog.write(str(pathsToEnds[side][1]) + " ")
                branchLog.write("- ")  # dummy, not real
                branchLog.write("0. 0. 0. 0. 0. 0. 0. 0. 0. 0. \n")
    branchLog.close()
    addFoundHoleStats.redoFindholes(
        prefix, nearbyDistance=4., logExt=".branchholes.log",
        poreSuffix=".branch.py", nearbyName=".branch")
resNumStr = goodNode.attributes[columnNums[9]] newPdb = pdbData.getListResiduesChains(resNumStr) #open the output file outFile = open( str(paramCount) + "." + str(goodCount) + ".xtal-lig.pdb", 'w') outFile.write("REMARK parameter set: " + str(paramCount) + "\n") for name, value in params[paramCount].iteritems(): outFile.write("REMARK paramater " + name + ": " + str(value) + "\n") outFile.write("REMARK pocket number: " + str(goodCount) + "\n") #write a bunch of remarks about all the attributes for colCount in xrange(len(columnNames)): outFile.write( "REMARK " + str(columnNames[colCount]) + ": " + str(goodNode.attributes[columnNums[colCount]]) + "\n") newPdb.outputLines(outFile) # write the actual PDB file outFile.close() if -1 != string.find(sys.argv[0], "tm3pickerPdb.py"): pdbData = pdb.pdbData(sys.argv[1]) tmData = tm3.tmTreeFromFile(sys.argv[2]) paramsNew = None if len(sys.argv) > 8: # user wants to declare some an extra set of parameters paramsNew = {} paramNames = [ "volMin", "volMax", "apolarMin", "polarMin", "apolarFractionMax", "polarFractionMax"] for count in xrange(3, 9): paramsNew[paramNames[count - 3]] = float(sys.argv[count]) print "user declare pocket picker parameters:", paramsNew pickPocketsPdb(pdbData, tmData, paramsNew)