def checkPathBarriers(prefix):
    '''Sort the pore paths of one structure by how they cross two barriers.

    Inputs, all derived from prefix:
      prefix.nocav.tst.findholes.log   header line plus one line per path
      planes_prefix.pdb                structure holding the barrier atoms
      prefix.nocav.tst.N.pore.py       one file per path, N = 1, 2, ...
                                       read until the first missing N

    For every path the number of end points before, between and beyond the
    two barrier Z values is counted, along with whatever countCrossingsZ
    reports for each barrier. Every path gets a line in
    prefix.nocav.tst.sideshole.log and a line in exactly one of:
      .good.sideshole.log   one end beyond each barrier and a count of
                            exactly 1 for both barriers
      .side.sideshole.log   exactly one end between the barriers
      .bad.sideshole.log    everything else

    Afterwards each 'side' path that shares an end label with a 'good' path
    has those good paths subtracted from it. What remains, if anything, is
    written as prefix.nocav.tst.N.branch.py and listed in
    prefix.nocav.tst.branchholes.log, and addFoundHoleStats.redoFindholes is
    run on that log.

    Raises IndexError if the findholes log is empty or shorter than the
    number of pore files, or if either barrier has no atoms in the pdb file.
    Returns nothing.
    '''
    tstName = prefix + ".nocav.tst"
    with open(tstName + ".findholes.log", 'r') as findHolesFile:
        findHolesLines = findHolesFile.readlines()
    #get the barriers read in and defined: DUM residues, sorted into barrier
    #0 or 1 by whether the first element of the atom entry is "O" or "N"
    pdbBarriers = pdb.pdbData("planes_" + prefix + ".pdb")
    barrierAtomList = [[], []]
    for index, resName in enumerate(pdbBarriers.resNames):
        if resName != "DUM":
            continue
        atomKind = pdbBarriers.atoms[index][0]
        if atomKind == "O":
            barrierAtomList[0].append(pdbBarriers.coords[index])
        elif atomKind == "N":
            barrierAtomList[1].append(pdbBarriers.coords[index])
    #only the first atom found for each barrier is used
    firstBarrierAtom = barrierAtomList[0][0]
    secondBarrierAtom = barrierAtomList[1][0]
    #barrier is just Z coordinate; sorted so barrierZ[0] <= barrierZ[1]
    barrierZ = sorted([firstBarrierAtom[2], secondBarrierAtom[2]])
    barrierSep = geometry.distL2(firstBarrierAtom, secondBarrierAtom)
    #the following 5 things are calculated and written for each path, headers
    #the 6th, barrier separation, is really the same for each structure
    countsHeader = (
        "endsBeyond1count barrier1count endsBetweenCount "
        "barrier2count endsBeyond2count barrierSeparation\n")
    findHolesHeader = findHolesLines[0].strip()
    classifiedHeader = "prefix " + findHolesHeader + " " + countsHeader
    #setup for main loop over paths
    poreSuffix = ".pore.py"
    radiusPaths = []  # path (with radius) for each path number
    sides, goods = [], []  # path numbers of each classification
    pathsToEnds = {}  # path number -> end labels from the findholes log
    #with-block so all four logs get closed even if a pore file fails to read
    with open(tstName + ".sideshole.log", 'w') as logFile, \
            open(tstName + ".good.sideshole.log", 'w') as goodLogFile, \
            open(tstName + ".side.sideshole.log", 'w') as sideLogFile, \
            open(tstName + ".bad.sideshole.log", 'w') as badLogFile:
        logFile.write(countsHeader)  # holds all the output
        goodLogFile.write(classifiedHeader)  # just the 1 1 0 1 1
        sideLogFile.write(classifiedHeader)  # just the * * 1 * *
        badLogFile.write(classifiedHeader)  # all others
        holeNumber = 1
        poreFile = tstName + "." + str(holeNumber) + poreSuffix
        print(poreFile)
        while os.path.exists(poreFile):
            path = comparePaths.readCGOPath(poreFile)
            radiusPaths.append(comparePaths.readCGOPathWithRadius(poreFile))
            pathNum = len(radiusPaths) - 1
            holeLine = findHolesLines[holeNumber]
            #columns 1 and 2 of the findholes line are the two end labels;
            #always stored, so a short line leaves an empty list not a gap
            pathsToEnds[pathNum] = holeLine.split()[1:3]
            intersections = [
                countCrossingsZ(path, barrier) for barrier in barrierZ]
            #ends[0]: below the lower barrier, ends[1]: between or on the
            #barriers, ends[2]: above the upper barrier
            ends = [0, 0, 0]
            for endPoint in (path[0], path[-1]):
                endPointZ = endPoint[2]
                if endPointZ < barrierZ[0]:
                    ends[0] += 1
                elif barrierZ[0] <= endPointZ <= barrierZ[1]:
                    ends[1] += 1
                elif endPointZ > barrierZ[1]:
                    ends[2] += 1
            outputThisTime = " ".join(str(value) for value in (
                ends[0], intersections[0], ends[1], intersections[1],
                ends[2], barrierSep)) + " "
            logFile.write(outputThisTime + "\n")
            if sum(ends) != 2:
                print("problems sorting out the ends")
            if ends[0] == 1 and ends[2] == 1 and intersections == [1, 1]:
                #it is 'good'
                goods.append(pathNum)
                classifiedLog = goodLogFile
            elif ends[1] == 1:
                sides.append(pathNum)
                classifiedLog = sideLogFile
            else:
                classifiedLog = badLogFile
            classifiedLog.write(
                prefix + " " + holeLine.strip() + " " + outputThisTime + "\n")
            #and that is it for this path
            holeNumber += 1  # get set up for next pass
            poreFile = tstName + "." + str(holeNumber) + poreSuffix
    #now want to find side branches of good paths
    branches = 0
    branchSuffix = ".branch.py"
    with open(tstName + ".branchholes.log", 'w') as branchLog:
        branchLog.write(findHolesHeader + "\n")
        for side in sides:
            sideEnds = pathsToEnds[side]
            #a good path is listed once per end label it shares with the side
            foundGoods = []
            for sideEnd in sideEnds:
                for good in goods:
                    for goodEnd in pathsToEnds[good]:
                        if goodEnd == sideEnd:
                            foundGoods.append(good)
            if not foundGoods:
                continue
            branchedPath = radiusPaths[side]  # start with whole path
            for good in foundGoods:  # remove physiological intersecting paths
                branchedPath = pathsModule.subtractPaths(
                    branchedPath, radiusPaths[good])
            if len(branchedPath) > 0:  # has to have some length remaining
                branches += 1
                branchFile = tstName + "." + str(branches) + branchSuffix
                print("%s %s %s" % (branches, side, foundGoods))
                tstdebug.debugSetGridSpheres(
                    branchedPath, 0.5, branchFile, radius=True,
                    mainColor=(0.01, 0.9, 0.35))
                #"- " and the ten zeros are dummy columns, not real values
                branchLog.write(
                    str(branches) + " " + str(sideEnds[0]) + " " +
                    str(sideEnds[1]) + " " + "- " +
                    "0. 0. 0. 0. 0. 0. 0. 0. 0. 0. \n")
    addFoundHoleStats.redoFindholes(
        prefix, nearbyDistance=4., logExt=".branchholes.log",
        poreSuffix=".branch.py", nearbyName=".branch")
def _pathWithRadii(nodePath):
    '''Return one [radius, coordinates...] list per node of nodePath.

    The radius is the node's "surfout" distance. A missing or zero value is
    replaced by a very small positive number.
    '''
    pointPath = []
    for node in nodePath:
        pointRadius = node.distances["surfout"]  # add 'radius' info
        if not pointRadius:
            pointRadius = .000000001  # very small number, on surface
        pointPath.append([pointRadius] + list(node.pathXyz))
    return pointPath


def _recordBestStats(bestStats, bestStatsPaths, metrics, pathNumber):
    '''Update, in place, the best value seen so far for each metric.

    metrics is ordered pRMSD, coverage, span, wrmsd, less1, lessrad,
    radiicomp. For pRMSD and wrmsd the smallest value is kept, for the others
    the largest. bestStatsPaths gets the path number that produced each
    best value. An entry of 'err' in bestStats means nothing recorded yet.
    '''
    keepSmallest = (True, False, False, True, False, False, False)
    for index, value in enumerate(metrics):
        best = bestStats[index]
        if keepSmallest[index]:
            improved = best == 'err' or best > value
        else:
            improved = best == 'err' or best < value
        if improved:
            bestStats[index] = value
            bestStatsPaths[index] = pathNumber


def tstTravelFindHoles(
        tstFileName, phiFileName, debugOut=False, borderSize=2,
        nearbyDistance=4.):
    '''Find paths through the holes of a structure and write them out.

    if debugout is set, additional files are created. bordersize can change
    the amount of extra space around the protein. nearbydistance changes the
    distance that nearby residues are gathered from.

    tstFileName and phiFileName are the tst and phi input files. All output
    file names are tstFileName plus a suffix: .HOLE.start.txt, .tree.py,
    .outside.py, .findholes.log and, for each accepted path N, .N.pore.py,
    .N.path.py, .N.radii.txt plus whatever paths.outputNearbyResidues
    writes. Candidate paths are processed in order of decreasing minimum
    radius and numbered in that order; only paths for which paths.checkPath
    returns a non-empty first result are kept.

    If the file named by replacing ".nocav.tst" with ".py" in tstFileName
    exists it is treated as a known path: each found path is compared to it,
    seven extra columns are added to the log and the best value of each
    metric is written to tstFileName.known.best.txt.

    Returns nothing.
    '''
    print("reading tst and phi files")
    tstD = tstdata.tstData(
        tstFileName, necessaryKeys=tstdata.tstData.necessaryKeysForHoles)
    phiData = phi(phiFileName)  # read in the phimap if possible
    gridSize = 1.0 / phiData.scale  # needed later
    numberHandles = tstD.countHandles()
    #double spaces reproduce the output of the comma-separated print
    print("there are  %s  holes in this structure" % (numberHandles,))
    print("running travel depth now")
    #only the mesh (third result) is needed; the travel depth grid and data
    #are dropped right away to reclaim memory
    meshData = tstTravelDepthMeshRun(
        tstD, phiData, tstFileName, borderSize=borderSize,
        threshold="auto")[2]
    del phiData  # not needed, reclaim memory
    print("calculating travel out distance")
    meshData.calculateTravelDistance("surfout", [3], [0, 2])
    print("finding holes")
    (loopTrisSave, loopPointsSave, regLoopTris, regLoopPts, pointNeighbors,
     pointNeighborsNodes, outsidePoints, outsidePointsNodes,
     possHoleStarts) = tstTopology.fillInHolesAndGrow(
         tstD.dict['TRIANGLE_POINT'], tstD.dict['POINT_TRIANGLE'],
         tstD.dict['POINT_XYZ'], tstD.dict['NORM_XYZ'], numberHandles,
         tstFileName, debugOut, meshData, "surfout")
    if debugOut:
        tstdebug.debugTriangleList(
            regLoopTris, tstD.dict['TRIANGLE_POINT'], tstD.dict['POINT_XYZ'],
            tstFileName + ".regular.loops.py")
    tstDPointXYZ = tstD.dict['POINT_XYZ']  # save for later
    tstDPdbRecord = tstD.dict['PDB_RECORD']  # save for later
    del tstD  # rest of tstD isn't needed, so free memory
    #output the possible places where HOLE could start... centers of regular
    #plugs
    print("writing output files")
    paths.outputNodesText(possHoleStarts, tstFileName + ".HOLE.start.txt")
    pathsList = meshData.getPaths(
        "surfout", pointNeighbors, outsidePoints, possHoleStarts)
    del meshData  # deletes everything that has no other refs (from paths)
    allPoints = [point.pathXyz for point in pointNeighborsNodes.keys()]
    outsidePts = [point.pathXyz for point in outsidePointsNodes]
    tstdebug.pointDebug(allPoints, filename=tstFileName + ".tree.py")
    tstdebug.pointDebug(
        outsidePts, filename=tstFileName + ".outside.py",
        mainColor=(.9, .1, .1), radius=0.55)
    foundPaths = 0
    #very very specific... probably bad but nice...
    #can always rerun standalone later.
    pathFile = tstFileName.replace(".nocav.tst", ".py")
    knownPathExists = os.path.isfile(pathFile)
    if knownPathExists:
        bestStats = ["err"] * 7
        bestStatsPaths = [0] * 7
    logHeader = (
        "number endNumOne endNumTwo plugs stepLength pathLength "
        "pathMinRadius pathMaxInsideRadius endMinOne endMinTwo minimaCount "
        "travelDepthMax windingMetric avgTheta ")
    if knownPathExists:
        logHeader += "pRMSD coverage span wrmsd less1 lessrad radiicomp"
    #with-block so the log is closed even if a path fails part way through
    with open(tstFileName + ".findholes.log", 'w') as logFile:
        logFile.write(logHeader + "\n")
        sortedByMinRadiusPaths = []
        for outsideOne, outsideTwo, plugs, nodePath in pathsList:
            pointPath = _pathWithRadii(nodePath)
            minRad = paths.pathMinRadius(pointPath)
            #insertion sort into new list, largest minimum radius first;
            #a path goes in front of already inserted paths with equal radius
            position = 0
            while (position < len(sortedByMinRadiusPaths) and
                   sortedByMinRadiusPaths[position][0] > minRad):
                position += 1
            sortedByMinRadiusPaths.insert(
                position,
                (minRad, outsideOne, outsideTwo, plugs, nodePath, pointPath))
        print("output files for individual paths")
        for (minRad, outsideOne, outsideTwo, plugs, nodePath,
             pointPath) in sortedByMinRadiusPaths:
            throughTris, throughPts = paths.checkPath(
                pointPath, loopPointsSave, tstDPointXYZ)
            if not throughTris:
                continue  # rejected paths are neither numbered nor logged
            #it worked... make some more debugging output
            foundPaths += 1
            outName = tstFileName + "." + str(foundPaths)
            if debugOut:
                tstdebug.debugTrianglesNotOrig(
                    throughTris, tstDPointXYZ, outName + ".through.loop.py",
                    ptList=throughPts)
            #always do these 2
            tstdebug.debugSetGridSpheres(
                pointPath, gridSize, outName + ".pore.py", radius=True,
                mainColor=(0.01, 0.9, 0.05))
            tstdebug.debugSetGridSpheres(
                pointPath, gridSize, outName + ".path.py",
                mainColor=(.01, 0.95, 0.9))
            paths.outputRadiiTxt(pointPath, outName + ".radii.txt")
            paths.outputNearbyResidues(
                pointPath, outName, tstDPdbRecord, nearbyDistance)
            pathLen = paths.pathLength(pointPath)
            minimaCount = paths.pathMinimaCount(pointPath)
            maxRad, endMinOne, endMinTwo = paths.insideTwoMinimaRadiusMax(
                pointPath)
            travelDepthMax = paths.pathMaxDistance(nodePath, 'traveldepth')
            windingMetric = paths.computeWindingMetric(pointPath)
            _thetas, avgTheta = paths.averageTheta(pointPath)
            #attempt to do pRMSD if possible... columns stay "err" otherwise
            metrics = ["err"] * 7
            if knownPathExists:
                try:
                    sourcePath = [pathPt[1:4] for pathPt in pointPath]
                    sourceRadii = [pathPt[0] for pathPt in pointPath]
                    prmsd, coverage, span, wrmsd, less1, lessrad, radiicomp = \
                        comparePathsManyMetrics(
                            False, pathFile, sourcePath, sourceRadii)
                    metrics = [
                        prmsd, coverage, span, wrmsd, less1, lessrad,
                        radiicomp]
                    _recordBestStats(
                        bestStats, bestStatsPaths, metrics, foundPaths)
                except (IOError, TypeError):
                    #deliberately best effort: if there is no usable known
                    #path file, this should be the error, and the path is
                    #still logged with whatever metrics were obtained
                    pass
            #now output data, every column followed by a single space
            columns = [
                foundPaths, outsideOne, outsideTwo, plugs, len(pointPath),
                pathLen, minRad, maxRad, endMinOne, endMinTwo, minimaCount,
                travelDepthMax, windingMetric, avgTheta]
            if knownPathExists:
                columns.extend(metrics)
            logFile.write(
                "".join(str(column) + " " for column in columns) + "\n")
    if knownPathExists:  # output bestStats and bestStatsPaths
        with open(tstFileName + ".known.best.txt", 'w') as bestFile:
            bestFile.write(
                "pRMSD coverage span wrmsd less1 lessrad radiicomp ")
            bestFile.write(
                "pRMSD# coverage# span# wrmsd# less1# lessrad# radiicomp#\n")
            bestFile.write("".join(
                str(stat) + " " for stat in bestStats + bestStatsPaths))
            bestFile.write("\n")
    print("done with chunnel")