def _radiusTaggedPath(nodePath):
    '''return one [radius, x, y, z] list per node of nodePath.

    the radius is node.distances["surfout"]; a falsy value (None or 0.) is
    replaced by a very small positive number, since the node is on the
    surface.'''
    pointPath = []
    for node in nodePath:
        pointRadius = node.distances["surfout"]  # add 'radius' info
        if not pointRadius:  # covers both None and 0.
            pointRadius = .000000001  # very small number, on surface
        pointPath.append([pointRadius] + list(node.pathXyz))
    return pointPath


def _updateBestStats(bestStats, bestStatsPaths, metrics, pathNumber):
    '''fold the 7 comparison metrics of one path into the running bests.

    bestStats holds the best value seen so far for each metric ('err' until
    one is recorded) and bestStatsPaths the number of the path that produced
    it; both lists are modified in place. metrics is ordered
    pRMSD, coverage, span, wrmsd, less1, lessrad, radiicomp.'''
    # pRMSD and wrmsd keep the smallest value, every other metric the largest.
    # NOTE(review): radiicomp (index 6) keeps the largest value here, while
    # processData in this file ranks its radiicomp column with "min" --
    # confirm which direction is intended.
    smallerIsBetter = (True, False, False, True, False, False, False)
    for index, value in enumerate(metrics):
        current = bestStats[index]
        if current == 'err':
            improved = True
        elif smallerIsBetter[index]:
            improved = current > value
        else:
            improved = current < value
        if improved:
            bestStats[index] = value
            bestStatsPaths[index] = pathNumber


def tstTravelFindHoles(
        tstFileName, phiFileName, debugOut=False, borderSize=2,
        nearbyDistance=4.):
    '''find paths through the holes of a surface and write them out.

    if debugout is set, additional files are created. bordersize can change
    the amount of extra space around the protein. nearbydistance changes the
    distance that nearby residues are gathered from.

    tstFileName and phiFileName are the input tst and phi files. all output
    file names start with tstFileName:
      .HOLE.start.txt, .tree.py, .outside.py   written once per run
      .findholes.log                           one line per accepted path
      .N.pore.py, .N.path.py, .N.radii.txt     per accepted path number N
      .known.best.txt                          only if a known path file exists
      .regular.loops.py, .N.through.loop.py    only if debugOut is set
    paths.outputNearbyResidues is also called with the tstFileName.N prefix
    for each accepted path. a path is accepted when paths.checkPath returns
    a non-empty triangle list for it. nothing is returned.'''
    print("reading tst and phi files")
    tstD = tstdata.tstData(
        tstFileName, necessaryKeys=tstdata.tstData.necessaryKeysForHoles)
    phiData = phi(phiFileName)  # read in the phimap if possible
    gridSize = 1.0 / phiData.scale  # needed later
    numberHandles = tstD.countHandles()
    # same text as the old multi-item print statement (note the double spaces)
    print("there are  " + str(numberHandles) + "  holes in this structure")
    print("running travel depth now")
    phiTravelDepthGrid, phiTravelDepthData, meshData = tstTravelDepthMeshRun(
        tstD, phiData, tstFileName, borderSize=borderSize, threshold="auto")
    del phiData, phiTravelDepthGrid, phiTravelDepthData  # reclaim memory
    print("calculating travel out distance")
    meshData.calculateTravelDistance("surfout", [3], [0, 2])
    print("finding holes")
    (loopTrisSave, loopPointsSave, regLoopTris, regLoopPts, pointNeighbors,
     pointNeighborsNodes, outsidePoints, outsidePointsNodes,
     possHoleStarts) = tstTopology.fillInHolesAndGrow(
        tstD.dict['TRIANGLE_POINT'], tstD.dict['POINT_TRIANGLE'],
        tstD.dict['POINT_XYZ'], tstD.dict['NORM_XYZ'], numberHandles,
        tstFileName, debugOut, meshData, "surfout")
    if debugOut:
        tstdebug.debugTriangleList(
            regLoopTris, tstD.dict['TRIANGLE_POINT'], tstD.dict['POINT_XYZ'],
            tstFileName + ".regular.loops.py")
    tstDPointXYZ = tstD.dict['POINT_XYZ']  # save for later
    tstDPdbRecord = tstD.dict['PDB_RECORD']  # save for later
    del tstD  # rest of tstD isn't needed, so free memory
    # output the possible places where HOLE could start... centers of
    # regular plugs
    print("writing output files")
    paths.outputNodesText(possHoleStarts, tstFileName + ".HOLE.start.txt")
    pathsList = meshData.getPaths(
        "surfout", pointNeighbors, outsidePoints, possHoleStarts)
    del meshData  # deletes everything that has no other refs (from paths)
    allPoints = [point.pathXyz for point in pointNeighborsNodes.keys()]
    outsidePts = [point.pathXyz for point in outsidePointsNodes]
    tstdebug.pointDebug(allPoints, filename=tstFileName + ".tree.py")
    tstdebug.pointDebug(
        outsidePts, filename=tstFileName + ".outside.py",
        mainColor=(.9, .1, .1), radius=0.55)
    foundPaths = 0
    # very very specific... probably bad but nice...
    # can always rerun standalone later.
    # NOTE(review): when tstFileName does not contain ".nocav.tst" the replace
    # is a no-op, so pathFile is the tst file itself and knownPathExists is
    # true -- confirm whether that case needs guarding.
    pathFile = tstFileName.replace(".nocav.tst", ".py")
    knownPathExists = os.path.isfile(pathFile)
    logFile = open(tstFileName + ".findholes.log", 'w')
    try:  # make sure the log is closed even if a helper below raises
        logFile.write(
            "number endNumOne endNumTwo plugs stepLength pathLength "
            "pathMinRadius pathMaxInsideRadius endMinOne endMinTwo "
            "minimaCount travelDepthMax windingMetric avgTheta ")
        if knownPathExists:
            logFile.write("pRMSD coverage span wrmsd less1 lessrad radiicomp")
        logFile.write("\n")
        if knownPathExists:
            bestStats = ["err", "err", "err", "err", "err", "err", "err"]
            bestStatsPaths = [0, 0, 0, 0, 0, 0, 0]
        sortedByMinRadiusPaths = []
        for outsideOne, outsideTwo, plugs, nodePath in pathsList:
            pointPath = _radiusTaggedPath(nodePath)
            sortedByMinRadiusPaths.append((
                paths.pathMinRadius(pointPath), outsideOne, outsideTwo, plugs,
                nodePath, pointPath))
        # largest minimum radius first. the list is reversed before the
        # stable sort so that, among equal radii, the path that came later in
        # pathsList is listed first -- the order the previous hand-written
        # insertion sort produced.
        sortedByMinRadiusPaths.reverse()
        sortedByMinRadiusPaths.sort(key=lambda entry: entry[0], reverse=True)
        print("output files for individual paths")
        for (minRad, outsideOne, outsideTwo, plugs, nodePath,
             pointPath) in sortedByMinRadiusPaths:
            throughTris, throughPts = paths.checkPath(
                pointPath, loopPointsSave, tstDPointXYZ)
            if not throughTris:
                continue  # path rejected, nothing is written for it
            foundPaths += 1
            outName = tstFileName + "." + str(foundPaths)
            if debugOut:
                tstdebug.debugTrianglesNotOrig(
                    throughTris, tstDPointXYZ, outName + ".through.loop.py",
                    ptList=throughPts)
            # always do these 2
            tstdebug.debugSetGridSpheres(
                pointPath, gridSize, outName + ".pore.py", radius=True,
                mainColor=(0.01, 0.9, 0.05))
            tstdebug.debugSetGridSpheres(
                pointPath, gridSize, outName + ".path.py",
                mainColor=(.01, 0.95, 0.9))
            paths.outputRadiiTxt(pointPath, outName + ".radii.txt")
            paths.outputNearbyResidues(
                pointPath, outName, tstDPdbRecord, nearbyDistance)
            pathLen = paths.pathLength(pointPath)
            minimaCount = paths.pathMinimaCount(pointPath)
            maxRad, endMinOne, endMinTwo = paths.insideTwoMinimaRadiusMax(
                pointPath)
            travelDepthMax = paths.pathMaxDistance(nodePath, 'traveldepth')
            windingMetric = paths.computeWindingMetric(pointPath)
            _unusedThetas, avgTheta = paths.averageTheta(pointPath)
            # attempt to do pRMSD if possible...
            knownMetrics = ["err", "err", "err", "err", "err", "err", "err"]
            if knownPathExists:
                sourcePath = [pathPt[1:4] for pathPt in pointPath]
                sourceRadii = [pathPt[0] for pathPt in pointPath]
                try:
                    (prmsd, coverage, span, wrmsd, less1, lessrad,
                     radiicomp) = comparePathsManyMetrics(
                        False, pathFile, sourcePath, sourceRadii)
                    knownMetrics = [
                        prmsd, coverage, span, wrmsd, less1, lessrad,
                        radiicomp]
                    _updateBestStats(
                        bestStats, bestStatsPaths, knownMetrics, foundPaths)
                except (IOError, TypeError):
                    # deliberate best effort: if there is no usable known
                    # path file this should be the error, and the metrics
                    # stay "err" in the log
                    pass
            # now output data: every field is followed by a single space
            fields = [
                foundPaths, outsideOne, outsideTwo, plugs, len(pointPath),
                pathLen, minRad, maxRad, endMinOne, endMinTwo, minimaCount,
                travelDepthMax, windingMetric, avgTheta]
            if knownPathExists:
                fields.extend(knownMetrics)
            logFile.write(
                "".join([str(field) + " " for field in fields]) + "\n")
    finally:
        logFile.close()
    if knownPathExists:  # output bestStats and bestStatsPaths
        bestFile = open(tstFileName + ".known.best.txt", 'w')
        try:
            bestFile.write(
                "pRMSD coverage span wrmsd less1 lessrad radiicomp ")
            bestFile.write(
                "pRMSD# coverage# span# wrmsd# less1# lessrad# radiicomp#\n")
            for stat in bestStats + bestStatsPaths:
                bestFile.write(str(stat) + " ")
            bestFile.write("\n")
        finally:
            bestFile.close()
    print("done with chunnel")
def _writeLinesToFile(fileName, lines):
    """Write each string of lines to fileName followed by a newline.

    The file is closed even if a write fails.
    """
    fileOut = open(fileName, "w")
    try:
        for line in lines:
            fileOut.write(line + "\n")
    finally:
        fileOut.close()


def processData(dataList, nameList, listPaths, outputFileName="processed.foundholes."):
    """Summarize found-hole statistics into log files and histogram plots.

    dataList    -- one data table per structure; only ever handed to the
                   module helpers getMinColumn, getMaxColumn, getOneColumn,
                   getFromColumnIfMin/Max and processDataOne
    nameList    -- names parallel to dataList (same index = same structure)
    listPaths   -- drilled paths; each is measured with len() and the paths
                   helpers to provide reference histogram data
    outputFileName -- prefix prepended to every output file name

    Files written (all prefixed by outputFileName):
      overall.best.log                      best value per name and metric
      <metric>.best.rankings.log            one line per structure
      rankings.<col>.<metric>.log           and rankings.reverse.<col>...
      <column>.<metric>.png                 only when gnuplotAvailable
    Relies on the module-level names colNames, colNamesBonus,
    gnuplotAvailable, plotter and Gnuplot. Nothing is returned.
    """
    # the seven comparison metrics: their column numbers in the data, whether
    # the best value is the smallest or the largest, and a threshold each
    compCols = [8, 9, 10, 11, 12, 13, 14]  # column 8 is the prmsd
    comps = ["min", "max", "max", "min", "max", "max", "min"]
    compThreshs = [5.0, 0.4, 0.8, 2.5, 0.5, 0.8, 2.5]

    # first do the one big summary output file
    bestLists = []
    for position, sortColNumber in enumerate(compCols):
        if comps[position] == "min":
            findBest = getMinColumn
        else:
            findBest = getMaxColumn
        bestList = []
        for data in dataList:
            bestVal, _bestIndex = findBest(data, sortColNumber)
            bestList.append(bestVal)
        bestLists.append(bestList)
    fileOut = open(outputFileName + "overall.best.log", "w")
    try:  # close the summary even if a write fails
        fileOut.write("name pRMSD coverage span wRMSD less1 lessRad radiicomp\n")
        for nameIndex, name in enumerate(nameList):
            fileOut.write(name + " ")
            for bestList in bestLists:
                fileOut.write(str(bestList[nameIndex]) + " ")
            fileOut.write("\n")
    finally:
        fileOut.close()

    # per-metric ranking files
    for position, sortColNumber in enumerate(compCols):
        sortColName = colNamesBonus[sortColNumber - 4]
        bestRankStrings, sortedEvals, backwardsEvals = [], [], []
        for dataIndex, data in enumerate(dataList):
            bestRankString, sortedEval, backEval = processDataOne(
                data, nameList[dataIndex], sortColNumber, comps[position])
            bestRankStrings.append(bestRankString)
            sortedEvals.append(sortedEval)
            backwardsEvals.append(backEval)
        _writeLinesToFile(
            outputFileName + sortColName + ".best.rankings.log", bestRankStrings)
        for colIndex, colName in enumerate(colNames):
            _writeLinesToFile(
                outputFileName + "rankings." + colName + "." + sortColName + ".log",
                [line[colIndex] for line in sortedEvals])
            _writeLinesToFile(
                outputFileName + "rankings.reverse." + colName + "." + sortColName + ".log",
                [line[colIndex] for line in backwardsEvals])

    # reference data measured on the drilled paths; slots 0-3 are placeholders
    # so a column number can index this list directly, and only slots 4-7 are
    # ever filled
    drilledData = [False, False, False, False, [], [], [], [], [], [], [], [], [], []]
    for drilledPath in listPaths:
        drilledData[4].append(float(len(drilledPath)))
        drilledData[5].append(float(paths.pathLength(drilledPath)))
        drilledData[6].append(float(paths.pathMinRadius(drilledPath)))
        drilledData[7].append(float(paths.pathMaxInsideRadius(drilledPath)))

    # one histogram per (data column, comparison metric) pair
    for column in range(4, 14):
        columnName = colNamesBonus[column - 4]
        drilledColumn = drilledData[column]
        for position, sortColNumber in enumerate(compCols):
            sortColName = colNamesBonus[sortColNumber - 4]
            if comps[position] == "min":
                findBest, selectIf = getMinColumn, getFromColumnIfMin
            else:
                findBest, selectIf = getMaxColumn, getFromColumnIfMax
            columnData = []
            # NOTE(review): selectColumnData is filled but never read (the
            # plotting of the thresholded selection is disabled); the calls
            # are kept because the helpers' side effects are not visible here
            selectColumnData = []
            bestColumnData = []
            for data in dataList:
                bestVal, _bestIndex = findBest(data, sortColNumber)
                selectColumnData.extend(
                    selectIf(data, column, sortColNumber, compThreshs[position]))
                bestColumnData.extend(
                    selectIf(data, column, sortColNumber, bestVal))
                columnData.extend(getOneColumn(data, column))
            if drilledColumn:
                maxHere = max(max(columnData), max(drilledColumn)) + 1.0
            else:
                maxHere = max(columnData) + 1.0
            interval = maxHere / 40.0
            histogram = statistics.computeHistogram(
                columnData, interval, maxData=maxHere)
            bestHistogram = statistics.computeHistogram(
                bestColumnData, interval, maxData=histogram[1])
            if drilledColumn:
                realHistogram = statistics.computeHistogram(
                    drilledColumn, interval, maxData=histogram[1])
            else:
                realHistogram = False
            # need to scale the other histograms to be the same height as the
            # histogram of all the data
            maxHeight = max(histogram[0])
            maxBestHeight = max(bestHistogram[0])
            if realHistogram:
                maxRealHeight = max(realHistogram[0])
                realScaledCounts = [
                    histPoint * maxHeight / maxRealHeight
                    for histPoint in realHistogram[0]]
            bestScaledCounts = [
                histPoint * maxHeight / maxBestHeight
                for histPoint in bestHistogram[0]]
            # make gnuplot version if possible
            if gnuplotAvailable:
                xVals = [
                    cutoff * interval
                    for cutoff in range(2 + int(histogram[1] / interval))]
                graphData = Gnuplot.Data(xVals, histogram[0], title="All")
                graphBestData = Gnuplot.Data(
                    xVals, bestScaledCounts,
                    title="Best " + str(sortColName) + " Scaled by "
                    + str(maxHeight / maxBestHeight))
                if realHistogram:
                    graphRealData = Gnuplot.Data(
                        xVals, realScaledCounts,
                        title="Drilled Holes Scaled by "
                        + str(maxHeight / maxRealHeight))
                # NOTE(review): the cumulative data set is built but never
                # plotted; kept as-is pending confirmation it can be dropped
                graphDataCum = Gnuplot.Data(
                    xVals, statistics.computeCumulativeHistogram(histogram[0]))
                plotter("set terminal png")
                plotter(
                    'set output "' + outputFileName + columnName + "."
                    + sortColName + '.png"')
                plotter("set data style linespoints")
                plotter(
                    "set xrange [" + str(min(xVals) - 0.01) + ":"
                    + str(max(xVals) + 0.01) + "]")
                plotter.xlabel(columnName)
                plotter.ylabel("Count")
                plotter("set multiplot")
                plotter("set key right top")
                if realHistogram:
                    plotter.plot(graphData, graphBestData, graphRealData)
                else:
                    plotter.plot(graphData, graphBestData)
                plotter("unset multiplot")