def _writeLinesToFile(fileName, lines):
    """Write every string in *lines* to *fileName*, one string per line.

    The file is opened in a ``with`` block so the handle is released even
    when a write fails part-way through.
    """
    with open(fileName, "w") as fileOut:
        for line in lines:
            fileOut.write(line + "\n")


def processData(dataList, nameList, listPaths, outputFileName="processed.foundholes."):
    """Write the summary logs, ranking logs and histogram plots for a run.

    Outputs (every name is prefixed with *outputFileName*):

    * ``overall.best.log`` -- one row per entry of *nameList* holding the
      best value of each comparison column (8-14).
    * ``<sortName>.best.rankings.log`` plus ``rankings.<colName>.<sortName>.log``
      and ``rankings.reverse.<colName>.<sortName>.log`` -- built from the
      three values ``processDataOne`` returns for every data set.
    * ``<columnName>.<sortName>.png`` -- only when ``gnuplotAvailable`` is
      true: histogram of one data column over all data sets, overlaid with
      the rescaled histogram of the rows that achieve the best value of the
      sort column and, when *listPaths* supplies values for that column,
      the rescaled histogram of the drilled paths.

    Parameters:
        dataList: one data table per run; only ever handed to the
            module-level column helpers (``getMinColumn``, ``getOneColumn``,
            ``processDataOne``, ...).
        nameList: names parallel to *dataList* (indexed with the same
            position).
        listPaths: drilled paths; each is measured with ``len`` and the
            ``paths.path*`` helpers to fill columns 4-7 of the overlay.
        outputFileName: prefix prepended to every output file name.

    Returns:
        None.
    """
    compCols = [8, 9, 10, 11, 12, 13, 14]
    # whether the "best" value of the matching compCols entry is its min or max
    comps = ["min", "max", "max", "min", "max", "max", "min"]

    # first do the one big summary output file
    bestLists = []
    for colIdx, sortColNumber in enumerate(compCols):
        bestList = []
        for data in dataList:
            if comps[colIdx] == "min":
                bestVal, _ = getMinColumn(data, sortColNumber)  # index 8 is the prmsd
            else:
                bestVal, _ = getMaxColumn(data, sortColNumber)
            bestList.append(bestVal)
        bestLists.append(bestList)
    with open(outputFileName + "overall.best.log", "w") as fileOut:
        fileOut.write("name pRMSD coverage span wRMSD less1 lessRad radiicomp\n")
        for index, name in enumerate(nameList):
            fileOut.write(name + " ")
            for bestList in bestLists:
                fileOut.write(str(bestList[index]) + " ")
            fileOut.write("\n")

    # ranking logs, one family of files per comparison column
    for colIdx, sortColNumber in enumerate(compCols):
        bestRankStrings, sortedEvals, backwardsEvals = [], [], []
        for index, data in enumerate(dataList):
            bestRankString, sortedEval, backEval = processDataOne(
                data, nameList[index], sortColNumber, comps[colIdx]
            )
            bestRankStrings.append(bestRankString)
            sortedEvals.append(sortedEval)
            backwardsEvals.append(backEval)
        sortName = colNamesBonus[sortColNumber - 4]
        _writeLinesToFile(outputFileName + sortName + ".best.rankings.log", bestRankStrings)
        for index, colName in enumerate(colNames):
            _writeLinesToFile(
                outputFileName + "rankings." + colName + "." + sortName + ".log",
                [line[index] for line in sortedEvals],
            )
            _writeLinesToFile(
                outputFileName + "rankings.reverse." + colName + "." + sortName + ".log",
                [line[index] for line in backwardsEvals],
            )

    # measurements of the drilled paths, stored at the same column indices
    # as the data tables; only columns 4-7 are ever filled in here
    drilledData = [False, False, False, False, [], [], [], [], [], [], [], [], [], []]
    for drilledPath in listPaths:
        drilledData[4].append(float(len(drilledPath)))
        drilledData[5].append(float(paths.pathLength(drilledPath)))
        drilledData[6].append(float(paths.pathMinRadius(drilledPath)))
        drilledData[7].append(float(paths.pathMaxInsideRadius(drilledPath)))

    for column in range(4, 14):
        columnName = colNamesBonus[column - 4]
        drilledColumn = drilledData[column]
        for colIdx, sortColNumber in enumerate(compCols):
            sortName = colNamesBonus[sortColNumber - 4]
            columnData = []
            bestColumnData = []
            for data in dataList:
                if comps[colIdx] == "min":
                    bestVal, _ = getMinColumn(data, sortColNumber)
                    bestColumnData.extend(getFromColumnIfMin(data, column, sortColNumber, bestVal))
                else:
                    bestVal, _ = getMaxColumn(data, sortColNumber)
                    bestColumnData.extend(getFromColumnIfMax(data, column, sortColNumber, bestVal))
                columnData.extend(getOneColumn(data, column))
            if not columnData:
                # nothing to histogram (e.g. empty dataList); max() below
                # would otherwise raise ValueError
                continue
            if drilledColumn:
                maxHere = max(max(columnData), max(drilledColumn)) + 1.0
            else:
                maxHere = max(columnData) + 1.0
            interval = maxHere / 40.0
            histogram = statistics.computeHistogram(columnData, interval, maxData=maxHere)
            bestHistogram = statistics.computeHistogram(bestColumnData, interval, maxData=histogram[1])
            if drilledColumn:
                realHistogram = statistics.computeHistogram(drilledColumn, interval, maxData=histogram[1])
            else:
                realHistogram = None
            # need to scale the overlays to be the same height as the histogram
            maxHeight = max(histogram[0])
            maxBestHeight = max(bestHistogram[0])
            if realHistogram is not None:
                maxRealHeight = max(realHistogram[0])
                realScaledCounts = [
                    histPoint * maxHeight / maxRealHeight for histPoint in realHistogram[0]
                ]
            bestScaledCounts = [
                histPoint * maxHeight / maxBestHeight for histPoint in bestHistogram[0]
            ]
            # make gnuplot version if possible
            if gnuplotAvailable:
                # NOTE(review): ``plotter`` is not created in this function
                # (the original local Gnuplot.Gnuplot(debug=0) construction
                # was commented out), so it has to exist at module level --
                # confirm against the rest of the file.
                xVals = [cutoff * interval for cutoff in range(2 + int(histogram[1] / interval))]
                graphItems = [
                    Gnuplot.Data(xVals, histogram[0], title="All"),
                    Gnuplot.Data(
                        xVals,
                        bestScaledCounts,
                        title="Best " + str(sortName) + " Scaled by " + str(maxHeight / maxBestHeight),
                    ),
                ]
                if realHistogram is not None:
                    graphItems.append(
                        Gnuplot.Data(
                            xVals,
                            realScaledCounts,
                            title="Drilled Holes Scaled by " + str(maxHeight / maxRealHeight),
                        )
                    )
                plotter("set terminal png")
                plotter('set output "' + outputFileName + columnName + "." + sortName + '.png"')
                plotter("set data style linespoints")
                plotter("set xrange [" + str(min(xVals) - 0.01) + ":" + str(max(xVals) + 0.01) + "]")
                plotter.xlabel(columnName)
                plotter.ylabel("Count")
                plotter("set multiplot")
                plotter("set key right top")
                plotter.plot(*graphItems)
                plotter("unset multiplot")
def makeHistogramReport(
        residueData, outputFilename="histogram.bfactor", interval=0.1,
        yMaxHisto=0.16):
    """Write a histogram text report and, when possible, gnuplot PNG plots.

    The values of every residue named in ``aminoAcid3Codes`` are pooled into
    one normalized histogram that is written to ``<outputFilename>.txt``.
    When ``gnuplotAvailable`` is true the same data is also plotted: overall
    and carbon-beta-only histograms with their cumulative versions, one
    histogram and one cumulative plot per residue, and combined plots of all
    residues, of the residues in ``lowCodes`` and of those in ``highCodes``,
    plus a combined plot restricted to x in [1, 6].

    Parameters:
        residueData: mapping of residue name to a mapping whose values are
            lists of numbers; it is also indexed with
            ``carbonBetaCodes[residueName]`` to pick the carbon-beta list.
            (The x axis is labelled 'Travel In Distance', so the numbers are
            presumably travel-in distances -- confirm with the caller.)
        outputFilename: prefix for the ``.txt`` report and every ``.png``.
        interval: histogram bin width.
        yMaxHisto: upper y limit of the two overall (non-cumulative) plots.

    Returns:
        None.
    """
    #assemble into one big list
    totalList = []
    betaList = []
    # .items() instead of the Python-2-only .iteritems()
    for oneResName, oneResData in residueData.items():
        if oneResName in aminoAcid3Codes:
            for data in oneResData.values():
                totalList.extend(data)
            betaList.extend(oneResData[carbonBetaCodes[oneResName]])
    # same data again, but kept separate per residue
    resList = {}
    betaResList = {}
    for oneResKey in aminoAcid3Codes:
        resList[oneResKey] = []
        betaResList[oneResKey] = []
        if oneResKey in residueData:
            for data in residueData[oneResKey].values():
                resList[oneResKey].extend(data)
            betaResList[oneResKey].extend(
                residueData[oneResKey][carbonBetaCodes[oneResKey]])
    #now do histogram stuff
    histogram, maxData = statistics.computeNormalizedHistogram(
        totalList, interval, 8.)
    betaHistogram, _ = statistics.computeNormalizedHistogram(
        betaList, interval, 8.)
    with open(outputFilename + ".txt", 'w') as fileTemp:
        fileTemp.write("LowEndInterval Count\n")
        for index, data in enumerate(histogram):
            fileTemp.write(str(index*interval) + " " + str(data) + "\n")
    #make gnuplot version if possible
    if not gnuplotAvailable:
        return
    plotter = Gnuplot.Gnuplot(debug=0)
    xVals = [cutoff*interval for cutoff in range(2+int(maxData/interval))]
    fullXRange = (
        'set xrange [' + str(min(xVals)-1) + ':' + str(max(xVals)+1) + ']')
    graphData = Gnuplot.Data(xVals, histogram)
    graphDataCum = Gnuplot.Data(
        xVals, statistics.computeCumulativeHistogram(histogram))
    graphDataBeta = Gnuplot.Data(xVals, betaHistogram)
    graphDataBetaCum = Gnuplot.Data(
        xVals, statistics.computeCumulativeHistogram(betaHistogram))
    plotter('set terminal png')
    plotter('set output "' + outputFilename + '.png"')
    plotter('set data style linespoints')
    plotter(fullXRange)
    plotter('set yrange [' + str(0.) + ':' + str(yMaxHisto) + ']')
    plotter.xlabel('Travel In Distance')
    plotter.ylabel('Atom Count')
    plotter.plot(graphData)
    plotter('set output "' + outputFilename + '.beta.png"')
    plotter.ylabel('Carbon Beta Atom Count')
    plotter.plot(graphDataBeta)
    plotter('set output "' + outputFilename + '.cumulative.png"')
    plotter('set yrange []')  # automatic
    plotter.ylabel('Cumulative Atom Count')
    plotter.plot(graphDataCum)
    plotter.ylabel('Cumulative Beta Atom Count')
    plotter('set output "' + outputFilename + '.cumulative.beta.png"')
    plotter.plot(graphDataBetaCum)
    plotter.ylabel('Atom Count')
    #now do one for each residue
    plotter('set data style lines')
    ylabels = ('Atom Count', 'Carbon Beta Atom Count')
    outputNames = (outputFilename, outputFilename + '.beta')
    histogramData = (resList, betaResList)
    for thisResList, outputName, ylabel in zip(
            histogramData, outputNames, ylabels):
        # Reset the settings that the combined plots at the end of the
        # previous pass changed; without this the second (beta) pass drew
        # its per-residue plots clipped to x in [1:6] with the key at the
        # bottom, unlike the first pass.
        plotter(fullXRange)
        plotter('set key right top')
        plotter.ylabel(ylabel)
        histograms = {}
        maxOverRes = 0.
        for resName in aminoAcid3Codes:
            histogramRes, maxData = statistics.computeHistogram(
                thisResList[resName], interval, maxData)
            histograms[resName] = histogramRes
            maxOverRes = max(maxOverRes, max(histogramRes))
        countRange = 'set yrange [0:' + str(maxOverRes+1000) + ']'
        plotter(countRange)
        # each pair is (plain histogram items, cumulative histogram items)
        resGraphDatas = [], []
        lowGraphDatas = [], []
        highGraphDatas = [], []
        for resName in aminoAcid3Codes:
            plotter('set output "' + outputName + "." + resName + '.png"')
            plotter(countRange)
            histData = Gnuplot.Data(
                xVals, histograms[resName], title=resName)
            resGraphDatas[0].append(histData)
            plotter.plot(histData)
            plotter(
                'set output "' + outputName + "." + resName +
                '.cumulative.png"')
            plotter('set yrange [0:1]')
            cumData = Gnuplot.Data(
                xVals,
                statistics.computeCumulativeHistogram(histograms[resName]),
                title=resName)
            plotter.plot(cumData)
            resGraphDatas[1].append(cumData)
            if resName in highCodes:
                highGraphDatas[0].append(histData)
                highGraphDatas[1].append(cumData)
            elif resName in lowCodes:
                lowGraphDatas[0].append(histData)
                lowGraphDatas[1].append(cumData)
        outNames = (
            'set output "' + outputName + ".residues" + '.png"',
            'set output "' + outputName + ".residues" + '.cumulative.png"')
        lowNames = (
            'set output "' + outputName + ".residues.low" + '.png"',
            'set output "' + outputName + ".residues.low" +
            '.cumulative.png"')
        highNames = (
            'set output "' + outputName + ".residues.high" + '.png"',
            'set output "' + outputName + ".residues.high" +
            '.cumulative.png"')
        ranges = (countRange, 'set yrange [0:1]')
        # plot() takes any number of items, so unpack the lists rather than
        # hard-coding 20/11/9 indexed arguments; empty groups are skipped
        # because a bare "plot" command is an error in gnuplot
        for count, resGraphData in enumerate(resGraphDatas):
            plotter(outNames[count])
            plotter('set key right bottom')
            plotter(ranges[count])
            if resGraphData:
                plotter.plot(*resGraphData)
            if lowGraphDatas[count]:
                plotter(lowNames[count])
                plotter.plot(*lowGraphDatas[count])
            if highGraphDatas[count]:
                plotter(highNames[count])
                plotter.plot(*highGraphDatas[count])
        #limits on x dim
        outNames2 = (
            'set output "' + outputName + ".residues16" + '.png"',
            'set output "' + outputName + ".residues" + '.cumulative16.png"')
        for count, resGraphData in enumerate(resGraphDatas):
            plotter(outNames2[count])
            plotter('set key right bottom')
            plotter('set xrange [1:6]')
            plotter(ranges[count])
            if resGraphData:
                plotter.plot(*resGraphData)