def makeAminoAcidHistogram(
        plotter, backValues, sideValues, caValues, cbValues, outNameAmino,
        interval=0.5):
    '''makes one histogram for one amino acid for the backbone/sidechain values

    The four value lists (backbone, sidechain, C-alpha, C-beta) are each
    binned with statistics.computeHistogram using the same interval and the
    same maximum (the largest value found in any of the four lists), so the
    four series share one x axis.

    Two files are produced:
      outNameAmino + ".res.txt"  tab-separated table with one row per bin:
          the bin's x value, the four raw counts, then each count divided by
          the total of its own histogram ("0." when that total is zero)
      outNameAmino + ".png"      gnuplot plot of the four series

    plotter is called with gnuplot command strings and must also provide
    xlabel(), ylabel() and plot().  The four value arguments must be lists
    (they are concatenated with +).

    Raises ValueError (from max) when all four value lists are empty.
    '''
    maxBoth = max(backValues + sideValues + caValues + cbValues)
    # computeHistogram returns a pair; only its first element (the counts)
    # is used here
    histoBack = statistics.computeHistogram(backValues, interval, maxBoth)[0]
    histoSide = statistics.computeHistogram(sideValues, interval, maxBoth)[0]
    histoCa = statistics.computeHistogram(caValues, interval, maxBoth)[0]
    histoCb = statistics.computeHistogram(cbValues, interval, maxBoth)[0]
    xVals = [
        cutoff * interval for cutoff in range(2 + int(maxBoth / interval))]
    graphDataBack = Gnuplot.Data(xVals, histoBack, title="Backbone")
    graphDataSide = Gnuplot.Data(xVals, histoSide, title="Sidechain")
    graphDataCa = Gnuplot.Data(xVals, histoCa, title="C-alpha")
    graphDataCb = Gnuplot.Data(xVals, histoCb, title="C-beta")

    # the totals do not change from row to row, so compute them only once
    totals = [sum(histo) for histo in (histoBack, histoSide, histoCa, histoCb)]
    # 'with' closes the file even if one of the writes raises
    with open(outNameAmino + ".res.txt", 'w') as outTextFile:
        outTextFile.write(
            "xVal\tback\tside\tca\tcb\tbackN\tsideN\tcaN\tcbN\n")
        for row in zip(xVals, histoBack, histoSide, histoCa, histoCb):
            fields = [str(value) for value in row]
            for count, total in zip(row[1:], totals):
                if total > 0.:
                    fields.append(str(float(count) / total))
                else:
                    fields.append("0.")
            outTextFile.write("\t".join(fields) + "\n")

    plotter('set terminal png')
    plotter('set output "' + outNameAmino + '.png"')
    plotter('set data style linespoints')
    plotter('set key right top')
    plotter(
        'set xrange [' + str(min(xVals) - 1) + ':' + str(max(xVals) + 1) + ']')
    # the y range is sized from the backbone and sidechain series only
    plotter(
        'set yrange [' + str(0.) + ':' +
        str(1.05 * max(histoBack + histoSide)) + ']')
    plotter.xlabel('Travel In Distance')
    plotter.ylabel('Atom Count')
    plotter.plot(graphDataBack, graphDataSide, graphDataCa, graphDataCb)
def summarizeOneFeature(tmDataList, columnName, intervals=50, outName="a.txt"):
    '''takes that column, makes a histogram for each structure

    Writes a tab-separated table to outName.  The header row is
    name, count, mean, stddev followed by the start of every histogram bin;
    each following row describes one element of tmDataList: its
    inputFileName, the number of values in the column, their mean and
    standard deviation (statistics.computeMean / computeStdDev) and the
    histogram counts from statistics.computeHistogram.  All rows are binned
    with the same interval and the same overall maximum.

    tmDataList  objects providing titleToColumn(), getListColumn() and an
                inputFileName attribute; the column index is looked up on
                the first element only
    columnName  title of the column to summarize
    intervals   number of bins desired, or the string "max" for bins of
                width 1
    outName     path of the output file

    Raises IndexError when tmDataList is empty, ValueError when a column has
    no values, and ZeroDivisionError when the bin width cannot be computed
    (intervals is 0, or intervals is "max" and no value is above zero).
    '''
    columnNum = tmDataList[0].titleToColumn(columnName)
    columns = [tm3tree.getListColumn(columnNum) for tm3tree in tmDataList]
    overallMax = 0.
    for data in columns:
        overallMax = max(overallMax, max(data))
    if intervals == "max":
        intervals = overallMax  # 1 per
    # max() hands back an int when the data are ints; without float() the
    # division below truncates to 0 under Python 2 and the header loop
    # would never terminate
    interval = float(overallMax) / intervals  # number of intervals desired
    # the file is opened only once the data has been gathered, so a failure
    # above does not leave a truncated output file behind
    with open(outName, 'w') as outFile:
        # print a header
        outFile.write("name\tcount\tmean\tstddev\t")
        currentOut = 0.
        while currentOut < overallMax:
            outFile.write(str(currentOut) + "\t")
            currentOut += interval
        outFile.write("\n")
        for tm3tree, tm3data in zip(tmDataList, columns):
            avgData = statistics.computeMean(tm3data)
            stddevData = statistics.computeStdDev(tm3data, avgData)
            histo = statistics.computeHistogram(
                tm3data, interval, overallMax)[0]
            fields = [
                tm3tree.inputFileName, str(len(tm3data)), str(avgData),
                str(stddevData)]
            fields.extend(str(histoCount) for histoCount in histo)
            # every field, including the last, is followed by a tab
            outFile.write("\t".join(fields) + "\t\n")
def _scaleHistogram(counts, targetHeight):
    '''rescales counts so that their peak equals targetHeight

    Returns (scaledCounts, factor).  The arithmetic is done in floats so the
    factor is not truncated by integer division.  A histogram with no
    non-zero peak cannot be scaled; it comes back as zeros with factor 0.0.
    '''
    peak = max(counts) if len(counts) else 0
    if not peak:
        return [0.0 for count in counts], 0.0
    target = float(targetHeight)
    return [count * target / peak for count in counts], target / peak


def processData(dataList, nameList, listPaths, outputFileName="processed.foundholes."):
    '''writes the best-value summary, the ranking logs and histogram plots

    Columns 8-14 of each entry of dataList are the sort criteria; for each
    one the best value is the minimum or the maximum, as listed in comps.
    Every output name starts with outputFileName:

      overall.best.log
          one line per name in nameList with the best value of each
          criterion
      <criterion>.best.rankings.log
          first string returned by processDataOne for each data entry
      rankings.<colName>.<criterion>.log
      rankings.reverse.<colName>.<criterion>.log
          for each colName in colNames, the matching element of the second
          and third values returned by processDataOne
      <column>.<criterion>.png   (only when gnuplotAvailable is true)
          for columns 4-13: histogram of all values of the column, the
          values selected by getFromColumnIfMin/getFromColumnIfMax using
          the best criterion value as threshold, and for columns 4-7 the
          statistics of the paths in listPaths; the latter two are rescaled
          to the height of the first

    Column and criterion names come from colNamesBonus[number - 4].

    dataList   per-structure data, as accepted by getMinColumn,
               getMaxColumn, getOneColumn and processDataOne
    nameList   names parallel to dataList
    listPaths  paths measured with len() and paths.pathLength,
               paths.pathMinRadius and paths.pathMaxInsideRadius

    NOTE(review): plotter is not created in this function; it has to exist
    at module level when gnuplotAvailable is true -- confirm.
    '''
    compCols = [8, 9, 10, 11, 12, 13, 14]  # index 8 is the prmsd
    comps = ["min", "max", "max", "min", "max", "max", "min"]
    criteria = list(zip(comps, compCols))

    # first do the one big summary output file
    bestLists = []
    for comp, sortColNumber in criteria:
        pickBest = getMinColumn if comp == "min" else getMaxColumn
        # pickBest returns (value, index); only the value is reported
        bestLists.append(
            [pickBest(data, sortColNumber)[0] for data in dataList])
    with open(outputFileName + "overall.best.log", "w") as fileOut:
        fileOut.write("name pRMSD coverage span wRMSD less1 lessRad radiicomp\n")
        for index, name in enumerate(nameList):
            fileOut.write(name + " ")
            for bestList in bestLists:
                fileOut.write(str(bestList[index]) + " ")
            fileOut.write("\n")

    # ranking logs, one set per sort criterion
    for comp, sortColNumber in criteria:
        sortName = colNamesBonus[sortColNumber - 4]
        bestRankStrings, sortedEvals, backwardsEvals = [], [], []
        for index, data in enumerate(dataList):
            bestRankString, sortedEval, backEval = processDataOne(
                data, nameList[index], sortColNumber, comp)
            bestRankStrings.append(bestRankString)
            sortedEvals.append(sortedEval)
            backwardsEvals.append(backEval)
        with open(outputFileName + sortName + ".best.rankings.log", "w") as fileOut:
            for bestRankStr in bestRankStrings:
                fileOut.write(bestRankStr + "\n")
        for index, colName in enumerate(colNames):
            forwardName = (
                outputFileName + "rankings." + colName + "." + sortName + ".log")
            with open(forwardName, "w") as fileOut:
                for line in sortedEvals:
                    fileOut.write(line[index] + "\n")
            reverseName = (
                outputFileName + "rankings.reverse." + colName + "." +
                sortName + ".log")
            with open(reverseName, "w") as fileOut:
                for line in backwardsEvals:
                    fileOut.write(line[index] + "\n")

    if not gnuplotAvailable:
        # everything below is only ever consumed by the plots
        return

    # statistics of the drilled paths, keyed by the data column they are
    # compared against; the other columns have no drilled counterpart
    drilledData = {4: [], 5: [], 6: [], 7: []}
    for drilledPath in listPaths:
        drilledData[4].append(float(len(drilledPath)))
        drilledData[5].append(float(paths.pathLength(drilledPath)))
        drilledData[6].append(float(paths.pathMinRadius(drilledPath)))
        drilledData[7].append(float(paths.pathMaxInsideRadius(drilledPath)))

    for column in range(4, 14):
        columnName = colNamesBonus[column - 4]
        drilled = drilledData.get(column, [])
        # the histogram of all values and of the drilled paths depends only
        # on the column, so it is built once rather than once per criterion
        columnData = []
        for data in dataList:
            columnData.extend(getOneColumn(data, column))
        maxHere = max(columnData)
        if drilled:
            maxHere = max(maxHere, max(drilled))
        maxHere += 1.0
        interval = maxHere / 40.0
        histogram = statistics.computeHistogram(
            columnData, interval, maxData=maxHere)
        allCounts, histoMax = histogram[0], histogram[1]
        maxHeight = max(allCounts)
        xVals = [
            cutoff * interval for cutoff in range(2 + int(histoMax / interval))]
        if drilled:
            realCounts = statistics.computeHistogram(
                drilled, interval, maxData=histoMax)[0]
            # need to scale to be same height as histogram
            realScaled, realFactor = _scaleHistogram(realCounts, maxHeight)

        for (comp, sortColNumber), bestList in zip(criteria, bestLists):
            sortName = colNamesBonus[sortColNumber - 4]
            selectBest = (
                getFromColumnIfMin if comp == "min" else getFromColumnIfMax)
            bestColumnData = []
            # bestList already holds each structure's best criterion value
            for data, bestVal in zip(dataList, bestList):
                bestColumnData.extend(
                    selectBest(data, column, sortColNumber, bestVal))
            bestCounts = statistics.computeHistogram(
                bestColumnData, interval, maxData=histoMax)[0]
            bestScaled, bestFactor = _scaleHistogram(bestCounts, maxHeight)

            graphData = Gnuplot.Data(xVals, allCounts, title="All")
            graphBestData = Gnuplot.Data(
                xVals,
                bestScaled,
                title="Best " + str(sortName) + " Scaled by " + str(bestFactor),
            )
            plotter("set terminal png")
            plotter('set output "' + outputFileName + columnName + "." + sortName + '.png"')
            plotter("set data style linespoints")
            plotter("set xrange [" + str(min(xVals) - 0.01) + ":" + str(max(xVals) + 0.01) + "]")
            plotter.xlabel(columnName)
            plotter.ylabel("Count")
            plotter("set multiplot")
            plotter("set key right top")
            if drilled:
                graphRealData = Gnuplot.Data(
                    xVals, realScaled, title="Drilled Holes Scaled by " + str(realFactor)
                )
                plotter.plot(graphData, graphBestData, graphRealData)
            else:
                plotter.plot(graphData, graphBestData)
            plotter("unset multiplot")
def makeHistogramReport(
        residueData, outputFilename="histogram.bfactor", interval=0.1,
        yMaxHisto=0.16):
    '''writes the overall histogram as text and, with gnuplot, as plots

    residueData maps a residue name to a mapping whose values are lists of
    numbers; carbonBetaCodes[name] is the key of the list used for the
    carbon-beta variants.  Only residues listed in aminoAcid3Codes are used.

    outputFilename + ".txt" receives one "bin-start count" line per bin of
    the normalized histogram of all values.  When gnuplotAvailable is true
    the following images are also written (each name starts with
    outputFilename):
      .png, .beta.png, .cumulative.png, .cumulative.beta.png
          overall and carbon-beta histograms, plain and cumulative
    and, once for all values and once with ".beta" inserted for the
    carbon-beta values:
      .<RES>.png, .<RES>.cumulative.png     one pair per residue
      .residues.png, .residues.cumulative.png          all residues together
      .residues.low.png, .residues.low.cumulative.png  residues in lowCodes
      .residues.high.png, .residues.high.cumulative.png  residues in
          highCodes (a residue in both sets counts as high)
      .residues16.png, .residues.cumulative16.png
          all residues together, x range limited to [1:6]

    interval   bin width
    yMaxHisto  upper y limit of the two overall (non-cumulative) plots
    '''
    totalList = []
    betaList = []
    # items() rather than iteritems() so this runs on Python 2 and 3
    for oneResName, oneResData in residueData.items():
        # assemble into one big list
        if oneResName in aminoAcid3Codes:
            for data in oneResData.values():
                totalList.extend(data)
            betaList.extend(oneResData[carbonBetaCodes[oneResName]])
    resList = {}
    betaResList = {}
    for oneResKey in aminoAcid3Codes:
        resList[oneResKey] = []
        betaResList[oneResKey] = []
        if oneResKey in residueData:
            for data in residueData[oneResKey].values():
                resList[oneResKey].extend(data)
            betaResList[oneResKey].extend(
                residueData[oneResKey][carbonBetaCodes[oneResKey]])

    # now do histogram stuff
    histogram, maxData = statistics.computeNormalizedHistogram(
        totalList, interval, 8.)
    betaHistogram = statistics.computeNormalizedHistogram(
        betaList, interval, 8.)[0]
    # the file is opened only once its contents exist and is closed even if
    # a write fails
    with open(outputFilename + ".txt", 'w') as fileTemp:
        fileTemp.write("LowEndInterval Count\n")
        for index, data in enumerate(histogram):
            fileTemp.write(str(index * interval) + " " + str(data) + "\n")

    # make gnuplot version if possible
    if not gnuplotAvailable:
        return
    plotter = Gnuplot.Gnuplot(debug=0)

    def plotAll(graphs):
        '''plots every item of graphs on one image; an empty group is
        skipped because plot() without items would redraw the last plot'''
        if graphs:
            plotter.plot(*graphs)

    xVals = [
        cutoff * interval for cutoff in range(2 + int(maxData / interval))]
    fullXRange = (
        'set xrange [' + str(min(xVals) - 1) + ':' + str(max(xVals) + 1) + ']')
    graphData = Gnuplot.Data(xVals, histogram)
    graphDataCum = Gnuplot.Data(
        xVals, statistics.computeCumulativeHistogram(histogram))
    graphDataBeta = Gnuplot.Data(xVals, betaHistogram)
    graphDataBetaCum = Gnuplot.Data(
        xVals, statistics.computeCumulativeHistogram(betaHistogram))
    plotter('set terminal png')
    plotter('set output "' + outputFilename + '.png"')
    plotter('set data style linespoints')
    plotter(fullXRange)
    plotter('set yrange [' + str(0.) + ':' + str(yMaxHisto) + ']')
    plotter.xlabel('Travel In Distance')
    plotter.ylabel('Atom Count')
    plotter.plot(graphData)
    plotter('set output "' + outputFilename + '.beta.png"')
    plotter.ylabel('Carbon Beta Atom Count')
    plotter.plot(graphDataBeta)
    plotter('set output "' + outputFilename + '.cumulative.png"')
    plotter('set yrange []')  # automatic
    plotter.ylabel('Cumulative Atom Count')
    plotter.plot(graphDataCum)
    plotter.ylabel('Cumulative Beta Atom Count')
    plotter('set output "' + outputFilename + '.cumulative.beta.png"')
    plotter.plot(graphDataBetaCum)
    plotter.ylabel('Atom Count')

    # now do one for each residue
    plotter('set data style lines')
    passes = (
        (resList, outputFilename, 'Atom Count'),
        (betaResList, outputFilename + '.beta', 'Carbon Beta Atom Count'))
    for thisResList, outputName, ylabel in passes:
        # every pass starts from the same plotter state; otherwise the key
        # position and the [1:6] x range set at the end of the previous
        # pass would carry over into this pass's full-range plots
        plotter('set key right top')
        plotter(fullXRange)
        plotter.ylabel(ylabel)
        histograms = {}
        maxOverRes = 0.
        for resName in aminoAcid3Codes:
            # the maximum returned by one call is passed on to the next
            histogramRes, maxData = statistics.computeHistogram(
                thisResList[resName], interval, maxData)
            histograms[resName] = histogramRes
            maxOverRes = max(maxOverRes, max(histogramRes))
        rawRange = 'set yrange [0:' + str(maxOverRes + 1000) + ']'
        cumRange = 'set yrange [0:1]'
        # each group is (plain graphs, cumulative graphs)
        resGraphDatas = [], []
        lowGraphDatas = [], []
        highGraphDatas = [], []
        for resName in aminoAcid3Codes:
            histogramRes = histograms[resName]
            plotter('set output "' + outputName + "." + resName + '.png"')
            plotter(rawRange)
            rawData = Gnuplot.Data(xVals, histogramRes, title=resName)
            plotter.plot(rawData)
            plotter(
                'set output "' + outputName + "." + resName +
                '.cumulative.png"')
            plotter(cumRange)
            cumData = Gnuplot.Data(
                xVals, statistics.computeCumulativeHistogram(histogramRes),
                title=resName)
            plotter.plot(cumData)
            resGraphDatas[0].append(rawData)
            resGraphDatas[1].append(cumData)
            if resName in highCodes:
                highGraphDatas[0].append(rawData)
                highGraphDatas[1].append(cumData)
            elif resName in lowCodes:
                lowGraphDatas[0].append(rawData)
                lowGraphDatas[1].append(cumData)

        # combined plots; index 0 is the plain set, index 1 the cumulative
        suffixes = ('.png"', '.cumulative.png"')
        ranges = (rawRange, cumRange)
        for count, suffix in enumerate(suffixes):
            plotter('set output "' + outputName + ".residues" + suffix)
            plotter('set key right bottom')
            plotter(ranges[count])
            # unpacking plots every residue, however many each group holds
            plotAll(resGraphDatas[count])
            plotter('set output "' + outputName + ".residues.low" + suffix)
            plotAll(lowGraphDatas[count])
            plotter('set output "' + outputName + ".residues.high" + suffix)
            plotAll(highGraphDatas[count])

        # limits on x dim
        outNames2 = (
            'set output "' + outputName + ".residues16" + '.png"',
            'set output "' + outputName + ".residues" + '.cumulative16.png"')
        for count, outName2 in enumerate(outNames2):
            plotter(outName2)
            plotter('set key right bottom')
            plotter('set xrange [1:6]')
            plotter(ranges[count])
            plotAll(resGraphDatas[count])