def makeAminoAcidHistogram(
    plotter,  backValues, sideValues, caValues, cbValues, outNameAmino,
    interval=0.5):
  '''Tabulate and plot the four value histograms for one amino acid.

  The backbone, sidechain, C-alpha and C-beta value lists are each binned
  with statistics.computeHistogram, all using the same interval and the same
  overall maximum. The counts, followed by each count divided by the total
  of its own series, are written tab-separated to outNameAmino + ".res.txt".
  The four count series are then plotted into outNameAmino + ".png".

  plotter: object that is called with gnuplot command strings (for example
    'set terminal png') and provides xlabel(), ylabel() and plot().
  backValues, sideValues, caValues, cbValues: lists of numbers (they are
    concatenated with +).
  outNameAmino: path prefix shared by the text and the image output.
  interval: bin width handed to statistics.computeHistogram.

  Raises ValueError when all four value lists are empty, because the overall
  maximum cannot be computed.
  '''
  outTextFileName = outNameAmino + ".res.txt"
  # one shared maximum so that all four series are binned over the same range
  maxBoth = max(backValues+sideValues+caValues+cbValues)
  histograms = []
  for values in (backValues, sideValues, caValues, cbValues):
    histo, _ = statistics.computeHistogram(values, interval, maxBoth)
    histograms.append(histo)
  histoBack, histoSide, histoCa, histoCb = histograms
  xVals = [cutoff*interval for cutoff in range(2+int(maxBoth/interval))]
  graphDataBack = Gnuplot.Data(xVals, histoBack, title="Backbone")
  graphDataSide = Gnuplot.Data(xVals, histoSide, title="Sidechain")
  graphDataCa = Gnuplot.Data(xVals, histoCa, title="C-alpha")
  graphDataCb = Gnuplot.Data(xVals, histoCb, title="C-beta")
  # the series totals do not change from row to row, so sum each one once
  totals = [sum(histo) for histo in histograms]
  # every row indexes all five columns, so stop at the shortest of them
  rowCount = min([len(xVals)] + [len(histo) for histo in histograms])
  with open(outTextFileName, 'w') as outTextFile:
    outTextFile.write("xVal\tback\tside\tca\tcb\tbackN\tsideN\tcaN\tcbN\n")
    for index in range(rowCount):
      counts = [histo[index] for histo in histograms]
      fields = [str(xVals[index])]
      fields.extend([str(count) for count in counts])
      for count, total in zip(counts, totals):
        if total > 0.:
          fields.append(str(float(count)/total))
        else:
          # an empty series has no meaningful fraction; report zero
          fields.append("0.")
      outTextFile.write("\t".join(fields) + "\n")
  plotter('set terminal png')
  plotter('set output "' + outNameAmino + '.png"')
  # NOTE(review): 'set data style' looks like the legacy spelling of
  # 'set style data' -- confirm the installed gnuplot still accepts it
  plotter('set data style linespoints')
  plotter('set key right top')
  plotter('set xrange [' + str(min(xVals)-1) + ':' + str(max(xVals)+1) + ']')
  # the y range is sized from the backbone and sidechain series only
  plotter(
      'set yrange [' + str(0.) + ':' + str(1.05*max(histoBack+histoSide)) + ']')
  plotter.xlabel('Travel In Distance')
  plotter.ylabel('Atom Count')
  plotter.plot(graphDataBack, graphDataSide, graphDataCa, graphDataCb)
def summarizeOneFeature(tmDataList, columnName, intervals=50, outName="a.txt"):
  '''Write a table with one histogram row per structure for a single column.

  The column index is looked up once, on the first entry of tmDataList, and
  then read from every entry with getListColumn(). All structures share the
  same bin width, derived from the largest value seen in any of them.

  The tab-separated output starts with a header line (name, count, mean,
  stddev, then the low end of each bin) followed by one line per structure
  holding its inputFileName, number of values, mean, standard deviation and
  histogram counts.

  tmDataList: non-empty sequence of objects providing titleToColumn(),
    getListColumn() and an inputFileName attribute.
  columnName: title of the column to summarize.
  intervals: number of bins wanted, or the string "max" for a bin width of 1.
  outName: path of the file to write.
  '''
  with open(outName, 'w') as outFile:
    columnNum = tmDataList[0].titleToColumn(columnName)
    columnData = []  # one list of values per entry of tmDataList, same order
    overallMax = 0.
    for tm3tree in tmDataList:
      data = tm3tree.getListColumn(columnNum)
      overallMax = max(overallMax, max(data))
      columnData.append(data)
    if intervals == "max":
      intervals = overallMax  # 1 per
    # max() hands back an int when the data are ints; without float() Python 2
    # would floor this division, possibly to 0, and the header loop below
    # would then never terminate
    interval = float(overallMax)/intervals  # number of intervals desired
    #print a header
    outFile.write("name\tcount\tmean\tstddev\t")
    currentOut = 0.
    while currentOut < overallMax:
      outFile.write(str(currentOut) + "\t")
      currentOut += interval
    outFile.write("\n")
    for tm3tree, tm3data in zip(tmDataList, columnData):
      avgData = statistics.computeMean(tm3data)
      stddevData = statistics.computeStdDev(tm3data, avgData)
      histo, outMax = statistics.computeHistogram(
          tm3data, interval, overallMax)
      outFile.write(tm3tree.inputFileName + "\t")
      outFile.write(str(len(tm3data)) + "\t")
      outFile.write(str(avgData) + "\t")
      outFile.write(str(stddevData) + "\t")
      for histoCount in histo:
        outFile.write(str(histoCount) + "\t")
      outFile.write("\n")
def _writeLines(fileName, lines):
    """Write every string in lines to fileName, each followed by a newline."""
    with open(fileName, "w") as fileOut:
        for line in lines:
            fileOut.write(line + "\n")


def _scaleToHeight(counts, targetHeight):
    """Rescale counts so that their largest value becomes targetHeight.

    Returns (scaledCounts, factor). When the largest count is zero there is
    nothing to scale, so the counts are returned unchanged with a factor of 1
    rather than dividing by zero.
    """
    peak = max(counts)
    if peak == 0:
        return list(counts), 1
    # multiply first and divide last so integer counts are not floored early
    scaled = [count * targetHeight / peak for count in counts]
    return scaled, targetHeight / peak


def processData(dataList, nameList, listPaths, outputFileName="processed.foundholes."):
    """Write the best-value summary, the ranking logs and the column histograms.

    Three groups of output are produced, all named with the outputFileName
    prefix:

    * "overall.best.log": one line per name with the best value of each
      comparison column (columns 8-14; whether best means smallest or largest
      is fixed per column below).
    * for every comparison column, a ".best.rankings.log" file plus, for every
      entry of colNames, a "rankings." and a "rankings.reverse." log built
      from the results of processDataOne().
    * when gnuplotAvailable is true, one PNG per (column 4-13, comparison
      column) pair showing the histogram of all values, the histogram of the
      values belonging to the best rows scaled to the same height, and, where
      drilled-path measurements exist for the column, their scaled histogram.

    dataList: one data set per structure, in the same order as nameList.
    nameList: names used in the summary and handed to processDataOne().
    listPaths: drilled paths; their length and radius measurements feed
        columns 4-7 of the histograms.
    outputFileName: prefix for every file written.

    The plots are sent through the name ``plotter``, which this function does
    not define; it has to exist at module level.
    """
    # columns to rank by, and whether the best value in each is the min or max
    compCols = [8, 9, 10, 11, 12, 13, 14]
    comps = ["min", "max", "max", "min", "max", "max", "min"]

    # first do the one big summary output file
    bestLists = []
    for sortColNumber, comp in zip(compCols, comps):
        bestList = []
        for data in dataList:
            if comp == "min":
                bestVal, _ = getMinColumn(data, sortColNumber)
            else:
                bestVal, _ = getMaxColumn(data, sortColNumber)
            bestList.append(bestVal)
        bestLists.append(bestList)
    with open(outputFileName + "overall.best.log", "w") as fileOut:
        fileOut.write("name pRMSD coverage span wRMSD less1 lessRad radiicomp\n")
        for index, name in enumerate(nameList):
            fileOut.write(name + " ")
            for bestList in bestLists:
                fileOut.write(str(bestList[index]) + " ")
            fileOut.write("\n")

    # then the ranking logs, one set per comparison column
    for sortColNumber, comp in zip(compCols, comps):
        bestRankStrings, sortedEvals, backwardsEvals = [], [], []
        for index, data in enumerate(dataList):
            bestRankString, sortedEval, backEval = processDataOne(data, nameList[index], sortColNumber, comp)
            bestRankStrings.append(bestRankString)
            sortedEvals.append(sortedEval)
            backwardsEvals.append(backEval)
        sortName = colNamesBonus[sortColNumber - 4]
        _writeLines(outputFileName + sortName + ".best.rankings.log", bestRankStrings)
        for index, colName in enumerate(colNames):
            _writeLines(
                outputFileName + "rankings." + colName + "." + sortName + ".log",
                (line[index] for line in sortedEvals),
            )
            _writeLines(
                outputFileName + "rankings.reverse." + colName + "." + sortName + ".log",
                (line[index] for line in backwardsEvals),
            )

    # measurements of the drilled paths; only columns 4-7 ever receive values
    drilledData = [False, False, False, False, [], [], [], [], [], [], [], [], [], []]
    for drilledPath in listPaths:
        drilledData[4].append(float(len(drilledPath)))
        drilledData[5].append(float(paths.pathLength(drilledPath)))
        drilledData[6].append(float(paths.pathMinRadius(drilledPath)))
        drilledData[7].append(float(paths.pathMaxInsideRadius(drilledPath)))

    for column in range(4, 14):
        columnName = colNamesBonus[column - 4]
        drilledColumn = drilledData[column]
        for sortColNumber, comp in zip(compCols, comps):
            sortName = colNamesBonus[sortColNumber - 4]
            columnData = []
            bestColumnData = []
            for data in dataList:
                if comp == "min":
                    bestVal, _ = getMinColumn(data, sortColNumber)
                    bestColumnData.extend(getFromColumnIfMin(data, column, sortColNumber, bestVal))
                else:
                    bestVal, _ = getMaxColumn(data, sortColNumber)
                    bestColumnData.extend(getFromColumnIfMax(data, column, sortColNumber, bestVal))
                columnData.extend(getOneColumn(data, column))
            # the histogram range has to cover the drilled values as well
            if drilledColumn:
                maxHere = max(max(columnData), max(drilledColumn)) + 1.0
            else:
                maxHere = max(columnData) + 1.0
            interval = maxHere / 40.0
            histogram = statistics.computeHistogram(columnData, interval, maxData=maxHere)
            bestHistogram = statistics.computeHistogram(bestColumnData, interval, maxData=histogram[1])
            realHistogram = None
            if drilledColumn:
                realHistogram = statistics.computeHistogram(drilledColumn, interval, maxData=histogram[1])
            # scale the subset histograms to the same height as the full one
            maxHeight = max(histogram[0])
            bestScaled, bestFactor = _scaleToHeight(bestHistogram[0], maxHeight)
            if realHistogram is not None:
                realScaled, realFactor = _scaleToHeight(realHistogram[0], maxHeight)
            # make gnuplot version if possible
            if gnuplotAvailable:
                xVals = [cutoff * interval for cutoff in range(2 + int(histogram[1] / interval))]
                graphItems = [
                    Gnuplot.Data(xVals, histogram[0], title="All"),
                    Gnuplot.Data(
                        xVals,
                        bestScaled,
                        title="Best " + str(sortName) + " Scaled by " + str(bestFactor),
                    ),
                ]
                if realHistogram is not None:
                    graphItems.append(
                        Gnuplot.Data(xVals, realScaled, title="Drilled Holes Scaled by " + str(realFactor))
                    )
                plotter("set terminal png")
                plotter('set output "' + outputFileName + columnName + "." + sortName + '.png"')
                plotter("set data style linespoints")
                plotter("set xrange [" + str(min(xVals) - 0.01) + ":" + str(max(xVals) + 0.01) + "]")
                plotter.xlabel(columnName)
                plotter.ylabel("Count")
                plotter("set multiplot")
                plotter("set key right top")
                plotter.plot(*graphItems)
                plotter("unset multiplot")
def makeHistogramReport(
    residueData, outputFilename="histogram.bfactor", interval=0.1,
    yMaxHisto=0.16):
  '''Write a histogram table and, if gnuplot is available, a set of plots.

  Values are pooled over every residue whose name is in aminoAcid3Codes. The
  normalized histogram of the pooled values is written to
  outputFilename + ".txt" as "LowEndInterval Count" lines. When
  gnuplotAvailable is true, PNG plots are written for the pooled histogram,
  for the pooled carbon-beta values, for their cumulative forms, for every
  single residue, and for all residues together as well as the lowCodes and
  highCodes groups.

  residueData: dict from residue name to a dict whose values are lists of
    numbers. For every residue in aminoAcid3Codes that is present, the inner
    dict must contain the key carbonBetaCodes[residue name].
  outputFilename: path prefix for every file written.
  interval: histogram bin width.
  yMaxHisto: upper end of the y range of the two pooled plots.
  '''
  totalList = []
  betaList = []
  # items() rather than iteritems() so this runs on Python 2 and 3 alike
  for oneResName, oneResData in residueData.items():
    #assemble into one big list
    if oneResName in aminoAcid3Codes:
      for data in oneResData.values():
        totalList.extend(data)
      betaList.extend(oneResData[carbonBetaCodes[oneResName]])
  # the same values again, kept separately per residue
  resList = {}
  betaResList = {}
  for oneResKey in aminoAcid3Codes:
    resList[oneResKey] = []
    betaResList[oneResKey] = []
    if oneResKey in residueData:
      for data in residueData[oneResKey].values():
        resList[oneResKey].extend(data)
      betaResList[oneResKey].extend(
          residueData[oneResKey][carbonBetaCodes[oneResKey]])
  #now do histogram stuff
  histogram, maxData = statistics.computeNormalizedHistogram(
      totalList, interval, 8.)
  betaHistogram, betaMax = statistics.computeNormalizedHistogram(
      betaList, interval, 8.)
  # the file is only opened once the data is ready, and is closed even when
  # a write fails
  with open(outputFilename + ".txt", 'w') as fileTemp:
    fileTemp.write("LowEndInterval Count\n")
    for index, data in enumerate(histogram):
      fileTemp.write(str(index*interval) + " " + str(data) + "\n")
  #make gnuplot version if possible
  if gnuplotAvailable:
    plotter = Gnuplot.Gnuplot(debug=0)
    xVals = [cutoff*interval for cutoff in range(2+int(maxData/interval))]
    graphData = Gnuplot.Data(xVals, histogram)
    graphDataCum = Gnuplot.Data(
        xVals, statistics.computeCumulativeHistogram(histogram))
    graphDataBeta = Gnuplot.Data(xVals, betaHistogram)
    graphDataBetaCum = Gnuplot.Data(
        xVals, statistics.computeCumulativeHistogram(betaHistogram))
    plotter('set terminal png')
    plotter('set output "' + outputFilename + '.png"')
    plotter('set data style linespoints')
    plotter('set xrange [' + str(min(xVals)-1) + ':' + str(max(xVals)+1) + ']')
    plotter('set yrange [' + str(0.) + ':' + str(yMaxHisto) + ']')
    plotter.xlabel('Travel In Distance')
    plotter.ylabel('Atom Count')
    plotter.plot(graphData)
    plotter('set output "' + outputFilename + '.beta.png"')
    plotter.ylabel('Carbon Beta Atom Count')
    plotter.plot(graphDataBeta)
    plotter('set output "' + outputFilename + '.cumulative.png"')
    plotter('set yrange []')  # automatic
    plotter.ylabel('Cumulative Atom Count')
    plotter.plot(graphDataCum)
    plotter.ylabel('Cumulative Beta Atom Count')
    plotter('set output "' + outputFilename + '.cumulative.beta.png"')
    plotter.plot(graphDataBetaCum)
    plotter.ylabel('Atom Count')
    #now do one for each residue
    plotter('set key right top')
    plotter('set data style lines')
    ylabels = ('Atom Count', 'Carbon Beta Atom Count')
    outputNames = (outputFilename, outputFilename + '.beta')
    histogramData = (resList, betaResList)
    # first pass covers all values, second pass only the carbon-beta values
    for index, thisResList in enumerate(histogramData):
      outputName = outputNames[index]
      ylabel = ylabels[index]
      plotter.ylabel(ylabel)
      histograms = {}
      maxOverRes = 0.
      for resName in aminoAcid3Codes:
        # maxData is fed back in, so each call uses the maximum the previous
        # call returned
        histogramRes, maxData = statistics.computeHistogram(
            thisResList[resName], interval, maxData)
        histograms[resName] = histogramRes
        maxOverRes = max(maxOverRes, max(histogramRes))
      plotter('set yrange [0:' + str(maxOverRes+1000) + ']')
      # slot 0 holds the plain histograms, slot 1 the cumulative ones
      resGraphDatas = [], []
      lowGraphDatas = [], []
      highGraphDatas = [], []
      for resName in aminoAcid3Codes:
        plotter('set output "' + outputName + "." + resName + '.png"')
        plotter('set yrange [0:' + str(maxOverRes + 1000) + ']')
        histogramRes = histograms[resName]
        resGraphDatas[0].append(
            Gnuplot.Data(xVals, histogramRes, title=resName))
        plotter.plot(resGraphDatas[0][-1])
        plotter(
            'set output "' + outputName + "." + resName + '.cumulative.png"')
        plotter('set yrange [0:1]')
        cumData = Gnuplot.Data(
            xVals, statistics.computeCumulativeHistogram(histogramRes),
            title=resName)
        plotter.plot(cumData)
        resGraphDatas[1].append(cumData)
        if resName in highCodes:
          highGraphDatas[0].append(resGraphDatas[0][-1])
          highGraphDatas[1].append(cumData)
        elif resName in lowCodes:
          lowGraphDatas[0].append(resGraphDatas[0][-1])
          lowGraphDatas[1].append(cumData)
      outNames = (
          'set output "' + outputName + ".residues" + '.png"',
          'set output "' + outputName + ".residues" + '.cumulative.png"')
      lowNames = (
          'set output "' + outputName + ".residues.low" + '.png"',
          'set output "' + outputName + ".residues.low" + '.cumulative.png"')
      highNames = (
          'set output "' + outputName + ".residues.high" + '.png"',
          'set output "' + outputName + ".residues.high" + '.cumulative.png"')
      ranges = (
          'set yrange [0:' + str(maxOverRes+1000) + ']', 'set yrange [0:1]')
      for count, resGraphData in enumerate(resGraphDatas):
        plotter(outNames[count])
        plotter('set key right bottom')
        plotter(ranges[count])
        # plot() takes any number of items, so unpack each list instead of
        # indexing a fixed count that must match the size of the code lists
        plotter.plot(*resGraphData)
        lowGraphData = lowGraphDatas[count]
        plotter(lowNames[count])
        plotter.plot(*lowGraphData)
        highGraphData = highGraphDatas[count]
        plotter(highNames[count])
        plotter.plot(*highGraphData)
      #limits on x dim
      outNames2 = (
          'set output "' + outputName + ".residues16" + '.png"',
          'set output "' + outputName + ".residues" + '.cumulative16.png"')
      for count, resGraphData in enumerate(resGraphDatas):
        plotter(outNames2[count])
        plotter('set key right bottom')
        plotter('set xrange [1:6]')
        plotter(ranges[count])
        plotter.plot(*resGraphData)