Python computeHistogram Examples

Programming Language: Python

Namespace/Package Name: statistics

Method/Function: computeHistogram

Examples at hotexamples.com: 4

Python computeHistogram - 4 examples found. These are the top-rated real-world Python examples of statistics.computeHistogram, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: analyzePdbB.py Project: ryancoleman/traveldistance

def makeAminoAcidHistogram(
    plotter, backValues, sideValues, caValues, cbValues, outNameAmino,
    interval=0.5):
  '''Write and plot the four histograms for one amino acid.

  Bins the backbone, sidechain, C-alpha and C-beta values against one shared
  maximum so that all four histograms cover the same x axis, writes the raw
  and normalized counts to outNameAmino + ".res.txt" and plots the four
  series to outNameAmino + ".png" through plotter.

  plotter -- callable that accepts gnuplot command strings and also
      provides xlabel(), ylabel() and plot()
  backValues, sideValues, caValues, cbValues -- lists of numbers to bin
  outNameAmino -- prefix of the two output files
  interval -- bin width passed to statistics.computeHistogram

  Raises ValueError when all four lists are empty, because max() is then
  called on an empty list.
  '''
  maxBoth = max(backValues+sideValues+caValues+cbValues)
  histograms = []
  for values in (backValues, sideValues, caValues, cbValues):
    # the second return value is not needed here
    histogram, _ = statistics.computeHistogram(values, interval, maxBoth)
    histograms.append(histogram)
  histoBack, histoSide, histoCa, histoCb = histograms
  xVals = [cutoff*interval for cutoff in range(2+int(maxBoth/interval))]
  graphDataBack = Gnuplot.Data(xVals, histoBack, title="Backbone")
  graphDataSide = Gnuplot.Data(xVals, histoSide, title="Sidechain")
  graphDataCa = Gnuplot.Data(xVals, histoCa, title="C-alpha")
  graphDataCb = Gnuplot.Data(xVals, histoCb, title="C-beta")
  # the totals do not change from row to row, so sum each histogram once
  totals = [sum(histogram) for histogram in histograms]
  # 'with' closes the file even when a write fails part-way through
  with open(outNameAmino + ".res.txt", 'w') as outTextFile:
    outTextFile.write("xVal\tback\tside\tca\tcb\tbackN\tsideN\tcaN\tcbN\n")
    # zip stops at the shortest sequence, so a length mismatch between xVals
    # and the histograms shortens the table instead of raising IndexError
    for row in zip(xVals, histoBack, histoSide, histoCa, histoCb):
      fields = [str(value) for value in row]
      for count, total in zip(row[1:], totals):
        if total > 0.:
          fields.append(str(float(count)/total))
        else:
          # an all-zero histogram has no meaningful normalized value
          fields.append("0.")
      outTextFile.write("\t".join(fields) + "\n")
  plotter('set terminal png')
  plotter('set output "' + outNameAmino + '.png"')
  plotter('set data style linespoints')
  plotter('set key right top')
  plotter('set xrange [' + str(min(xVals)-1) + ':' + str(max(xVals)+1) + ']')
  # the y range is derived from the backbone and sidechain histograms only
  plotter(
      'set yrange [' + str(0.) + ':' + str(1.05*max(histoBack+histoSide)) + ']')
  plotter.xlabel('Travel In Distance')
  plotter.ylabel('Atom Count')
  plotter.plot(graphDataBack, graphDataSide, graphDataCa, graphDataCb)

Example #2

Show file

File: tm3summarize.py Project: ryancoleman/traveldistance

def summarizeOneFeature(tmDataList, columnName, intervals=50, outName="a.txt"):
  '''Write a tab-separated histogram table of one column, one row per tree.

  The header row lists "name", "count", "mean", "stddev" and then the values
  0, interval, 2*interval, ... below the overall maximum of the column.
  Each following row holds the tree's inputFileName, the number of values,
  their mean and standard deviation, and the histogram counts returned by
  statistics.computeHistogram.

  tmDataList -- objects providing titleToColumn(), getListColumn() and
      inputFileName; the first one resolves columnName to a column number
  columnName -- title of the column to summarize
  intervals -- how many intervals to split [0, overall maximum] into, or the
      string "max" to use an interval width of 1
  outName -- path of the output file

  Raises IndexError when tmDataList is empty, ValueError when a column has
  no values, and ZeroDivisionError when intervals is 0 or when intervals is
  "max" and no value is above 0.
  '''
  # gather the data before opening the output file, so bad input does not
  # leave a truncated file behind
  columnNum = tmDataList[0].titleToColumn(columnName)
  treeData = {}
  overallMax = 0.
  for tm3tree in tmDataList:
    data = tm3tree.getListColumn(columnNum)
    overallMax = max(overallMax, max(data))
    treeData[tm3tree] = data
  if intervals == "max":
    intervals = overallMax  # 1 per
  # max(0., someInt) returns the int, so without float() this is integer
  # division under Python 2; interval would become 0 and the header loop
  # below would never terminate
  interval = float(overallMax)/intervals  # number of intervals desired
  with open(outName, 'w') as outFile:
    #print a header
    outFile.write("name\tcount\tmean\tstddev\t")
    currentOut = 0.
    while currentOut < overallMax:
      outFile.write(str(currentOut) + "\t")
      currentOut += interval
    outFile.write("\n")
    for tm3tree in tmDataList:
      tm3data = treeData[tm3tree]
      avgData = statistics.computeMean(tm3data)
      stddevData = statistics.computeStdDev(tm3data, avgData)
      histo, outMax = statistics.computeHistogram(tm3data, interval, overallMax)
      outFile.write(tm3tree.inputFileName + "\t")
      outFile.write(str(len(tm3data)) + "\t")
      outFile.write(str(avgData) + "\t")
      outFile.write(str(stddevData) + "\t")
      for histoCount in histo:
        outFile.write(str(histoCount) + "\t")
      outFile.write("\n")

Example #3

Show file

File: processFoundHoles.py Project: ryancoleman/traveldistance

def processData(dataList, nameList, listPaths, outputFileName="processed.foundholes."):
    """Write summary logs, ranking logs and histogram plots for the data sets.

    Every output path starts with outputFileName. The function writes:
      * "overall.best.log": one line per entry of nameList holding the best
        value of each comparison column (8 to 14), where "best" is the
        minimum or the maximum as listed in comps;
      * for each comparison column, a ".best.rankings.log" file plus one
        forward and one reverse ranking file per entry of colNames, all
        built from the values returned by processDataOne;
      * when gnuplotAvailable is true, one PNG per pair of (column 4 to 13,
        comparison column) showing the histogram of all values of the
        column, a rescaled histogram of the "best" values and, for columns
        4 to 7, a rescaled histogram of the values computed from listPaths.

    dataList -- data sets, indexed in parallel with nameList
    nameList -- names written to the summary log and passed to processDataOne
    listPaths -- paths measured with len() and the paths.path* helpers
    outputFileName -- prefix of every file written

    All files are written inside 'with' blocks so the handle is released
    even when a helper raises part-way through.
    """
    compCols = [8, 9, 10, 11, 12, 13, 14]
    comps = ["min", "max", "max", "min", "max", "max", "min"]
    compThreshs = [5.0, 0.4, 0.8, 2.5, 0.5, 0.8, 2.5]
    # first do the one big summary output file: bestLists[i][j] is the best
    # value of comparison column i within data set j
    bestLists = []
    for index, colIdx in enumerate(compCols):
        bestList = []
        for data in dataList:
            if comps[index] == "min":
                bestVal, bestIndex = getMinColumn(data, colIdx)
            else:
                bestVal, bestIndex = getMaxColumn(data, colIdx)
            bestList.append(bestVal)
        bestLists.append(bestList)
    with open(outputFileName + "overall.best.log", "w") as fileOut:
        fileOut.write("name pRMSD coverage span wRMSD less1 lessRad radiicomp\n")
        for index, name in enumerate(nameList):
            fileOut.write(name + " ")
            for bestList in bestLists:
                fileOut.write(str(bestList[index]) + " ")
            fileOut.write("\n")
    # ranking files, one group per comparison column
    for colIdx, sortColNumber in enumerate(compCols):
        bestRankStrings, sortedEvals, backwardsEvals = [], [], []
        for index, data in enumerate(dataList):
            bestRankString, sortedEval, backEval = processDataOne(data, nameList[index], sortColNumber, comps[colIdx])
            bestRankStrings.append(bestRankString)
            sortedEvals.append(sortedEval)
            backwardsEvals.append(backEval)
        sortName = colNamesBonus[sortColNumber - 4]
        with open(outputFileName + sortName + ".best.rankings.log", "w") as fileOut:
            for bestRankStr in bestRankStrings:
                fileOut.write(bestRankStr + "\n")
        for index, colName in enumerate(colNames):
            with open(outputFileName + "rankings." + colName + "." + sortName + ".log", "w") as fileOut:
                for line in sortedEvals:
                    fileOut.write(line[index] + "\n")
            with open(outputFileName + "rankings.reverse." + colName + "." + sortName + ".log", "w") as fileOut:
                for line in backwardsEvals:
                    fileOut.write(line[index] + "\n")
    # drilledData is indexed by column number; only columns 4 to 7 are filled
    # from listPaths, the others stay empty (or False for columns 0 to 3)
    drilledData = [False, False, False, False, [], [], [], [], [], [], [], [], [], []]
    for drilledPath in listPaths:
        drilledData[4].append(float(len(drilledPath)))
        drilledData[5].append(float(paths.pathLength(drilledPath)))
        drilledData[6].append(float(paths.pathMinRadius(drilledPath)))
        drilledData[7].append(float(paths.pathMaxInsideRadius(drilledPath)))
    for column in range(4, 14):
        columnName = colNamesBonus[column - 4]
        for colIdx, sortColNumber in enumerate(compCols):
            columnData = []
            # NOTE(review): selectColumnData is collected but not used while
            # the threshold-selected series is disabled in the plot
            selectColumnData = []
            bestColumnData = []
            for data in dataList:
                if comps[colIdx] == "min":
                    bestVal, bestIndex = getMinColumn(data, sortColNumber)
                    selectColumnData.extend(getFromColumnIfMin(data, column, sortColNumber, compThreshs[colIdx]))
                    bestColumnData.extend(getFromColumnIfMin(data, column, sortColNumber, bestVal))
                else:
                    bestVal, bestIndex = getMaxColumn(data, sortColNumber)
                    selectColumnData.extend(getFromColumnIfMax(data, column, sortColNumber, compThreshs[colIdx]))
                    bestColumnData.extend(getFromColumnIfMax(data, column, sortColNumber, bestVal))
                columnData.extend(getOneColumn(data, column))
            hasDrilled = len(drilledData[column]) > 0
            # the histogram range must cover the drilled values as well
            if hasDrilled:
                maxHere = max(max(columnData), max(drilledData[column])) + 1.0
            else:
                maxHere = max(columnData) + 1.0
            interval = maxHere / 40.0
            # computeHistogram returns a pair; element 0 holds the counts and
            # element 1 is reused as maxData so all series share one range
            histogram = statistics.computeHistogram(columnData, interval, maxData=maxHere)
            bestHistogram = statistics.computeHistogram(bestColumnData, interval, maxData=histogram[1])
            if hasDrilled:
                realHistogram = statistics.computeHistogram(drilledData[column], interval, maxData=histogram[1])
            else:
                realHistogram = False
            # scale the other series so their peak matches the peak of the
            # full histogram
            # NOTE(review): if the counts are ints these divisions truncate
            # under Python 2 - confirm against statistics.computeHistogram
            maxHeight = max(histogram[0])
            maxBestHeight = max(bestHistogram[0])
            if realHistogram:
                maxRealHeight = max(realHistogram[0])
                realScaledHistogram = [
                    [histPoint * maxHeight / maxRealHeight for histPoint in realHistogram[0]],
                    realHistogram[1],
                ]
            bestScaledHistogram = [
                [histPoint * maxHeight / maxBestHeight for histPoint in bestHistogram[0]],
                bestHistogram[1],
            ]
            # make gnuplot version if possible
            if gnuplotAvailable:
                # NOTE(review): plotter is not created in this function, so it
                # has to exist in the enclosing module - confirm
                sortName = colNamesBonus[sortColNumber - 4]
                xVals = [cutoff * interval for cutoff in range(2 + int(histogram[1] / interval))]
                graphData = Gnuplot.Data(xVals, histogram[0], title="All")
                graphBestData = Gnuplot.Data(
                    xVals,
                    bestScaledHistogram[0],
                    title="Best " + str(sortName) + " Scaled by " + str(maxHeight / maxBestHeight),
                )
                if realHistogram:
                    graphRealData = Gnuplot.Data(
                        xVals, realScaledHistogram[0], title="Drilled Holes Scaled by " + str(maxHeight / maxRealHeight)
                    )
                # NOTE(review): the cumulative series is built but not plotted
                graphDataCum = Gnuplot.Data(xVals, statistics.computeCumulativeHistogram(histogram[0]))
                plotter("set terminal png")
                plotter('set output "' + outputFileName + columnName + "." + sortName + '.png"')
                plotter("set data style linespoints")
                plotter("set xrange [" + str(min(xVals) - 0.01) + ":" + str(max(xVals) + 0.01) + "]")
                plotter.xlabel(columnName)
                plotter.ylabel("Count")
                plotter("set multiplot")
                plotter("set key right top")
                if realHistogram:
                    plotter.plot(graphData, graphBestData, graphRealData)
                else:
                    plotter.plot(graphData, graphBestData)
                plotter("unset multiplot")

Example #4

Show file

File: analyzePdbB.py Project: ryancoleman/traveldistance

def makeHistogramReport(
    residueData, outputFilename="histogram.bfactor", interval=0.1,
    yMaxHisto=0.16):
  '''Write a histogram text file and, when gnuplot is available, PNG plots.

  The normalized histogram of all values goes to outputFilename + ".txt".
  When gnuplotAvailable is true the function also plots the overall and the
  beta histogram with their cumulative versions, then for both the "all"
  and the ".beta" data sets one plain and one cumulative plot per residue,
  combined plots of all residues, of the lowCodes residues and of the
  highCodes residues, and finally the combined plots again with the x range
  limited to [1:6].

  residueData -- mapping of residue name to a mapping whose values are
      lists of numbers; the entry keyed by carbonBetaCodes[name] supplies
      the beta values. Only names found in aminoAcid3Codes are used.
  outputFilename -- prefix of every file written
  interval -- bin width of all histograms
  yMaxHisto -- upper y limit of the overall and beta plots

  Raises KeyError when a used residue has no entry for its
  carbonBetaCodes key.
  '''
  totalList = []
  betaList = []
  # items() works on Python 2 and 3, iteritems() only on Python 2
  for oneResName, oneResData in residueData.items():
    #assemble into one big list
    if oneResName in aminoAcid3Codes:
      for data in oneResData.values():
        totalList.extend(data)
      betaList.extend(oneResData[carbonBetaCodes[oneResName]])
  resList = {}
  betaResList = {}
  for oneResKey in aminoAcid3Codes:
    resList[oneResKey] = []
    betaResList[oneResKey] = []
    if oneResKey in residueData:
      for data in residueData[oneResKey].values():
        resList[oneResKey].extend(data)
      betaResList[oneResKey].extend(
          residueData[oneResKey][carbonBetaCodes[oneResKey]])
  #now do histogram stuff
  # NOTE(review): 8. is presumably the maximum data value - confirm against
  # statistics.computeNormalizedHistogram
  histogram, maxData = statistics.computeNormalizedHistogram(
      totalList, interval, 8.)
  betaHistogram, betaMax = statistics.computeNormalizedHistogram(
      betaList, interval, 8.)
  # 'with' closes the file even when a write fails
  with open(outputFilename + ".txt", 'w') as fileTemp:
    fileTemp.write("LowEndInterval Count\n")
    for index, data in enumerate(histogram):
      fileTemp.write(str(index*interval) + " " + str(data) + "\n")
  #make gnuplot version if possible
  if gnuplotAvailable:
    plotter = Gnuplot.Gnuplot(debug=0)
    xVals = [cutoff*interval for cutoff in range(2+int(maxData/interval))]
    graphData = Gnuplot.Data(xVals, histogram)
    graphDataCum = Gnuplot.Data(
        xVals, statistics.computeCumulativeHistogram(histogram))
    graphDataBeta = Gnuplot.Data(xVals, betaHistogram)
    graphDataBetaCum = Gnuplot.Data(
        xVals, statistics.computeCumulativeHistogram(betaHistogram))
    fullXRange = (
        'set xrange [' + str(min(xVals)-1) + ':' + str(max(xVals)+1) + ']')
    plotter('set terminal png')
    plotter('set output "' + outputFilename + '.png"')
    plotter('set data style linespoints')
    plotter(fullXRange)
    plotter('set yrange [' + str(0.) + ':' + str(yMaxHisto) + ']')
    plotter.xlabel('Travel In Distance')
    plotter.ylabel('Atom Count')
    plotter.plot(graphData)
    plotter('set output "' + outputFilename + '.beta.png"')
    plotter.ylabel('Carbon Beta Atom Count')
    plotter.plot(graphDataBeta)
    plotter('set output "' + outputFilename + '.cumulative.png"')
    plotter('set yrange []')  # automatic
    plotter.ylabel('Cumulative Atom Count')
    plotter.plot(graphDataCum)
    plotter.ylabel('Cumulative Beta Atom Count')
    plotter('set output "' + outputFilename + '.cumulative.beta.png"')
    plotter.plot(graphDataBetaCum)
    plotter.ylabel('Atom Count')
    #now do one for each residue
    plotter('set data style lines')
    ylabels = ('Atom Count', 'Carbon Beta Atom Count')
    outputNames = (outputFilename, outputFilename + '.beta')
    histogramData = (resList, betaResList)
    for thisResList, outputName, ylabel in zip(
        histogramData, outputNames, ylabels):
      # the last plots of a pass narrow the x range and move the key;
      # restore both so the second pass is drawn like the first one
      plotter(fullXRange)
      plotter('set key right top')
      plotter.ylabel(ylabel)
      histograms = {}
      maxOverRes = 0.
      for resName in aminoAcid3Codes:
        # maxData is fed back in, so each call receives the maximum
        # returned by the previous one
        histogramRes, maxData = statistics.computeHistogram(
            thisResList[resName], interval, maxData)
        histograms[resName] = histogramRes
        maxOverRes = max(maxOverRes, max(histogramRes))
      countRange = 'set yrange [0:' + str(maxOverRes+1000) + ']'
      plotter(countRange)
      # index 0 holds the plain series, index 1 the cumulative series
      resGraphDatas = [], []
      lowGraphDatas = [], []
      highGraphDatas = [], []
      for resName in aminoAcid3Codes:
        plotter('set output "' + outputName + "." + resName + '.png"')
        plotter(countRange)
        histogramRes = histograms[resName]
        plainData = Gnuplot.Data(xVals, histogramRes, title=resName)
        resGraphDatas[0].append(plainData)
        plotter.plot(plainData)
        plotter(
            'set output "' + outputName + "." + resName + '.cumulative.png"')
        plotter('set yrange [0:1]')
        cumData = Gnuplot.Data(
            xVals, statistics.computeCumulativeHistogram(histogramRes),
            title=resName)
        plotter.plot(cumData)
        resGraphDatas[1].append(cumData)
        if resName in highCodes:
          highGraphDatas[0].append(plainData)
          highGraphDatas[1].append(cumData)
        elif resName in lowCodes:
          lowGraphDatas[0].append(plainData)
          lowGraphDatas[1].append(cumData)
      outNames = (
          'set output "' + outputName + ".residues" + '.png"',
          'set output "' + outputName + ".residues" + '.cumulative.png"')
      lowNames = (
          'set output "' + outputName + ".residues.low" + '.png"',
          'set output "' + outputName + ".residues.low" + '.cumulative.png"')
      highNames = (
          'set output "' + outputName + ".residues.high" + '.png"',
          'set output "' + outputName + ".residues.high" + '.cumulative.png"')
      ranges = (countRange, 'set yrange [0:1]')
      # argument unpacking plots however many series each group holds, so
      # the groups no longer need exactly 20, 11 and 9 entries; an empty
      # group is skipped because there is nothing to plot
      for count, resGraphData in enumerate(resGraphDatas):
        plotter(outNames[count])
        plotter('set key right bottom')
        plotter(ranges[count])
        if resGraphData:
          plotter.plot(*resGraphData)
        lowGraphData = lowGraphDatas[count]
        plotter(lowNames[count])
        if lowGraphData:
          plotter.plot(*lowGraphData)
        highGraphData = highGraphDatas[count]
        plotter(highNames[count])
        if highGraphData:
          plotter.plot(*highGraphData)
      #limits on x dim
      outNames2 = (
          'set output "' + outputName + ".residues16" + '.png"',
          'set output "' + outputName + ".residues" + '.cumulative16.png"')
      for count, resGraphData in enumerate(resGraphDatas):
        plotter(outNames2[count])
        plotter('set key right bottom')
        plotter('set xrange [1:6]')
        plotter(ranges[count])
        if resGraphData:
          plotter.plot(*resGraphData)