Python computeMean Beispiele, statistics.computeMean Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: dot.py Projekt: ryancoleman/traveldistance

 def refinePockets(
     self, columnList, columnsToMean, columnsToStddev,
     resColNums, sizeCol, selfScores,
     sizeMin=-1, sizeMax=10000000000, doSelfScore=True,
     justNodes=None, matchList=None, possNodes=None, notTheseNodes=None):
   '''Replace the worst-scoring selected node with the best candidate of its tree.

   The node with the largest mean match score is picked (nodes listed in
   notTheseNodes are skipped). Every candidate node of that node's tree whose
   size lies strictly between sizeMin and sizeMax is then scored against the
   currently selected node of every other tree, and the candidate with the
   smallest mean score becomes the new selection for that tree.

   columnList, columnsToMean, columnsToStddev -- passed through unchanged to
       compareColumns
   resColNums -- not used by this method
   sizeCol -- key into possNode.attributes holding the size that is filtered on
   selfScores -- mapping node -> self score, only read when doSelfScore is true
   justNodes -- mapping tree -> currently selected node; updated in place
   matchList -- iterable of (tree1, tree2, node1, node2, score); required
   possNodes -- mapping tree -> candidate nodes; required once a worst node
       has been found
   notTheseNodes -- nodes that must not be chosen as the node to replace

   Returns (changed, worstNode). worstNode is None when no node was eligible.
   changed is False when the worst node was already the best candidate or when
   no candidate passed the size filter.
   '''
   if notTheseNodes is None:
     notTheseNodes = []
   # gather every match score each node takes part in and remember its tree
   nodeScores = {}
   nodeToTree = {}
   for tmData1, tmData2, node1, node2, score in matchList:
     nodeScores.setdefault(node1, []).append(score)
     nodeScores.setdefault(node2, []).append(score)
     nodeToTree[node1] = tmData1
     nodeToTree[node2] = tmData2
   nodeMeans = []
   for node, scores in nodeScores.items():
     if node not in notTheseNodes:
       nodeMeans.append((node, statistics.computeMean(scores)))
   if not nodeMeans:  # nothing to do
     return False, None
   nodeMeans.sort(key=operator.itemgetter(1))  # largest mean ends up last
   worstNode = nodeMeans[-1][0]
   worstTree = nodeToTree[worstNode]
   candidateMeans = []
   for possNode in possNodes[worstTree]:
     if not sizeMin < possNode.attributes[sizeCol] < sizeMax:
       continue  # outside the allowed size window
     scores = []
     for tmData in self.tmDataList:
       if tmData != worstTree:  # don't do this tree against itself
         justNode = justNodes[tmData]
         totalScore = compareColumns(
             justNode, possNode, columnList, columnsToMean, columnsToStddev)
         if doSelfScore:
           # use the self scores of the pair actually being compared; the
           # previous code read node1/node2 left over from the last match
           totalScore += min(selfScores[justNode], selfScores[possNode])
         scores.append(totalScore)
     candidateMeans.append((possNode, statistics.computeMean(scores)))
   if not candidateMeans:  # no candidate passed the size filter
     return False, worstNode
   candidateMeans.sort(key=operator.itemgetter(1))  # smallest mean first
   newBestNode = candidateMeans[0][0]
   if worstNode != newBestNode:
     justNodes[worstTree] = newBestNode
     return True, worstNode  # indicates change
   return False, worstNode  # the node was already the best

Beispiel #2

0

Datei anzeigen

Datei: tstCurvature.py Projekt: ryancoleman/traveldistance

def tstEdgeCurvature(trianglePoint, pointXyz, pointTriangle, pointNeighbor):
  '''Compute a signed angle for every edge and per-point means of those angles.

  For each edge the angle between the normals of the first two triangles
  sharing it is computed in degrees. The angle is negated when
  geometry.checkPlaneSide reports a true value for the second triangle's
  off-edge vertex against the first triangle's plane.

  trianglePoint -- records of [triangleNumber, pointNumber, ...]
  pointXyz -- records whose entries after the first are coordinates, indexed
      by pointNumber - 1
  pointTriangle -- records whose entries from index 2 on are triangle
      numbers, indexed by pointNumber - 1
  pointNeighbor -- records of [pointNumber, count, neighborNumber, ...]

  Returns (edgeAngle, pointMeanAngle, pointWeightedMeanAngle): a dict keyed by
  the sorted point pair, and two lists of [pointNumber, mean], the second one
  averaging angle * edge length.
  '''
  #coordinates of each triangle's corners, keyed by triangle number
  triXyz = {}
  for triRecord in trianglePoint:
    triXyz[triRecord[0]] = [pointXyz[pt-1][1:] for pt in triRecord[1:]]
  edgeAngle = {}  # cache so each edge is only computed once
  pointMeanAngle = []
  pointWeightedMeanAngle = []
  for neighborRecord in pointNeighbor:
    mainPt = neighborRecord[0]
    angles, weightedAngles = [], []
    for otherPt in neighborRecord[2:]:  # index 1 holds the neighbor count
      edgeKey = tuple(sorted([mainPt, otherPt]))  # canonicalized format
      edgeLength = geometry.distL2(
          pointXyz[mainPt-1][1:], pointXyz[otherPt-1][1:])
      if edgeKey not in edgeAngle:  # first visit to this edge
        sharedTris = list(
            set(pointTriangle[mainPt-1][2:]) &
            set(pointTriangle[otherPt-1][2:]))
        #almost always exactly 2 triangles share an edge; only 2 are used
        normalA = geometry.getTriNormalList(triXyz[sharedTris[0]])
        normalB = geometry.getTriNormalList(triXyz[sharedTris[1]])
        unsignedAngle = geometry.getAngle(normalA, normalB)
        planeA = geometry.calculatePlaneD(
            normalA, geometry.getAverage(triXyz[sharedTris[0]]))
        #the corner of the second triangle that is not on the edge
        offEdge = set(trianglePoint[sharedTris[1]-1][1:]).difference(
            set(edgeKey))
        otherB = pointXyz[list(offEdge)[0]-1][1:]
        degrees = unsignedAngle * 180 / math.pi
        if geometry.checkPlaneSide(normalA+[planeA], otherB):
          degrees = -degrees  # concave negative, convex stays positive
        edgeAngle[edgeKey] = degrees
      cached = edgeAngle[edgeKey]
      angles.append(cached)
      weightedAngles.append(cached * edgeLength)
    pointMeanAngle.append([mainPt, statistics.computeMean(angles)])
    pointWeightedMeanAngle.append(
        [mainPt, statistics.computeMean(weightedAngles)])
  return edgeAngle, pointMeanAngle, pointWeightedMeanAngle

Beispiel #3

0

Datei anzeigen

Datei: tm3summarize.py Projekt: ryancoleman/traveldistance

def summarizeOneFeature(tmDataList, columnName, intervals=50, outName="a.txt"):
  '''Write a tab-separated histogram summary of one column per structure.

  A header row is written first, then one row per entry of tmDataList with
  its inputFileName, value count, mean, standard deviation and the counts
  returned by statistics.computeHistogram.

  tmDataList -- non-empty sequence of objects providing titleToColumn(),
      getListColumn() and inputFileName; the first one resolves columnName
  columnName -- title of the column to summarize
  intervals -- number of bins between 0 and the overall maximum, or the
      string "max" for a bin width of 1
  outName -- path of the output file; it is opened for writing and truncated
  '''
  with open(outName, 'w') as outFile:  # closed even if something below raises
    columnNum = tmDataList[0].titleToColumn(columnName)
    treeData = {}
    overallMax = 0.
    for tm3tree in tmDataList:
      data = tm3tree.getListColumn(columnNum)
      overallMax = max(overallMax, max(data))
      treeData[tm3tree] = data
    if intervals == "max":
      intervals = overallMax  # 1 per
    # force float division: with integer data Python 2 would truncate the
    # width to 0 and the header loop below would never terminate
    interval = float(overallMax) / intervals
    #print a header
    outFile.write("name\tcount\tmean\tstddev\t")
    currentOut = 0.
    while currentOut < overallMax:
      outFile.write(str(currentOut) + "\t")
      currentOut += interval
    outFile.write("\n")
    for tm3tree in tmDataList:
      tm3data = treeData[tm3tree]
      avgData = statistics.computeMean(tm3data)
      stddevData = statistics.computeStdDev(tm3data, avgData)
      # the second return value of computeHistogram is not needed here
      histo, outMax = statistics.computeHistogram(
          tm3data, interval, overallMax)
      outFile.write(tm3tree.inputFileName + "\t")
      outFile.write(str(len(tm3data)) + "\t")
      outFile.write(str(avgData) + "\t")
      outFile.write(str(stddevData) + "\t")
      for histoCount in histo:
        outFile.write(str(histoCount) + "\t")
      outFile.write("\n")

Beispiel #4

0

Datei anzeigen

Datei: analyze_lists_not_paired.py Projekt: ryancoleman/analysis-scripts

def analyzelistsunpaired(fileNames, numTests=1000000):
  '''Read two lists of numbers and print statistics comparing their means.

  The lists come either from one file with two whitespace-separated columns
  or from two files with one value per line. A line with a single token is
  appended to the list matching the file's position in fileNames; a line
  with several tokens contributes token i to list i. Blank lines are ignored.

  NOTE(review): an earlier docstring said a single value would be replicated
  to the length of the other list; nothing here does that.

  fileNames -- paths of the input files
  numTests -- passed to statistics.pvalueDiffMeansLazy

  Prints a header line followed by the two means, the difference of means,
  both p-values and Cohen's d, separated by spaces.
  '''
  lists = [[], []]
  for fileCount, fileName in enumerate(fileNames):
    with open(fileName, 'r') as inFile:  # make sure the handle is released
      for line in inFile:
        tokens = line.split()
        if len(tokens) == 1:
          lists[fileCount].append(float(tokens[0]))
        else:
          for tokenCount, token in enumerate(tokens):
            lists[tokenCount].append(float(token))
  diffMean, pVal1, pVal2 = statistics.pvalueDiffMeansLazy(
      lists[0], lists[1], numTests)
  cohenD = statistics.cohenEffectSize(lists[0], lists[1])
  # single-argument print calls produce the same text under Python 2 and 3
  print("mean1, mean2, diffMean, pVal1, pVal2, cohenD")
  results = (
      statistics.computeMean(lists[0]), statistics.computeMean(lists[1]),
      diffMean, pVal1, pVal2, cohenD)
  print(" ".join([str(value) for value in results]))

Beispiel #5

0

Datei anzeigen

Datei: analyzePdbB.py Projekt: ryancoleman/traveldistance

def makeMeanPerProteinReport(pdbs, outName):
  '''Write one "name<TAB>mean" line per protein to the file outName.

  pdbs -- mapping of protein name (a string) to a dict of dicts whose
      innermost values are lists of numbers; every number found under a
      protein goes into that protein's mean
  outName -- path of the output file; it is opened for writing and truncated
  '''
  with open(outName, 'w') as outFile:  # closed even if computeMean raises
    for pdb in pdbs:
      totalList = []
      for resDict in pdbs[pdb].values():
        for atomList in resDict.values():
          totalList.extend(atomList)
      avg = statistics.computeMean(totalList)
      outFile.write(pdb + "\t")
      outFile.write(str(avg) + "\n")

Beispiel #6

0

Datei anzeigen

Datei: tm3.py Projekt: ryancoleman/traveldistance

def calcColumnsMeanStddev(columnList, tmDataList):
    """Pool each column over all trees and return its mean and std. deviation.

    columnList -- column identifiers accepted by getListColumn
    tmDataList -- objects providing getListColumn(column)

    Returns (columnsToMean, columnsToStddev), two dicts keyed by column.
    """
    meanByColumn, stddevByColumn = {}, {}
    for column in columnList:
        pooled = []
        for tmData in tmDataList:
            pooled += tmData.getListColumn(column)
        mean = statistics.computeMean(pooled)
        stddev = statistics.computeStdDev(pooled, mean)
        meanByColumn[column] = mean
        stddevByColumn[column] = stddev
    return meanByColumn, stddevByColumn

Beispiel #7

0

Datei anzeigen

Datei: paths.py Projekt: ryancoleman/traveldistance

def averageTheta(path):
  '''Return the angles at the interior nodes of path and their mean.

  Each node supplies its coordinates as node[1:4]. For every consecutive
  triple a, b, c the angle between the vectors built from (b, a) and (c, b)
  is taken with geometry.getAngle.

  Returns (thetas, mean). Paths with fewer than three nodes give ([], 0).
  '''
  if len(path) < 3:
    return [], 0  # not enough nodes to form a single angle
  thetas = []
  previousPt, middlePt = path[0][1:4], path[1][1:4]
  for node in path[2:]:
    nextPt = node[1:4]
    incoming = geometry.getVector(middlePt, previousPt)
    outgoing = geometry.getVector(nextPt, middlePt)
    thetas.append(geometry.getAngle(incoming, outgoing))
    previousPt, middlePt = middlePt, nextPt  # slide the window one node on
  return thetas, statistics.computeMean(thetas)

Beispiel #8

0

Datei anzeigen

Datei: tstCurvature.py Projekt: ryancoleman/traveldistance

  return edgeAngle, pointMeanAngle, pointWeightedMeanAngle

#script entry: only runs when the invoked program name contains this file name
if "tstCurvature.py" in sys.argv[0]:
  for tstFileName in sys.argv[1:]:
    tstD = tstdata.tstData(
        tstFileName, necessaryKeys=tstdata.tstData.necessaryKeysForCurve)
    tstDict = tstD.dict
    eA, pA, pWA = tstEdgeCurvature(
        tstDict['TRIANGLE_POINT'], tstDict['POINT_XYZ'],
        tstDict['POINT_TRIANGLE'], tstDict['POINT_NEIGHBOR'])
    #disabled: append curvature to tst file
    #tstFile = open(tstFileName, 'a')
    #tstdata.writeEntrySingleFloat(pWA, "POINT_CURVATURE_EDGE LIST", \
    #                        "END POINT_CURVATURE_EDGE", tstFile)
    #tstFile.close()
    #per-point length-weighted curvature: signed mean and mean of magnitudes
    pointCurves = [pointRecord[1] for pointRecord in pWA]
    meanCurv = statistics.computeMean(pointCurves)
    meanAbsCurv = statistics.computeMean(
        [abs(curve) for curve in pointCurves])
    #same two numbers over the raw per-edge angles
    edgeCurves = list(eA.values())
    meanCurvE = statistics.computeMean(edgeCurves)
    meanAbsCurvE = statistics.computeMean(
        [abs(curve) for curve in edgeCurves])
    summary = (tstFileName, meanCurv, meanAbsCurv, meanCurvE, meanAbsCurvE)
    print(" ".join([str(item) for item in summary]))

Beispiel #9

0

Datei anzeigen

Datei: analyzePdbB.py Projekt: ryancoleman/traveldistance

def _meanOrZero(values):
  '''Return the arithmetic mean of values, or 0 when values is empty.'''
  if len(values) == 0:
    return 0
  return sum(values)/float(len(values))


def makeAtomReport(residueData, outputFilename="atom.bfactor", runGraphs=True):
  '''Write per-atom statistics to a text file and optionally plot summaries.

  residueData -- dict of residue name -> dict of atom name -> list of values
  outputFilename -- prefix for every file written: ".txt" for the report,
      ".png" and ".ab.png" for the plots; per-residue histogram names are
      handed to makeAminoAcidHistogram
  runGraphs -- plots are only made when this is true and gnuplotAvailable
      is true

  The report has one line per residue/atom pair, sorted by residue name and
  then atom name, holding mean, standard deviation, minimum, maximum, count.
  '''
  # the with block closes the report even if a computation below raises,
  # e.g. min() on an empty atom list
  with open(outputFilename + '.txt', 'w') as fileTemp:
    fileTemp.write("ResidueName AtomName Mean StdDev Low High Count\n")
    for residueName in sorted(residueData.keys()):
      atomData = residueData[residueName]
      for atomName in sorted(atomData.keys()):
        totalList = atomData[atomName]
        average = statistics.computeMean(totalList)
        stddev = statistics.computeStdDev(totalList, average)
        fileTemp.write(
            residueName + " " + atomName + " " + str(average) + " " +
            str(stddev) + " " + str(min(totalList)) + " " +
            str(max(totalList)) + " " + str(len(totalList)) + "\n")
  if runGraphs and gnuplotAvailable:
    #first make backbone-sidechain report
    plotter = Gnuplot.Gnuplot(debug=0)
    tickLabels = []
    yDataBackbone, yDataSidechain = [], []
    yDataCa, yDataCb = [], []
    for index, code in enumerate(aminoAcid3Codes):
      tickLabels.append('"' + str(code) + '" ' + str(index))
      backValues, sideValues = [], []
      caValues, cbValues = [], []
      try:
        for key, values in residueData[code].items():
          atomCode = key.strip()
          # every atom counts as backbone or sidechain ...
          if atomCode in backboneAtomCodes:
            backValues.extend(values)
          else:
            sideValues.extend(values)
          # ... and C-alpha / C-beta are additionally tracked on their own
          if atomCode == caCode:
            caValues.extend(values)
          elif atomCode == cbCode:
            cbValues.extend(values)
      except KeyError:  # sometimes one residue won't be represented
        pass  # but that is okay
      yDataBackbone.append(_meanOrZero(backValues))
      yDataSidechain.append(_meanOrZero(sideValues))
      yDataCa.append(_meanOrZero(caValues))
      yDataCb.append(_meanOrZero(cbValues))
      if backValues or sideValues or caValues or cbValues:
        makeAminoAcidHistogram(
            plotter, backValues, sideValues, caValues, cbValues,
            outputFilename + "." + str(code))
    yLabels = '(' + ', '.join(tickLabels) + ')'
    # NOTE(review): the x range is hard-coded to 20 entries; this presumably
    # matches len(aminoAcid3Codes) - confirm
    graphDataBackbone = Gnuplot.Data(range(20), yDataBackbone, title="Backbone")
    graphDataSidechain = Gnuplot.Data(
        range(20), yDataSidechain, title="Sidechain")
    graphDataCa = Gnuplot.Data(range(20), yDataCa, title="C-alpha")
    graphDataCb = Gnuplot.Data(range(20), yDataCb, title="C-beta")
    plotter('set terminal png')
    plotter('set output "' + outputFilename + '.png"')
    plotter('set data style points')
    plotter('set key right top')
    plotter('set xtics ' + yLabels)
    plotter(
        'set yrange [' + str(min(yDataBackbone + yDataSidechain) - 0.5) +
        ':' + str(max(yDataBackbone+yDataSidechain)+0.5) + ']')
    plotter('set xrange [-1:20]')
    plotter.xlabel('Residue')
    plotter.ylabel('Mean Travel In Distance')
    plotter.plot(graphDataBackbone, graphDataSidechain)
    #second plot: C-alpha against C-beta
    plotter('set output "' + outputFilename + '.ab.png"')
    if "buried" in outputFilename:
      # file names containing "buried" get a fixed upper limit of 6
      plotter('set yrange [' + str(min(yDataCa + yDataCb) - 0.5) + ':6.]')
    else:
      plotter(
          'set yrange [' + str(min(yDataCa + yDataCb) - 0.5) + ':' +
          str(max(yDataCa + yDataCb) + 0.5) + ']')
    plotter.plot(graphDataCa, graphDataCb)

Beispiel #10

0

Datei anzeigen

Datei: analyzePdbB.py Projekt: ryancoleman/traveldistance

def makeResidueReport(
    residueData, outputFilename="residue.bfactor",
    maxY=False, maxYBeta=False, runGraphs=False):
  '''Write per-residue statistics to a text file and optionally plot them.

  residueData -- dict of residue name -> dict of atom name -> list of values
  outputFilename -- prefix for the files written: ".txt" for the report,
      ".png" and ".beta.png" for the plots
  maxY, maxYBeta -- False to derive the y range from the data, otherwise the
      upper y limit (the lower limit is then 0) of the respective plot
  runGraphs -- plots are only made when this is true and gnuplotAvailable
      is true

  One report line is written for every name in aminoAcid3Codes, in sorted
  order; residues without any data get 0.0 for Low, High and Count. The
  module-level aminoAcid3Codes list is no longer sorted in place; a sorted
  copy is used for both the report and the plots.
  '''
  residueNames = sorted(aminoAcid3Codes)  # copy: leave the shared list alone
  averages, stddevs = {}, {}
  betaAverages, betaStddevs = {}, {}
  # the with block closes the report even if a computation below raises
  with open(outputFilename + ".txt", 'w') as fileTemp:
    fileTemp.write("ResidueName Mean StdDev Low High Count\n")
    for residueName in residueNames:
      #assemble all atoms of this residue into one big list
      totalList = []
      if residueName in residueData:
        for data in residueData[residueName].values():
          totalList.extend(data)
      average = statistics.computeMean(totalList)
      averages[residueName] = average
      stddev = statistics.computeStdDev(totalList, average)
      stddevs[residueName] = stddev
      betaList = []
      if residueName in residueData:
        betaList.extend(
            residueData[residueName][carbonBetaCodes[residueName]])
      if len(betaList) > 0:
        betaAvg = statistics.computeMean(betaList)
        betaAverages[residueName] = betaAvg
        betaStddevs[residueName] = statistics.computeStdDev(betaList, betaAvg)
      if len(totalList) > 0:
        low, high, count = min(totalList), max(totalList), len(totalList)
      else:
        low, high, count = 0., 0., 0.  # written as 0.0, as before
      fileTemp.write(
          residueName + " " + str(average) + " " + str(stddev) + " " +
          str(low) + " " + str(high) + " " + str(count) + "\n")
  if runGraphs and gnuplotAvailable:
    plotter = Gnuplot.Gnuplot(debug=0)
    tickLabels = []
    yData, yError, yMin, yMax = [], [], 10, 0
    yBetaData, yBetaError, yBetaMin, yBetaMax = [], [], 10, 0
    for index, code in enumerate(residueNames):
      tickLabels.append('"' + str(code) + '" ' + str(index))
      # averages/stddevs hold an entry for every residue name
      yData.append(averages[code])
      yError.append(stddevs[code])
      yMin = min(yMin, yData[-1]-yError[-1])
      yMax = max(yMax, yData[-1]+yError[-1])
      if code in betaAverages:
        yBetaData.append(betaAverages[code])
        yBetaError.append(betaStddevs[code])
        yBetaMin = min(yBetaMin, yBetaData[-1]-yBetaError[-1])
        yBetaMax = max(yBetaMax, yBetaData[-1]+yBetaError[-1])
      else:  # no carbon-beta values; this used to raise KeyError
        yBetaData.append(0)
        yBetaError.append(0)
    yLabels = '(' + ', '.join(tickLabels) + ')'
    graphData = Gnuplot.Data(range(20), yData, yError)
    plotter('set terminal png')
    plotter('set output "' + outputFilename + '.png"')
    plotter('set data style yerrorbars')
    plotter('set boxwidth 0.9 absolute')
    plotter('set xtics ' + yLabels)
    if maxY is False:
      plotter('set yrange [' + str(yMin-0.2) + ':' + str(yMax+0.2) + ']')
    else:
      plotter('set yrange [0:' + str(maxY) + ']')
    plotter('set xrange [-1:20]')
    plotter.xlabel('Residue')
    plotter.ylabel('Mean Travel In Distance')
    plotter.plot(graphData)
    #do another graph with just carbon-betas
    plotter('set output "' + outputFilename + '.beta.png"')
    graphDataBeta = Gnuplot.Data(range(20), yBetaData, yBetaError)
    plotter.ylabel('Mean Travel In Distance of Carbon Beta')
    if maxYBeta is False:
      plotter(
          'set yrange [' + str(yBetaMin-0.2) + ':' + str(yBetaMax+0.2) + ']')
    else:
      plotter('set yrange [0:' + str(maxYBeta) + ']')
    plotter.plot(graphDataBeta)

Beispiel #11

0

Datei anzeigen

Datei: analyzePdbB.py Projekt: ryancoleman/traveldistance

def makeCompareResidueReport(
    residueBoth, outputFilename="residue.bfactor", maxY=False, maxYBeta=False,
    numTests=9):
  '''Compare per-residue means of two data sets: report, p-values and plots.

  residueBoth -- pair of dicts, each residue name -> dict of atom name ->
      list of values
  outputFilename -- prefix for the files written: ".txt" (per-set residue
      statistics), ".pvals.txt" and ".pvals.beta.txt" (differences of means
      with the values returned by statistics.pvalueDiffMeans), and ".png" /
      ".beta.png" when gnuplotAvailable is true
  maxY, maxYBeta -- only consulted when the local ranges list is empty,
      which it currently never is
  numTests -- passed to statistics.pvalueDiffMeans

  Raises KeyError from the p-value stage when a name in aminoAcid3Codes has
  no statistics in either data set.
  '''
  ranges = [-0.3, 0.6]  # fixed y range applied to both plots
  residueNames = sorted(
      set(residueBoth[0].keys()) | set(residueBoth[1].keys()))
  averages, stddevs = ({}, {}), ({}, {})
  betaAverages, betaStddevs = ({}, {}), ({}, {})
  totalLists, betaLists = ({}, {}), ({}, {})
  with open(outputFilename + ".txt", 'w') as fileTemp:
    # NOTE(review): the header names an AtomName column that the rows below
    # do not contain; left as is in case something parses this file
    fileTemp.write("ResidueName AtomName Mean StdDev Low High Count\n")
    for residueName in residueNames:
      for indexSet, residueData in enumerate(residueBoth):
        totalList, betaList = [], []
        try:
          for data in residueData[residueName].values():
            totalList.extend(data)
          totalLists[indexSet][residueName] = totalList
          average = statistics.computeMean(totalList)
          averages[indexSet][residueName] = average
          stddev = statistics.computeStdDev(totalList, average)
          stddevs[indexSet][residueName] = stddev
          betaList.extend(
              residueData[residueName][carbonBetaCodes[residueName]])
          betaLists[indexSet][residueName] = betaList
          if len(betaList) > 0:
            betaAvg = statistics.computeMean(betaList)
            betaAverages[indexSet][residueName] = betaAvg
            betaStddevs[indexSet][residueName] = statistics.computeStdDev(
                betaList, betaAvg)
          # Low/High/Count describe this data set's own values; the previous
          # code took min/max/len of the pair of lists instead
          if totalList:
            low, high = min(totalList), max(totalList)
          else:
            low, high = 0., 0.
          fileTemp.write(
              residueName + " " + str(average) + " " + str(stddev) + " " +
              str(low) + " " + str(high) + " " +
              str(len(totalList)) + "\n")
        except (ZeroDivisionError, KeyError):
          pass  # probably don't really need this residue anyway
  with open(outputFilename + ".pvals.txt", 'w') as fileTemp2:
    with open(outputFilename + ".pvals.beta.txt", 'w') as fileTemp3:
      pvalHeader = "ResidueName DiffMeans MeanA MeanB pValAbove pValBelow\n"
      fileTemp2.write(pvalHeader)
      fileTemp3.write(pvalHeader)
      for code in aminoAcid3Codes:  # now do the pvalue tests
        #all atoms first, then only the carbon-beta values
        for outFile, meanDicts, listDicts in (
            (fileTemp2, averages, totalLists),
            (fileTemp3, betaAverages, betaLists)):
          meanA = meanDicts[0][code]
          meanB = meanDicts[1][code]
          pvals = statistics.pvalueDiffMeans(
              listDicts[0][code], listDicts[1][code], meanA-meanB, numTests)
          outFile.write(
              code + " " + str(meanA-meanB) + " " + str(meanA) + " ")
          outFile.write(
              str(meanB) + " " + str(pvals[0]) + " " + str(pvals[1]))
          outFile.write("\n")
  if gnuplotAvailable:
    plotter = Gnuplot.Gnuplot(debug=0)
    tickLabels = []
    yData, yMin, yMax = [], 10, 0
    yBetaData, yBetaMin, yBetaMax = [], 10, 0
    for index, code in enumerate(aminoAcid3Codes):
      tickLabels.append('"' + str(code) + '" ' + str(index))
      yData.append(averages[0][code] - averages[1][code])
      yMin = min(yMin, yData[-1])
      yMax = max(yMax, yData[-1])
      try:
        betaAvgDiff = betaAverages[0][code] - betaAverages[1][code]
      except KeyError:  # no carbon-beta mean in one of the sets
        print(code)
        betaAvgDiff = 0.
      yBetaData.append(betaAvgDiff)
      yBetaMin = min(yBetaMin, yBetaData[-1])
      yBetaMax = max(yBetaMax, yBetaData[-1])
    yLabels = '(' + ', '.join(tickLabels) + ')'
    graphData = Gnuplot.Data(range(20), yData)
    plotter('set terminal png')
    plotter('set output "' + outputFilename + '.png"')
    plotter('set data style points')
    plotter('set boxwidth 0.9 absolute')
    plotter('set xtics ' + yLabels)
    if ranges:
      plotter('set yrange [' + str(ranges[0]) + ':' + str(ranges[1]) + ']')
    elif maxY is False:
      plotter('set yrange [' + str(yMin-0.2) + ':' + str(yMax+0.2) + ']')
    else:
      plotter('set yrange [0:' + str(maxY) + ']')
    plotter('set xrange [-1:20]')
    plotter.xlabel('Residue')
    plotter.ylabel('Mean Travel In Distance')
    plotter.plot(graphData)
    #do another graph with just carbon-betas
    plotter('set output "' + outputFilename + '.beta.png"')
    graphDataBeta = Gnuplot.Data(range(20), yBetaData)
    plotter.ylabel('Mean Travel In Distance of Carbon Beta')
    if ranges:
      plotter('set yrange [' + str(ranges[0]) + ':' + str(ranges[1]) + ']')
    elif maxYBeta is False:
      plotter(
          'set yrange [' + str(yBetaMin-0.2) + ':' + str(yBetaMax+0.2) + ']')
    else:
      plotter('set yrange [0:' + str(maxYBeta) + ']')
    plotter.plot(graphDataBeta)

Beispiel #12

0

Datei anzeigen

Datei: analyzePdbB.py Projekt: ryancoleman/traveldistance

def _residueMeansForPair(pdbPair, residueNames):
  '''Return per-residue and overall means (all atoms and carbon-beta only).

  pdbPair holds two lists of per-file residue dicts. The result is
  (means, betaMeans, totalMeans, totalBetaMeans); each is indexed by 0 or 1
  for the two lists, and the first two are then keyed by residue name.
  '''
  means, betaMeans = [{}, {}], [{}, {}]
  overallList, overallBetaList = [[], []], [[], []]
  for code in residueNames:
    for lindex in range(2):  # either a or b
      totalList, betaList = [], []
      for pdbResidues in pdbPair[lindex]:
        if code in pdbResidues:
          for atomValues in pdbResidues[code].values():
            totalList.extend(atomValues)
          betaList.extend(pdbResidues[code][carbonBetaCodes[code]])
      means[lindex][code] = statistics.computeMean(totalList)
      betaMeans[lindex][code] = statistics.computeMean(betaList)
      overallList[lindex].extend(totalList)
      overallBetaList[lindex].extend(betaList)
  totalMeans = [statistics.computeMean(values) for values in overallList]
  totalBetaMeans = [
      statistics.computeMean(values) for values in overallBetaList]
  return means, betaMeans, totalMeans, totalBetaMeans


def _tallyPermutation(counts, code, origMeanDiff, testMeanDiff):
  '''Count one permuted difference as at-or-above and/or at-or-below.'''
  if origMeanDiff <= testMeanDiff:
    counts[0][code] += 1
  if origMeanDiff >= testMeanDiff:
    counts[1][code] += 1


def _pvalueLine(label, meanA, meanB, countAbove, countBelow, numTests):
  '''Format one output row; both counts are divided by numTests + 1.'''
  return (
      label + " " + str(meanA-meanB) + " " +
      str(meanA) + " " + str(meanB) + " " +
      str(countAbove/float(1+numTests)) + " " +
      str(countBelow/float(1+numTests)) + " " + "\n")


def makeCompareResidueReportAlternate(
    pdbs, outputFilename="residue.bfactor", numTests=9999,
    correctionAll=0., correctionBeta=0.):
  '''Permutation test on differences of residue means between two file sets.

  Instead of permuting all the data, the files themselves are reshuffled
  between the two sets with statistics.permuteLists, numTests times.

  pdbs -- pair of lists; each list holds one dict per file mapping residue
      name -> dict of atom name -> list of values
  outputFilename -- prefix; ".pvals.txt" (all atoms) and ".pvals.beta.txt"
      (carbon-beta only) are written
  numTests -- number of permutations
  correctionAll, correctionBeta -- subtracted from the observed difference
      of means before it is compared with each permuted difference

  Every count starts at 1 and is divided by numTests + 1 on output. A final
  "ALL" row covers the values of all residues together.

  Returns (difference of overall means, difference of overall beta means),
  both without the corrections.
  '''
  residueNames = aminoAcid3Codes  # for now ignore what is in the files
  #first find the observed means
  means, betaMeans, totalMeans, totalBetaMeans = _residueMeansForPair(
      pdbs, residueNames)
  #observed differences do not change between permutations
  origDiff, origBetaDiff = {}, {}
  for code in residueNames:
    origDiff[code] = means[0][code] - means[1][code] - correctionAll
    origBetaDiff[code] = (
        betaMeans[0][code] - betaMeans[1][code] - correctionBeta)
  origTotalDiff = totalMeans[0] - totalMeans[1] - correctionAll
  # the correction belongs on the observed difference, as in the three cases
  # above; it used to be subtracted from the permuted difference here
  origTotalBetaDiff = totalBetaMeans[0] - totalBetaMeans[1] - correctionBeta
  pValueCounts = [{}, {}]  # first is above, second is below
  pValueBetaCounts = [{}, {}]
  for code in residueNames+["ALL"]:  # initialize counts, even for overall total
    for aboveBelow in range(2):
      pValueCounts[aboveBelow][code] = 1
      pValueBetaCounts[aboveBelow][code] = 1
  for _ in xrange(numTests):
    newPdbs = statistics.permuteLists(pdbs)
    testMeans, testBetaMeans, totalTestMeans, totalTestBetaMeans = \
        _residueMeansForPair(newPdbs, residueNames)
    for code in residueNames:  # calc pval for each residue
      _tallyPermutation(
          pValueCounts, code, origDiff[code],
          testMeans[0][code] - testMeans[1][code])
      _tallyPermutation(
          pValueBetaCounts, code, origBetaDiff[code],
          testBetaMeans[0][code] - testBetaMeans[1][code])
    #"ALL" is a fake residue name for the overall totals
    _tallyPermutation(
        pValueCounts, "ALL", origTotalDiff,
        totalTestMeans[0] - totalTestMeans[1])
    _tallyPermutation(
        pValueBetaCounts, "ALL", origTotalBetaDiff,
        totalTestBetaMeans[0] - totalTestBetaMeans[1])
  #output time; the with blocks close both files even if a write fails
  header = "ResidueName DiffMeans MeanA MeanB pValAbove pValBelow\n"
  with open(outputFilename + ".pvals.txt", 'w') as fileTemp2:
    with open(outputFilename + ".pvals.beta.txt", 'w') as fileTemp3:
      fileTemp2.write(header)
      fileTemp3.write(header)
      for code in residueNames:
        fileTemp2.write(_pvalueLine(
            code, means[0][code], means[1][code],
            pValueCounts[0][code], pValueCounts[1][code], numTests))
        fileTemp3.write(_pvalueLine(
            code, betaMeans[0][code], betaMeans[1][code],
            pValueBetaCounts[0][code], pValueBetaCounts[1][code], numTests))
      fileTemp2.write(_pvalueLine(
          "ALL", totalMeans[0], totalMeans[1],
          pValueCounts[0]["ALL"], pValueCounts[1]["ALL"], numTests))
      fileTemp3.write(_pvalueLine(
          "ALL", totalBetaMeans[0], totalBetaMeans[1],
          pValueBetaCounts[0]["ALL"], pValueBetaCounts[1]["ALL"], numTests))
  return totalMeans[0]-totalMeans[1], totalBetaMeans[0]-totalBetaMeans[1]