예제 #1
0
def analyzeColumn(
    summaryDataColumn, listPdbs, outputFileName, numTests=1000000):
  '''Permutation test on one column of summary data; writes a one-row report.

  The observed statistic is getDiffMean(summaryDataColumn, listPdbs).  It is
  compared against the same statistic computed on numTests regroupings
  produced by statistics.permuteLists(listPdbs).

  summaryDataColumn -- passed through unchanged to getDiffMean and getMean
  listPdbs -- indexable with [0] and [1]; the two groups being compared
  outputFileName -- path of the tab-separated report (overwritten)
  numTests -- number of permutations; 0 raises ZeroDivisionError when the
      counts are turned into fractions

  The report has a header line and one data line with the columns
  origDiffMean, mean1, mean2, pValAbove, pValBelow.  pValAbove is the
  fraction of permutations whose statistic is >= the observed one and
  pValBelow the fraction that is <= it, so ties count toward both.
  '''
  origDiffMean = getDiffMean(summaryDataColumn, listPdbs)
  countAbove, countBelow = 0., 0.
  for test in range(numTests):
    newLists = statistics.permuteLists(listPdbs)
    testMean = getDiffMean(summaryDataColumn, newLists)
    if testMean >= origDiffMean:
      countAbove += 1.
    if testMean <= origDiffMean:
      countBelow += 1.
  pVals = [count / float(numTests) for count in (countAbove, countBelow)]
  # the with-block closes the file even if getMean or a write fails
  with open(outputFileName, 'w') as outputFile:
    outputFile.write("origDiffMean\tmean1\tmean2\tpValAbove\tpValBelow\n")
    fields = [
        origDiffMean,
        getMean(summaryDataColumn, listPdbs[0]),
        getMean(summaryDataColumn, listPdbs[1])]
    fields.extend(pVals)
    # every field, including the last, is followed by a tab; the trailing
    # tab is kept so the file stays byte-compatible with earlier reports
    outputFile.write("".join(str(field) + "\t" for field in fields) + "\n")
예제 #2
0
def makeCompareResidueReportAlternate(
    pdbs, outputFilename="residue.bfactor", numTests=9999,
    correctionAll=0., correctionBeta=0.):
  '''different way to do p-vals, instead of permuting all data, permute the
  pairs of hyp/meso pdb files.

  pdbs -- indexable with [0] and [1] (groups a and b).  Each group is
      iterated; every item maps a residue code to a mapping whose values are
      lists of numbers (presumably atom name -> values, given the lookup by
      carbonBetaCodes[code] -- confirm against the caller).
  outputFilename -- prefix; writes <prefix>.pvals.txt (all atoms) and
      <prefix>.pvals.beta.txt (the carbonBetaCodes atom only)
  numTests -- number of regroupings drawn with statistics.permuteLists
  correctionAll, correctionBeta -- subtracted from the observed a-minus-b
      difference before it is compared with each permuted difference; the
      DiffMeans column that is written out is NOT corrected

  Each file gets a header plus one line per residue code in aminoAcid3Codes
  and a final "ALL" line for the values pooled over all those codes.
  Counts start at 1 and are divided by numTests + 1, so a reported p-value
  is never 0.  Ties count toward both pValAbove and pValBelow.

  Returns (overall difference of means, overall beta difference of means),
  both uncorrected.
  '''
  residueNames = list(aminoAcid3Codes)  # for now ignore what is in the files
  labels = residueNames + ["ALL"]  # "ALL" is a fake residue name for overall
  header = "ResidueName DiffMeans MeanA MeanB pValAbove pValBelow\n"
  # files are opened before the (long) permutation loop so that an unwritable
  # path fails immediately; the with-block closes both on any exit
  with open(outputFilename + ".pvals.txt", 'w') as allFile, \
      open(outputFilename + ".pvals.beta.txt", 'w') as betaFile:
    allFile.write(header)
    betaFile.write(header)
    means, betaMeans = _groupMeansByLabel(pdbs, residueNames)
    # observed differences do not change between permutations, so compute
    # them once.  The correction is applied to the observed side for every
    # label, including the "ALL" beta row.
    origDiffs, origBetaDiffs = {}, {}
    for label in labels:
      origDiffs[label] = means[label][0] - means[label][1] - correctionAll
      origBetaDiffs[label] = \
          betaMeans[label][0] - betaMeans[label][1] - correctionBeta
    # first is above, second is below; starting at 1 counts the observed
    # grouping itself as one of the numTests + 1 arrangements
    pValueCounts = [dict.fromkeys(labels, 1), dict.fromkeys(labels, 1)]
    pValueBetaCounts = [dict.fromkeys(labels, 1), dict.fromkeys(labels, 1)]
    for test in range(numTests):
      newPdbs = statistics.permuteLists(pdbs)
      testMeans, testBetaMeans = _groupMeansByLabel(newPdbs, residueNames)
      for label in labels:
        _tallyTails(
            pValueCounts, label, origDiffs[label],
            testMeans[label][0] - testMeans[label][1])
        _tallyTails(
            pValueBetaCounts, label, origBetaDiffs[label],
            testBetaMeans[label][0] - testBetaMeans[label][1])
    for label in labels:  # output time
      _writePvalRow(allFile, label, means[label], pValueCounts, numTests)
      _writePvalRow(
          betaFile, label, betaMeans[label], pValueBetaCounts, numTests)
  return (means["ALL"][0] - means["ALL"][1],
          betaMeans["ALL"][0] - betaMeans["ALL"][1])


def _groupMeansByLabel(pdbs, residueNames):
  '''Return (means, betaMeans): dicts of label -> (mean for a, mean for b).

  Labels are the residue codes plus "ALL", which holds the means of the
  values pooled over every code in residueNames.
  '''
  means, betaMeans = {}, {}
  pooled = [[], []]
  pooledBeta = [[], []]
  for code in residueNames:
    pair, betaPair = [], []
    for lindex in range(2):  # either a or b
      totalList, betaList = [], []
      for pdbResidues in pdbs[lindex]:
        if code in pdbResidues:
          atoms = pdbResidues[code]
          for atomValues in atoms.values():
            totalList.extend(atomValues)
          betaList.extend(atoms[carbonBetaCodes[code]])
      pair.append(statistics.computeMean(totalList))
      betaPair.append(statistics.computeMean(betaList))
      pooled[lindex].extend(totalList)
      pooledBeta[lindex].extend(betaList)
    means[code] = tuple(pair)
    betaMeans[code] = tuple(betaPair)
  means["ALL"] = tuple(statistics.computeMean(vals) for vals in pooled)
  betaMeans["ALL"] = tuple(
      statistics.computeMean(vals) for vals in pooledBeta)
  return means, betaMeans


def _tallyTails(counts, label, origDiff, testDiff):
  '''Bump counts[0][label] if testDiff >= origDiff, counts[1] if <=.'''
  if origDiff <= testDiff:
    counts[0][label] += 1
  if origDiff >= testDiff:
    counts[1][label] += 1


def _writePvalRow(outFile, label, meanPair, counts, numTests):
  '''Write one space-separated report line for label to outFile.'''
  denominator = float(1 + numTests)
  fields = [
      label,
      str(meanPair[0] - meanPair[1]),
      str(meanPair[0]),
      str(meanPair[1]),
      str(counts[0][label] / denominator),
      str(counts[1][label] / denominator)]
  # the space before the newline is kept for byte-compatible output
  outFile.write(" ".join(fields) + " \n")