def refinePockets(
        self, columnList, columnsToMean, columnsToStddev, resColNums, sizeCol,
        selfScores, sizeMin=-1, sizeMax=10000000000, doSelfScore=True,
        justNodes=None, matchList=None, possNodes=None, notTheseNodes=None):
    '''finds the node with the highest mean match score and tries to replace
    it with a better-scoring node from the same tree.

    matchList holds (tmData1, tmData2, node1, node2, score) tuples. every node
    in it, except those listed in notTheseNodes, gets the mean of its scores;
    the node with the largest mean is the one to fix. each candidate in
    possNodes[worstTree] whose attributes[sizeCol] lies strictly between
    sizeMin and sizeMax is scored with compareColumns against the node
    currently chosen (justNodes) for every other tree in self.tmDataList.
    the candidate with the lowest mean score wins.

    justNodes is modified in place when a different node wins.
    resColNums is accepted but not used here.

    returns (changed, worstNode). changed is True when justNodes was updated.
    returns (False, None) when there is no node to examine (empty or missing
    matchList, or every node excluded) and (False, worstNode) when no
    candidate passes the size filter.'''
    if notTheseNodes is None:
        notTheseNodes = []
    # gather every score each node received, and remember which tree owns it
    nodeToScores = {}
    nodeToTree = {}
    for tmData1, tmData2, node1, node2, score in (matchList or []):
        for aNode in (node1, node2):
            nodeToScores.setdefault(aNode, []).append(score)
        nodeToTree[node1] = tmData1
        nodeToTree[node2] = tmData2
    nodeMeans = []
    for aNode, scores in nodeToScores.items():
        if aNode not in notTheseNodes:
            nodeMeans.append((aNode, statistics.computeMean(scores)))
    if not nodeMeans:  # nothing to do
        return False, None
    # stable ascending sort, last entry is the biggest mean (the worst node)
    nodeMeans.sort(key=operator.itemgetter(1))
    worstNode = nodeMeans[-1][0]
    worstTree = nodeToTree[worstNode]
    candidateMeans = []
    for possNode in possNodes[worstTree]:
        if not sizeMin < possNode.attributes[sizeCol] < sizeMax:
            continue
        candidateScores = []
        for tmData in self.tmDataList:
            if tmData != worstTree:  # don't do this tree against itself
                justNode = justNodes[tmData]
                totalScore = compareColumns(
                    justNode, possNode, columnList, columnsToMean,
                    columnsToStddev)
                if doSelfScore:
                    # use the two nodes actually being compared. the previous
                    # code read node1/node2 left over from the matchList loop,
                    # which added one constant to every candidate.
                    totalScore += min(
                        selfScores[justNode], selfScores[possNode])
                candidateScores.append(totalScore)
        candidateMeans.append(
            (possNode, statistics.computeMean(candidateScores)))
    if not candidateMeans:  # no candidate of acceptable size, keep as is
        return False, worstNode
    # min() returns the first lowest entry, same as a stable sort then [0]
    newBestNode = min(candidateMeans, key=operator.itemgetter(1))[0]
    if worstNode != newBestNode:
        justNodes[worstTree] = newBestNode
        return True, worstNode  # indicates change
    return False, worstNode  # the node was already the best
def tstEdgeCurvature(trianglePoint, pointXyz, pointTriangle, pointNeighbor):
    '''measures the signed angle, in degrees, between the two triangles that
    share each edge, then averages those angles around every point.

    the records are indexed as the code reads them: trianglePoint rows are
    [triangle, pt, pt, ...], pointXyz rows are [point, coordinates...],
    pointTriangle rows keep their triangles from index 2 on and
    pointNeighbor rows are [point, count, neighbor, ...]. point and triangle
    numbers are 1-based indices into these lists.

    returns (edgeAngle, pointMeanAngle, pointWeightedMeanAngle). edgeAngle
    maps a sorted (ptA, ptB) tuple to its angle, negative when checkPlaneSide
    reports true (concave) and positive otherwise (convex). the two lists
    hold [point, mean] pairs, the second using angle * edge length.'''
    # corner coordinates of every triangle, keyed by triangle number
    triXyz = {}
    for triRecord in trianglePoint:
        triXyz[triRecord[0]] = [pointXyz[pt-1][1:] for pt in triRecord[1:]]
    edgeAngle = {}  # each edge is computed once and then reused
    pointMeanAngle = []
    pointWeightedMeanAngle = []
    for neighborRecord in pointNeighbor:
        mainPt = neighborRecord[0]
        angles, weightedAngles = [], []
        for otherPt in neighborRecord[2:]:  # index 1 is the neighbor count
            edge = tuple(sorted((mainPt, otherPt)))  # canonical edge key
            edgeLength = geometry.distL2(
                pointXyz[mainPt-1][1:], pointXyz[otherPt-1][1:])
            if edge not in edgeAngle:  # first visit, work out the angle
                mainTris = set(pointTriangle[mainPt-1][2:])
                otherTris = set(pointTriangle[otherPt-1][2:])
                sharedTris = list(mainTris.intersection(otherTris))
                # nearly always exactly two triangles, only two are used
                triA, triB = sharedTris[0], sharedTris[1]
                normalA = geometry.getTriNormalList(triXyz[triA])
                normalB = geometry.getTriNormalList(triXyz[triB])
                unsignedAngle = geometry.getAngle(normalA, normalB)
                planeA = geometry.calculatePlaneD(
                    normalA, geometry.getAverage(triXyz[triA]))
                # the corner of triangle B that is not on the shared edge
                offEdge = set(trianglePoint[triB-1][1:]).difference(set(edge))
                otherB = pointXyz[list(offEdge)[0]-1][1:]
                degrees = unsignedAngle * 180 / math.pi
                if geometry.checkPlaneSide(normalA+[planeA], otherB):
                    edgeAngle[edge] = -degrees  # concave negative
                else:
                    edgeAngle[edge] = degrees  # convex positive
            angle = edgeAngle[edge]
            angles.append(angle)
            weightedAngles.append(angle * edgeLength)
        pointMeanAngle.append([mainPt, statistics.computeMean(angles)])
        pointWeightedMeanAngle.append(
            [mainPt, statistics.computeMean(weightedAngles)])
    return edgeAngle, pointMeanAngle, pointWeightedMeanAngle
def summarizeOneFeature(tmDataList, columnName, intervals=50, outName="a.txt"):
    '''takes that column, makes a histogram for each structure.

    columnName is resolved with titleToColumn on the first entry of
    tmDataList and the column is read from every entry with getListColumn.
    intervals is the number of histogram bins wanted, or the string "max"
    for one bin per unit up to the largest value seen.

    writes a tab-separated table to outName: a header row (name, count,
    mean, stddev, then the start of each bin) followed by one row per entry
    holding its inputFileName, value count, mean, standard deviation and the
    histogram counts. returns nothing.'''
    columnNum = tmDataList[0].titleToColumn(columnName)
    treeData = []
    overallMax = 0.
    for tm3tree in tmDataList:
        data = tm3tree.getListColumn(columnNum)
        overallMax = max(overallMax, max(data))
        treeData.append((tm3tree, data))
    # max() hands back an int when the data are ints. force float so the
    # division below cannot truncate to 0, which made the header loop endless
    overallMax = float(overallMax)
    if intervals == "max":
        intervals = overallMax  # 1 per
    interval = overallMax / intervals  # width of one interval
    with open(outName, 'w') as outFile:
        #print a header
        outFile.write("name\tcount\tmean\tstddev\t")
        currentOut = 0.
        while currentOut < overallMax:
            outFile.write(str(currentOut) + "\t")
            currentOut += interval
        outFile.write("\n")
        for tm3tree, tm3data in treeData:
            avgData = statistics.computeMean(tm3data)
            stddevData = statistics.computeStdDev(tm3data, avgData)
            histo, outMax = statistics.computeHistogram(
                tm3data, interval, overallMax)
            outFile.write(tm3tree.inputFileName + "\t")
            outFile.write(str(len(tm3data)) + "\t")
            outFile.write(str(avgData) + "\t")
            outFile.write(str(stddevData) + "\t")
            for histoCount in histo:
                outFile.write(str(histoCount) + "\t")
            outFile.write("\n")
def analyzelistsunpaired(fileNames, numTests=1000000):
    '''reads two lists of numbers from the given files and prints statistics
    comparing them.

    a line holding a single value is added to the list numbered like its
    file (first file, first list). a line holding several values spreads
    them over the lists by position, so one file with two columns also
    works. only two lists exist, so more than two files, or more than two
    values on a line, raise IndexError.

    NOTE(review): earlier documentation said a list with a single value is
    replicated to the length of the other one. nothing here does that, so
    confirm whether pvalueDiffMeansLazy is expected to handle it.

    prints a header line and then mean1, mean2, diffMean, pVal1, pVal2 and
    cohenD separated by spaces. returns nothing.'''
    lists = [[], []]
    for fileCount, fileName in enumerate(fileNames):
        with open(fileName, 'r') as inFile:  # closed even when parsing fails
            for line in inFile:
                tokens = line.split()
                if len(tokens) == 1:
                    lists[fileCount].append(float(tokens[0]))
                else:
                    for tokenCount, token in enumerate(tokens):
                        lists[tokenCount].append(float(token))
    diffMean, pVal1, pVal2 = statistics.pvalueDiffMeansLazy(
        lists[0], lists[1], numTests)
    cohenD = statistics.cohenEffectSize(lists[0], lists[1])
    # single-string print calls give the same output on python 2 and 3
    print("mean1, mean2, diffMean, pVal1, pVal2, cohenD")
    results = (
        statistics.computeMean(lists[0]), statistics.computeMean(lists[1]),
        diffMean, pVal1, pVal2, cohenD)
    print(" ".join([str(result) for result in results]))
def makeMeanPerProteinReport(pdbs, outName):
    '''writes one line per protein holding its name and the mean of all of
    its values.

    pdbs is a dict keyed by name (a string). each value is a dict of dicts
    whose innermost values are lists of numbers. outName is the file to
    write, one "name<TAB>mean" line per key of pdbs. returns nothing.'''
    with open(outName, 'w') as outFile:  # closed even if computeMean raises
        for pdb in pdbs:
            totalList = []
            for resList in pdbs[pdb].values():
                for atomList in resList.values():
                    totalList.extend(atomList)
            avg = statistics.computeMean(totalList)
            outFile.write(pdb + "\t")
            outFile.write(str(avg) + "\n")
def calcColumnsMeanStddev(columnList, tmDataList):
    """pools each column over every entry of tmDataList and summarizes it.

    returns two dicts keyed by column: the first holds the mean of the pooled
    values, the second their standard deviation."""
    meanByColumn, stddevByColumn = {}, {}
    for column in columnList:
        pooled = []
        for tmData in tmDataList:
            pooled += tmData.getListColumn(column)
        pooledMean = statistics.computeMean(pooled)
        meanByColumn[column] = pooledMean
        stddevByColumn[column] = statistics.computeStdDev(pooled, pooledMean)
    return meanByColumn, stddevByColumn
def averageTheta(path):
    '''finds the angle between consecutive segments along a path.

    entries 1 to 3 of each node are taken as its point. for every three
    consecutive points a, b, c the angle between vector ab and vector bc is
    recorded. returns (thetas, mean of thetas), or ([], 0) when the path has
    two nodes or fewer.'''
    if len(path) <= 2:
        return [], 0  # too few nodes to form a single angle
    points = [node[1:4] for node in path]
    thetas = []
    for prevPt, midPt, nextPt in zip(points, points[1:], points[2:]):
        incoming = geometry.getVector(midPt, prevPt)
        outgoing = geometry.getVector(nextPt, midPt)
        thetas.append(geometry.getAngle(incoming, outgoing))
    return thetas, statistics.computeMean(thetas)
# NOTE(review): the return below is the tail of a definition whose header is
# not visible in this span, presumably the end of tstEdgeCurvature. confirm
# against the full file before relying on it.
    return edgeAngle, pointMeanAngle, pointWeightedMeanAngle

#this is main
# runs only when the script name contains "tstCurvature.py". every command
# line argument is read as a tst file and one summary line is printed for it.
if -1 != string.find(sys.argv[0], "tstCurvature.py"):
    for tstFileName in sys.argv[1:]:
        tstD = tstdata.tstData(
            tstFileName, necessaryKeys=tstdata.tstData.necessaryKeysForCurve)
        # eA: angle per edge, pA: mean angle per point (unused below),
        # pWA: length-weighted mean angle per point
        eA, pA, pWA = tstEdgeCurvature(
            tstD.dict['TRIANGLE_POINT'], tstD.dict['POINT_XYZ'],
            tstD.dict['POINT_TRIANGLE'], tstD.dict['POINT_NEIGHBOR'])
        # the string literal below is disabled code kept for reference. it is
        # never executed.
        '''
        #append curvature to tst file
        tstFile = open(tstFileName, 'a')
        tstdata.writeEntrySingleFloat(pWA, "POINT_CURVATURE_EDGE LIST", \
                                           "END POINT_CURVATURE_EDGE", tstFile)
        tstFile.close()
        '''
        # mean and mean absolute value of the per-point weighted curvature
        curves, absCurves = [], []
        for pointWeightCurv in pWA:
            curves.append(pointWeightCurv[1])
            absCurves.append(abs(pointWeightCurv[1]))
        meanCurv = statistics.computeMean(curves)
        meanAbsCurv = statistics.computeMean(absCurves)
        # the same two summaries over the per-edge angles
        curves, absCurves = [], []
        for pointWeightCurv in eA.values():
            curves.append(pointWeightCurv)
            absCurves.append(abs(pointWeightCurv))
        meanCurvE = statistics.computeMean(curves)
        meanAbsCurvE = statistics.computeMean(absCurves)
        print tstFileName, meanCurv, meanAbsCurv, meanCurvE, meanAbsCurvE
def makeAtomReport(residueData, outputFilename="atom.bfactor", runGraphs=True):
    '''writes per-atom statistics to a text file and optionally plots means
    per residue.

    residueData maps residue name to a dict of atom name to list of values.
    outputFilename + ".txt" receives one line per residue and atom, sorted by
    name, holding mean, standard deviation, lowest, highest and count.

    when gnuplotAvailable and runGraphs are both true, two png files are also
    made. outputFilename + ".png" compares backbone and sidechain means for
    each code in aminoAcid3Codes, and outputFilename + ".ab.png" compares the
    caCode and cbCode atoms. residues without values are plotted as 0.
    makeAminoAcidHistogram is called for every residue that has any values.
    returns nothing.'''
    with open(outputFilename + '.txt', 'w') as fileTemp:
        fileTemp.write("ResidueName AtomName Mean StdDev Low High Count\n")
        for residueName in sorted(residueData.keys()):
            atomData = residueData[residueName]
            for atomName in sorted(atomData.keys()):
                totalList = atomData[atomName]
                average = statistics.computeMean(totalList)
                stddev = statistics.computeStdDev(totalList, average)
                fileTemp.write(
                    residueName + " " + atomName + " " + str(average) + " " +
                    str(stddev) + " " + str(min(totalList)) + " " +
                    str(max(totalList)) + " " + str(len(totalList)) + "\n")
    if not (gnuplotAvailable and runGraphs):
        return
    #first make backbone-sidechain report
    plotter = Gnuplot.Gnuplot(debug=0)
    tickLabels = []
    yDataBackbone, yDataSidechain = [], []
    yDataCa, yDataCb = [], []
    for index, code in enumerate(aminoAcid3Codes):
        tickLabels.append('"' + str(code) + '" ' + str(index))
        backValues, sideValues = [], []
        caValues, cbValues = [], []
        try:
            atomData = residueData[code]
        except KeyError:  # sometimes one residue won't be represented
            atomData = {}  # but that is okay
        for key, values in atomData.items():
            atomCode = key.strip()
            if atomCode in backboneAtomCodes:
                backValues.extend(values)
            else:
                sideValues.extend(values)
            if atomCode == caCode:
                caValues.extend(values)
            elif atomCode == cbCode:
                cbValues.extend(values)
        yDataBackbone.append(_meanOrZero(backValues))
        yDataSidechain.append(_meanOrZero(sideValues))
        yDataCa.append(_meanOrZero(caValues))
        yDataCb.append(_meanOrZero(cbValues))
        if len(backValues + sideValues + caValues + cbValues) > 0:
            makeAminoAcidHistogram(
                plotter, backValues, sideValues, caValues, cbValues,
                outputFilename + "." + str(code))
    yLabels = '(' + ', '.join(tickLabels) + ')'
    # one x position per residue code rather than a fixed 20
    residueCount = len(aminoAcid3Codes)
    xValues = list(range(residueCount))
    graphDataBackbone = Gnuplot.Data(
        xValues, yDataBackbone, title="Backbone")
    graphDataSidechain = Gnuplot.Data(
        xValues, yDataSidechain, title="Sidechain")
    graphDataCa = Gnuplot.Data(xValues, yDataCa, title="C-alpha")
    graphDataCb = Gnuplot.Data(xValues, yDataCb, title="C-beta")
    plotter('set terminal png')
    plotter('set output "' + outputFilename + '.png"')
    plotter('set data style points')
    plotter('set key right top')
    plotter('set xtics ' + yLabels)
    plotter(
        'set yrange [' + str(min(yDataBackbone + yDataSidechain) - 0.5) +
        ':' + str(max(yDataBackbone+yDataSidechain)+0.5) + ']')
    plotter('set xrange [-1:' + str(residueCount) + ']')
    plotter.xlabel('Residue')
    plotter.ylabel('Mean Travel In Distance')
    plotter.plot(graphDataBackbone, graphDataSidechain)
    plotter('set output "' + outputFilename + '.ab.png"')
    if "buried" in outputFilename:
        # buried reports use a fixed upper limit of 6
        plotter('set yrange [' + str(min(yDataCa + yDataCb) - 0.5) + ':6.]')
    else:
        plotter(
            'set yrange [' + str(min(yDataCa + yDataCb) - 0.5) + ':' +
            str(max(yDataCa + yDataCb) + 0.5) + ']')
    plotter.plot(graphDataCa, graphDataCb)


def _meanOrZero(values):
    '''returns the arithmetic mean of values, or 0 when values is empty.'''
    if len(values) == 0:
        return 0
    return sum(values)/float(len(values))
def makeResidueReport(
        residueData, outputFilename="residue.bfactor", maxY=False,
        maxYBeta=False, runGraphs=False):
    '''writes per-residue statistics to a text file and optionally plots them.

    residueData maps residue name to a dict of atom name to list of values.
    every code in aminoAcid3Codes gets one line, in sorted order, in
    outputFilename + ".txt": mean, standard deviation, lowest, highest and
    count over all atoms of that residue. residues without values get 0.0 in
    the last three columns.

    when gnuplotAvailable and runGraphs are both true, outputFilename +
    ".png" plots the means with error bars, and outputFilename + ".beta.png"
    does the same for only the atom named by carbonBetaCodes. maxY and
    maxYBeta, when not False, fix the y range of each plot to [0:value].
    residues without beta values are plotted as 0. returns nothing.'''
    # sorted() copies. calling .sort() on the shared aminoAcid3Codes list
    # would silently reorder it for every other user of that list
    residueNames = sorted(aminoAcid3Codes)
    averages, stddevs = {}, {}
    betaAverages, betaStddevs = {}, {}
    with open(outputFilename + ".txt", 'w') as fileTemp:
        fileTemp.write("ResidueName Mean StdDev Low High Count\n")
        for residueName in residueNames:
            #assemble into one big list
            totalList = []
            if residueName in residueData:
                for data in residueData[residueName].values():
                    totalList.extend(data)
            average = statistics.computeMean(totalList)
            averages[residueName] = average
            stddev = statistics.computeStdDev(totalList, average)
            stddevs[residueName] = stddev
            betaList = []
            if residueName in residueData:
                betaList.extend(
                    residueData[residueName][carbonBetaCodes[residueName]])
            if len(betaList) > 0:
                betaAvg = statistics.computeMean(betaList)
                betaAverages[residueName] = betaAvg
                betaStddevs[residueName] = statistics.computeStdDev(
                    betaList, betaAvg)
            if len(totalList) > 0:
                low, high, count = \
                    min(totalList), max(totalList), len(totalList)
            else:
                low, high, count = 0., 0., 0.
            fileTemp.write(
                residueName + " " + str(average) + " " + str(stddev) + " " +
                str(low) + " " + str(high) + " " + str(count) + "\n")
    if not (gnuplotAvailable and runGraphs):
        return
    plotter = Gnuplot.Gnuplot(debug=0)
    tickLabels = []
    yData, yError, yMin, yMax = [], [], 10, 0
    yBetaData, yBetaError, yBetaMin, yBetaMax = [], [], 10, 0
    # residueNames is the order the in-place sort used to leave behind
    for index, code in enumerate(residueNames):
        tickLabels.append('"' + str(code) + '" ' + str(index))
        # averages holds every residue name, filled in the loop above
        yData.append(averages[code])
        yError.append(stddevs[code])
        yMin = min(yMin, yData[-1]-yError[-1])
        yMax = max(yMax, yData[-1]+yError[-1])
        if code in betaAverages:
            yBetaData.append(betaAverages[code])
            yBetaError.append(betaStddevs[code])
            yBetaMin = min(yBetaMin, yBetaData[-1]-yBetaError[-1])
            yBetaMax = max(yBetaMax, yBetaData[-1]+yBetaError[-1])
        else:  # no beta values for that residue, used to raise KeyError
            yBetaData.append(0)
            yBetaError.append(0)
    yLabels = '(' + ', '.join(tickLabels) + ')'
    residueCount = len(residueNames)
    xValues = list(range(residueCount))
    graphData = Gnuplot.Data(xValues, yData, yError)
    plotter('set terminal png')
    plotter('set output "' + outputFilename + '.png"')
    plotter('set data style yerrorbars')
    plotter('set boxwidth 0.9 absolute')
    plotter('set xtics ' + yLabels)
    if maxY is False:
        plotter('set yrange [' + str(yMin-0.2) + ':' + str(yMax+0.2) + ']')
    else:
        plotter('set yrange [0:' + str(maxY) + ']')
    plotter('set xrange [-1:' + str(residueCount) + ']')
    plotter.xlabel('Residue')
    plotter.ylabel('Mean Travel In Distance')
    plotter.plot(graphData)
    #do another graph with just carbon-betas
    plotter('set output "' + outputFilename + '.beta.png"')
    graphDataBeta = Gnuplot.Data(xValues, yBetaData, yBetaError)
    plotter.ylabel('Mean Travel In Distance of Carbon Beta')
    if maxYBeta is False:
        plotter(
            'set yrange [' + str(yBetaMin-0.2) + ':' + str(yBetaMax+0.2) +
            ']')
    else:
        plotter('set yrange [0:' + str(maxYBeta) + ']')
    plotter.plot(graphDataBeta)
def makeCompareResidueReport(
        residueBoth, outputFilename="residue.bfactor", maxY=False,
        maxYBeta=False, numTests=9):
    '''compares two residue data sets and reports differences of means.

    residueBoth holds two dicts, each mapping residue name to a dict of atom
    name to list of values. three text files are written:
    outputFilename + ".txt" has mean, standard deviation, lowest, highest and
    count for each residue of each set. ".pvals.txt" has the difference of
    means and the two values returned by statistics.pvalueDiffMeans for each
    code in aminoAcid3Codes. ".pvals.beta.txt" has the same for the atom
    named by carbonBetaCodes. a residue lacking data in either set is left
    out of the p-value files.

    when gnuplotAvailable is true, outputFilename + ".png" and ".beta.png"
    plot the differences, with 0 for residues lacking data. the y range is
    fixed by the local ranges list, so maxY and maxYBeta only matter if
    ranges is emptied. returns nothing.'''
    ranges = [-0.3, 0.6]
    residueNames = sorted(set(residueBoth[0]) | set(residueBoth[1]))
    #residueNames = aminoAcid3Codes #for now ignore what is in the files
    averages, stddevs = ({}, {}), ({}, {})
    betaAverages, betaStddevs = ({}, {}), ({}, {})
    totalLists, betaLists = ({}, {}), ({}, {})
    with open(outputFilename + ".txt", 'w') as fileTemp:
        fileTemp.write("ResidueName AtomName Mean StdDev Low High Count\n")
        for residueName in residueNames:
            for indexSet, residueData in enumerate(residueBoth):
                totalList, betaList = [], []
                try:
                    for data in residueData[residueName].values():
                        totalList.extend(data)
                    totalLists[indexSet][residueName] = totalList
                    average = statistics.computeMean(totalList)
                    averages[indexSet][residueName] = average
                    stddev = statistics.computeStdDev(totalList, average)
                    stddevs[indexSet][residueName] = stddev
                    betaList.extend(
                        residueData[residueName][carbonBetaCodes[residueName]])
                    betaLists[indexSet][residueName] = betaList
                    if len(betaList) > 0:
                        betaAvg = statistics.computeMean(betaList)
                        betaAverages[indexSet][residueName] = betaAvg
                        betaStddevs[indexSet][residueName] = \
                            statistics.computeStdDev(betaList, betaAvg)
                except (ZeroDivisionError, KeyError):
                    # this set has no usable data for this residue. skipping
                    # it is deliberate, later steps check for it.
                    continue
                # report on this set's own list. the two-list tuple was used
                # here before, so Low/High were lists and Count was always 2
                if len(totalList) > 0:
                    low, high = min(totalList), max(totalList)
                else:
                    low, high = 0., 0.
                fileTemp.write(
                    residueName + " " + str(average) + " " + str(stddev) +
                    " " + str(low) + " " + str(high) + " " +
                    str(len(totalList)) + "\n")
    pvalHeader = "ResidueName DiffMeans MeanA MeanB pValAbove pValBelow\n"
    with open(outputFilename + ".pvals.txt", 'w') as fileTemp2:
        fileTemp2.write(pvalHeader)
        for code in aminoAcid3Codes:  # now do the pvalue tests
            if code in averages[0] and code in averages[1]:
                _writePvalueRow(
                    fileTemp2, code, totalLists[0][code], totalLists[1][code],
                    averages[0][code], averages[1][code], numTests)
    with open(outputFilename + ".pvals.beta.txt", 'w') as fileTemp3:
        fileTemp3.write(pvalHeader)
        for code in aminoAcid3Codes:
            if code in betaAverages[0] and code in betaAverages[1]:
                _writePvalueRow(
                    fileTemp3, code, betaLists[0][code], betaLists[1][code],
                    betaAverages[0][code], betaAverages[1][code], numTests)
    if not gnuplotAvailable:
        return
    plotter = Gnuplot.Gnuplot(debug=0)
    tickLabels = []
    yData, yMin, yMax = [], 10, 0
    yBetaData, yBetaMin, yBetaMax = [], 10, 0
    for index, code in enumerate(aminoAcid3Codes):
        tickLabels.append('"' + str(code) + '" ' + str(index))
        try:
            avgDiff = averages[0][code] - averages[1][code]
        except KeyError:  # residue missing from a set, plot it as 0
            avgDiff = 0.
        yData.append(avgDiff)
        yMin = min(yMin, yData[-1])
        yMax = max(yMax, yData[-1])
        try:
            betaAvgDiff = betaAverages[0][code] - betaAverages[1][code]
        except KeyError:
            print(code)
            betaAvgDiff = 0.
        yBetaData.append(betaAvgDiff)
        yBetaMin = min(yBetaMin, yBetaData[-1])
        yBetaMax = max(yBetaMax, yBetaData[-1])
    yLabels = '(' + ', '.join(tickLabels) + ')'
    residueCount = len(aminoAcid3Codes)
    xValues = list(range(residueCount))
    graphData = Gnuplot.Data(xValues, yData)
    plotter('set terminal png')
    plotter('set output "' + outputFilename + '.png"')
    plotter('set data style points')
    plotter('set boxwidth 0.9 absolute')
    plotter('set xtics ' + yLabels)
    if ranges:
        plotter('set yrange [' + str(ranges[0]) + ':' + str(ranges[1]) + ']')
    elif maxY is False:
        plotter('set yrange [' + str(yMin-0.2) + ':' + str(yMax+0.2) + ']')
    else:
        plotter('set yrange [0:' + str(maxY) + ']')
    plotter('set xrange [-1:' + str(residueCount) + ']')
    plotter.xlabel('Residue')
    plotter.ylabel('Mean Travel In Distance')
    plotter.plot(graphData)
    #do another graph with just carbon-betas
    plotter('set output "' + outputFilename + '.beta.png"')
    graphDataBeta = Gnuplot.Data(xValues, yBetaData)
    plotter.ylabel('Mean Travel In Distance of Carbon Beta')
    if ranges:
        plotter('set yrange [' + str(ranges[0]) + ':' + str(ranges[1]) + ']')
    elif maxYBeta is False:
        plotter(
            'set yrange [' + str(yBetaMin-0.2) + ':' + str(yBetaMax+0.2) +
            ']')
    else:
        plotter('set yrange [0:' + str(maxYBeta) + ']')
    plotter.plot(graphDataBeta)


def _writePvalueRow(outFile, code, listA, listB, meanA, meanB, numTests):
    '''writes one "code diff meanA meanB pValAbove pValBelow" line.'''
    pvals = statistics.pvalueDiffMeans(listA, listB, meanA-meanB, numTests)
    outFile.write(code + " " + str(meanA-meanB) + " " + str(meanA) + " ")
    outFile.write(str(meanB) + " " + str(pvals[0]) + " " + str(pvals[1]))
    outFile.write("\n")
def makeCompareResidueReportAlternate(
        pdbs, outputFilename="residue.bfactor", numTests=9999,
        correctionAll=0., correctionBeta=0.):
    '''different way to do p-vals, instead of permuting all data, permute the
    pairs of hyp/meso pdb files.

    pdbs holds two groups (a and b). each group is a sequence of dicts
    mapping residue name to a dict of atom name to list of values. for each
    code in aminoAcid3Codes, and for all codes pooled as "ALL", the
    difference of the group means is compared against numTests regroupings
    produced by statistics.permuteLists. correctionAll and correctionBeta are
    subtracted from the observed difference before each comparison.

    outputFilename + ".pvals.txt" and ".pvals.beta.txt" receive, per code,
    the observed difference, both means, and the fraction of tests (counting
    the observed one) at or above, then at or below, the corrected observed
    difference. the files are written after all tests have run.

    returns (difference of overall means, difference of overall beta
    means), both uncorrected.'''
    residueNames = list(aminoAcid3Codes)  # for now ignore what is in the files
    #first find means
    means, betaMeans, totalMeans, totalBetaMeans = _groupMeansForReport(
        pdbs, residueNames)
    pValueCounts = [{}, {}]  # first is above, second is below
    pValueBetaCounts = [{}, {}]
    # counts start at 1 so the observed grouping counts as one of the tests
    for code in residueNames + ["ALL"]:
        for aboveBelow in range(2):
            pValueCounts[aboveBelow][code] = 1
            pValueBetaCounts[aboveBelow][code] = 1
    for _ in range(numTests):
        newPdbs = statistics.permuteLists(pdbs)
        testMeans, testBetaMeans, totalTestMeans, totalTestBetaMeans = \
            _groupMeansForReport(newPdbs, residueNames)
        for code in residueNames:  # calc pval for each residue
            _tallyPermutationTest(
                pValueCounts, code,
                means[0][code] - means[1][code] - correctionAll,
                testMeans[0][code] - testMeans[1][code])
            _tallyPermutationTest(
                pValueBetaCounts, code,
                betaMeans[0][code] - betaMeans[1][code] - correctionBeta,
                testBetaMeans[0][code] - testBetaMeans[1][code])
        _tallyPermutationTest(
            pValueCounts, "ALL",
            totalMeans[0] - totalMeans[1] - correctionAll,
            totalTestMeans[0] - totalTestMeans[1])
        # the correction belongs on the observed difference, as in every
        # other test. it used to be subtracted from the permuted difference
        # here, which flipped its sign for the overall beta p-values.
        _tallyPermutationTest(
            pValueBetaCounts, "ALL",
            totalBetaMeans[0] - totalBetaMeans[1] - correctionBeta,
            totalTestBetaMeans[0] - totalTestBetaMeans[1])
    header = "ResidueName DiffMeans MeanA MeanB pValAbove pValBelow\n"
    with open(outputFilename + ".pvals.txt", 'w') as fileTemp2:
        fileTemp2.write(header)
        for code in residueNames:  # output time
            fileTemp2.write(_formatPvalueRow(
                code, means[0][code], means[1][code], pValueCounts, code,
                numTests))
        fileTemp2.write(_formatPvalueRow(
            "ALL", totalMeans[0], totalMeans[1], pValueCounts, "ALL",
            numTests))
    with open(outputFilename + ".pvals.beta.txt", 'w') as fileTemp3:
        fileTemp3.write(header)
        for code in residueNames:
            fileTemp3.write(_formatPvalueRow(
                code, betaMeans[0][code], betaMeans[1][code],
                pValueBetaCounts, code, numTests))
        fileTemp3.write(_formatPvalueRow(
            "ALL", totalBetaMeans[0], totalBetaMeans[1], pValueBetaCounts,
            "ALL", numTests))
    return totalMeans[0]-totalMeans[1], totalBetaMeans[0]-totalBetaMeans[1]


def _groupMeansForReport(pdbGroups, residueNames):
    '''returns (means, betaMeans, totalMeans, totalBetaMeans) for two groups.

    means and betaMeans are two-item lists of dicts keyed by residue code.
    the totals are two-item lists over all codes pooled.'''
    means, betaMeans = [{}, {}], [{}, {}]
    totalMeans, totalBetaMeans = [0., 0.], [0., 0.]
    for lindex in range(2):  # either a or b
        overallList, overallBetaList = [], []
        for code in residueNames:
            totalList, betaList = [], []
            for pdbResidues in pdbGroups[lindex]:
                if code in pdbResidues:
                    for atomValues in pdbResidues[code].values():
                        totalList.extend(atomValues)
                    betaList.extend(pdbResidues[code][carbonBetaCodes[code]])
            means[lindex][code] = statistics.computeMean(totalList)
            betaMeans[lindex][code] = statistics.computeMean(betaList)
            overallList.extend(totalList)
            overallBetaList.extend(betaList)
        totalMeans[lindex] = statistics.computeMean(overallList)
        totalBetaMeans[lindex] = statistics.computeMean(overallBetaList)
    return means, betaMeans, totalMeans, totalBetaMeans


def _tallyPermutationTest(counts, code, origMeanDiff, testMeanDiff):
    '''adds one to counts[0][code] / counts[1][code] when the permuted
    difference is at or above / at or below the observed one.'''
    if origMeanDiff <= testMeanDiff:
        counts[0][code] += 1
    if origMeanDiff >= testMeanDiff:
        counts[1][code] += 1


def _formatPvalueRow(label, meanA, meanB, counts, code, numTests):
    '''returns one "label diff meanA meanB pAbove pBelow " line.'''
    total = float(1+numTests)
    return (
        label + " " + str(meanA-meanB) + " " + str(meanA) + " " +
        str(meanB) + " " + str(counts[0][code]/total) + " " +
        str(counts[1][code]/total) + " " + "\n")