def run(self, test, signLevel, statsResults, trials, bootstrapRep, progress):
    """Estimate the power of a hypothesis test by simulation.

    Each row of statsResults supplies, by position, a feature name, two
    sequence counts and their two parent sequence counts. For every trial,
    bootstrapRep replicate count pairs are simulated from the observed
    proportions and handed to test.hypothesisTest(); the power of a trial
    is the fraction of processed replicates whose two-sided p-value is
    below signLevel.

    progress is '' to disable reporting; any other value must provide
    setValue() and setLabelText().

    Returns a list holding one row per input row: feature, seq1, seq2,
    parentSeq1, parentSeq2, the two proportions, mean and standard
    deviation of the power over all trials, followed by the same pair
    restricted to features with min(seq1, seq2) <= 5 and to the remaining
    features ('' wherever the statistic is NaN).
    """

    def blankIfNaN(value):
        # Table cells hold an empty string rather than NaN.
        return '' if math.isnan(value) else value

    def simulateCount(numDraws, proportion):
        # One uniform draw per parent sequence; a draw at or below the
        # proportion counts as a hit.
        return sum(1 for _ in xrange(0, numDraws)
                   if random.random() <= proportion)

    tableData = []
    progressValue = 0
    for entry in statsResults:
        feature, seq1, seq2, parentSeq1, parentSeq2 = (
            entry[0], entry[1], entry[2], entry[3], entry[4])

        p1 = float(seq1) / parentSeq1
        p2 = float(seq2) / parentSeq2
        isLowCount = min([seq1, seq2]) <= 5

        allPowers = []
        lowCountPowers = []
        highCountPowers = []
        for trial in xrange(0, trials):
            if progress != '':
                progressValue += 1
                progress.setValue(progressValue)
                progress.setLabelText(feature + ' - Trial = ' + str(trial))

            significant = 0
            processed = 0
            for _ in xrange(0, bootstrapRep):
                c1 = simulateCount(parentSeq1, p1)
                c2 = simulateCount(parentSeq2, p2)

                if c1 == 0 and c2 == 0:
                    # Many hypothesis tests do not handle this case
                    # correctly, so the replicate is ignored. This has
                    # little effect on the calculated power of a test.
                    continue

                processed += 1
                _oneSided, pValueTwoSided = test.hypothesisTest(
                    c1, c2, parentSeq1, parentSeq2)
                if pValueTwoSided < signLevel:
                    significant += 1

            if processed <= 0:
                # Every replicate was skipped; the trial contributes nothing.
                continue

            trialPower = float(significant) / processed
            if isLowCount:
                lowCountPowers.append(trialPower)
            else:
                highCountPowers.append(trialPower)
            allPowers.append(trialPower)

        tableData.append([
            feature,
            seq1,
            seq2,
            parentSeq1,
            parentSeq2,
            p1,
            p2,
            mean(allPowers),
            stdDev(allPowers),
            blankIfNaN(mean(lowCountPowers)),
            blankIfNaN(stdDev(lowCountPowers)),
            blankIfNaN(mean(highCountPowers)),
            blankIfNaN(stdDev(highCountPowers)),
        ])

    return tableData
def run(self, statTest, effectSizeMeasure, profile, progress=None):
    """Run a statistical test on every feature of a multi-group profile.

    Fills self.results.data with one row per feature: the feature name,
    the p-value (stored twice), the effect size and the test's note, then
    the mean and standard deviation of the relative frequencies (%) of
    each group, then for every sample its count, parent count and relative
    frequency (%). A sample whose parent count is not positive gets a
    relative frequency of 0.0. Rows are finally sorted on the 'pValues'
    column.

    progress may be None (silent), the string 'Verbose' (feature names are
    printed) or an object offering wasCanceled() and setValue(). When the
    latter reports cancellation the results are cleared and the method
    returns early.
    """

    def percentage(count, parent):
        # Guard against samples without any parent sequences.
        return count * 100.0 / parent if parent > 0 else 0.0

    verbose = progress == 'Verbose'
    reportsProgress = progress is not None and not verbose

    results = self.results
    results.data = []
    results.test = statTest.name
    results.profile = profile

    if verbose:
        print(' Processing feature:')

    step = 0
    for feature in profile.getFeatures():
        if verbose:
            print(' ' + feature)
        elif reportsProgress:
            if progress.wasCanceled():
                results.data = []
                return
            step += 1
            progress.setValue(step)

        seqCount = profile.getActiveFeatureCounts(feature)
        parentCount = profile.getActiveParentCounts(feature)
        data = profile.getActiveFeatureProportions(feature)

        pValue, note = statTest.hypothesisTest(data)
        effectSize = effectSizeMeasure.run(data)

        # Relative frequencies per group, reused for both the group
        # summaries and the per-sample columns.
        groupPercentages = [
            [percentage(count, parent)
             for count, parent in zip(counts, parents)]
            for counts, parents in zip(seqCount, parentCount)
        ]

        row = [feature, float(pValue), float(pValue), effectSize, note]
        for percentages in groupPercentages:
            groupMean = mean(percentages)
            row.extend([groupMean, stdDev(percentages, groupMean)])

        for counts, parents, percentages in zip(seqCount, parentCount,
                                                groupPercentages):
            for sampleStats in zip(counts, parents, percentages):
                row.extend(sampleStats)

        results.data.append(row)

    headingsSampleStats = []
    for samples in profile.activeSamplesInGroups:
        for sampleName in samples:
            headingsSampleStats.extend([
                sampleName,
                sampleName + ': parent seq. count',
                sampleName + ': rel. freq. (%)',
            ])

    results.createTableHeadings(profile.activeGroupNames, headingsSampleStats)

    if len(results.data) >= 1:
        # sort results according to p-values
        results.data = TableHelper.SortTable(
            results.data, [results.dataHeadings['pValues']])

    if reportsProgress:
        step += 1
        progress.setValue(step)
def plot(self, profile, statsResults):
    """Draw a scatter plot comparing the mean proportions of two groups.

    Each feature becomes one point (group 1 mean on x, group 2 mean on y)
    coloured after the group with the larger mean. Depending on
    self.spreadMethod a horizontal and a vertical spread line are drawn
    through every point, and when self.bShowHistograms is set a histogram
    of each group's means is attached to the top and right of the scatter
    axes. Tooltips are registered through self.mouseEventCallback().

    An empty profile produces an empty axis. For profiles with more than
    10000 features the user is asked for confirmation first; answering
    'No' also produces an empty axis.
    """
    numFeatures = len(profile.profileDict)
    if numFeatures <= 0:
        self.emptyAxis()
        return

    if numFeatures > 10000:
        application = QtGui.QApplication.instance()
        application.setOverrideCursor(QtGui.QCursor(QtCore.Qt.ArrowCursor))
        reply = QtGui.QMessageBox.question(
            self, 'Continue?',
            'Profile contains ' + str(numFeatures) + ' features. ' +
            'It may take several seconds to generate this plot. Exploring the data at a higher hierarchy level is recommended. ' +
            'Do you wish to continue?',
            QtGui.QMessageBox.Yes, QtGui.QMessageBox.No)
        application.restoreOverrideCursor()
        if reply == QtGui.QMessageBox.No:
            self.emptyAxis()
            return

    # *** Colour of plot elements
    groupColours = self.preferences['Group colours']
    axesColour = str(self.preferences['Axes colour'].name())
    group1Colour = str(groupColours[profile.groupName1].name())
    group2Colour = str(groupColours[profile.groupName2].name())

    # *** Set sample names
    self.groupName1 = profile.groupName1
    self.groupName2 = profile.groupName2

    percentileMethods = (
        ('25th and 75th percentile', [0.25, 0.75]),
        ('9th and 91st percentile', [0.09, 0.91]),
        ('2nd and 98th percentile', [0.02, 0.98]),
    )
    stdDevMethods = (
        ('standard deviation', 1),
        ('2 * standard deviation', 2),
    )

    def spreadBounds(values, centre):
        # Return the pair of spread end points for one feature of one
        # group, or None when self.spreadMethod is not a known method.
        method = self.spreadMethod
        for name, multiple in stdDevMethods:
            if method == name:
                halfWidth = multiple * stdDev(values, centre)
                return [max(centre - halfWidth, 0),
                        min(centre + halfWidth, 100)]
        for name, probabilities in percentileMethods:
            if method == name:
                quantiles = mquantiles(values, prob=probabilities)
                return [max(quantiles[0], 0), min(quantiles[1], 100)]
        if method == 'minimum and maximum':
            return [max(values), min(values)]
        return None

    def styleAxes(axes):
        # Show ticks only on the left/bottom, colour them and hide the
        # right and top spines.
        for axis in (axes.yaxis, axes.xaxis):
            for tick in axis.majorTicks:
                tick.tick1On = True
                tick.tick2On = False
        for axis in (axes.yaxis, axes.xaxis):
            for tickLine in axis.get_ticklines():
                tickLine.set_color(axesColour)
        for location, spine in axes.spines.iteritems():
            if location in ['right', 'top']:
                spine.set_color('none')
            else:
                spine.set_color(axesColour)

    # *** Create lists for each quantity of interest and calculate spread of data
    groupData1, groupData2 = profile.getFeatureProportionsAll()
    features = profile.getFeatures()

    field1 = []
    field2 = []
    xSpread = []
    ySpread = []
    for values1, values2 in zip(groupData1, groupData2):
        mean1 = mean(values1)
        mean2 = mean(values2)
        field1.append(mean1)
        field2.append(mean2)

        bounds1 = spreadBounds(values1, mean1)
        if bounds1 is not None:
            xSpread.append(bounds1)
            ySpread.append(spreadBounds(values2, mean2))

    # *** Set figure size
    self.fig.clear()
    self.fig.set_size_inches(self.figWidth, self.figHeight)

    if self.bShowHistograms:
        histogramSizeX = self.histogramSize / self.figWidth
        histogramSizeY = self.histogramSize / self.figHeight
    else:
        histogramSizeX = 0.0
        histogramSizeY = 0.0

    padding = 0.1  # inches
    xOffsetFigSpace = (0.4 + padding) / self.figWidth
    yOffsetFigSpace = (0.3 + padding) / self.figHeight
    scatterWidth = (1.0 - xOffsetFigSpace - histogramSizeX
                    - (2 * padding) / self.figWidth)
    scatterHeight = (1.0 - yOffsetFigSpace - histogramSizeY
                     - (2 * padding) / self.figHeight)

    axesScatter = self.fig.add_axes(
        [xOffsetFigSpace, yOffsetFigSpace, scatterWidth, scatterHeight])

    if self.bShowHistograms:
        axesTopHistogram = self.fig.add_axes([
            xOffsetFigSpace,
            1.0 - histogramSizeY - padding / self.figHeight,
            scatterWidth,
            histogramSizeY
        ])
        axesRightHistogram = self.fig.add_axes([
            1.0 - histogramSizeX - padding / self.figWidth,
            yOffsetFigSpace,
            histogramSizeX,
            scatterHeight
        ])

    # *** Handle mouse events
    tooltips = []
    for feature, proportion1, proportion2 in zip(features, field1, field2):
        tooltip = feature + '\n\n'
        tooltip += (self.groupName1 + ' mean proportion: %.3f' % proportion1) + '\n'
        tooltip += (self.groupName2 + ' mean proportion: %.3f' % proportion2) + '\n\n'
        tooltip += 'Difference between mean proportions (%): ' + (
            '%.3f' % (proportion1 - proportion2)) + '\n'

        if proportion2 != 0:
            tooltip += 'Ratio of mean proportions: %.3f' % (proportion1 / proportion2)
        else:
            tooltip += 'Ratio of mean proportions: undefined'

        if statsResults.profile is not None:
            pValue = statsResults.getFeatureStatisticAsStr(feature, 'pValues')
            pValueCorrected = statsResults.getFeatureStatisticAsStr(
                feature, 'pValuesCorrected')
            tooltip += '\n\n'
            tooltip += 'p-value: ' + pValue + '\n'
            tooltip += 'Corrected p-value: ' + pValueCorrected

        tooltips.append(tooltip)

    self.plotEventHandler = PlotEventHandler(field1, field2, tooltips)
    self.mouseEventCallback(self.plotEventHandler)

    # *** Calculate R^2 value
    _slope, _intercept, rValue, _pValue, _stdErr = linregress(field1, field2)

    # *** Plot data

    # set visual properties of all points
    colours = [group1Colour if proportion1 > proportion2 else group2Colour
               for proportion1, proportion2 in zip(field1, field2)]

    highlightedFeatures = self.preferences['Highlighted group features']
    highlightedField1 = []
    highlightedField2 = []
    highlightColours = []
    for feature, proportion1, proportion2, colour in zip(
            features, field1, field2, colours):
        if feature in highlightedFeatures:
            highlightedField1.append(proportion1)
            highlightedField2.append(proportion2)
            highlightColours.append(colour)

    # scatter plot
    axesScatter.scatter(field1, field2, c=colours, s=self.markerSize,
                        zorder=5)
    if len(highlightedField1) > 0:
        axesScatter.scatter(highlightedField1, highlightedField2,
                            c=highlightColours, s=self.markerSize,
                            edgecolors='red', linewidth=2, zorder=10)

    # plot CIs
    if self.spreadMethod != 'None':
        xlist = []
        ylist = []
        for i in xrange(0, len(field1)):
            # horizontal CIs (None separates the individual segments)
            xlist.extend([xSpread[i][0], xSpread[i][1], None])
            ylist.extend([field2[i], field2[i], None])

            # vertical CIs
            xlist.extend([field1[i], field1[i], None])
            ylist.extend([ySpread[i][0], ySpread[i][1], None])

        axesScatter.plot(xlist, ylist, '-', color='gray', antialiased=False)

    # plot y=x line
    maxProportion = max(max(field1), max(field2)) * 1.05
    axesScatter.plot([0, maxProportion], [0, maxProportion],
                     color=axesColour, linestyle='dashed', marker='',
                     zorder=1)

    axesScatter.set_xlabel(self.groupName1 + ' (%)')
    axesScatter.set_ylabel(self.groupName2 + ' (%)')

    if self.bShowR2:
        axesScatter.text(0.02, 0.98,
                         r'R$^2$ = ' + ('%0.3f' % rValue**2),
                         horizontalalignment='left',
                         verticalalignment='top',
                         transform=axesScatter.transAxes)

    axesScatter.set_xlim(0, maxProportion)
    axesScatter.set_ylim(0, maxProportion)

    # *** Prettify scatter plot
    for axis in (axesScatter.yaxis, axesScatter.xaxis):
        for tickLine in axis.get_ticklines():
            tickLine.set_color(axesColour)

    for location, spine in axesScatter.spines.iteritems():
        spine.set_color(axesColour)

    # plot histograms
    if not self.bShowHistograms:
        styleAxes(axesScatter)
    else:  # show histograms
        # plot top histogram
        axesTopHistogram.xaxis.set_major_formatter(NullFormatter())
        pdf, _bins, _patches = axesTopHistogram.hist(
            field1, bins=self.numBins, facecolor=group1Colour)
        axesTopHistogram.set_xlim(axesScatter.get_xlim())
        axesTopHistogram.set_yticks([0, max(pdf)])
        axesTopHistogram.set_ylim([0, max(pdf) * 1.05])

        # plot right histogram
        axesRightHistogram.yaxis.set_major_formatter(NullFormatter())
        pdf, _bins, _patches = axesRightHistogram.hist(
            field2, bins=self.numBins, orientation='horizontal',
            facecolor=group2Colour)
        axesRightHistogram.set_ylim(axesScatter.get_ylim())
        axesRightHistogram.set_xticks([0, max(pdf)])
        axesRightHistogram.set_xlim([0, max(pdf) * 1.05])

        # *** Prettify histogram plot
        styleAxes(axesTopHistogram)
        styleAxes(axesRightHistogram)

    self.updateGeometry()
    self.draw()
def run(self, statTest, effectSizeMeasure, profile, progress=None):
    """Run a statistical test on every feature of a multi-group profile.

    Fills self.results.data with one row per feature: the feature name,
    the p-value (stored twice), the effect size and the test's note, then
    the mean and standard deviation of the relative frequencies (%) of
    each group, then for every sample its count, parent count and relative
    frequency (%). Rows are finally sorted on the 'pValues' column.

    A sample whose parent count is not positive is given a relative
    frequency of 0.0 instead of dividing by zero.

    progress may be None (silent), the string 'Verbose' (feature names are
    printed) or an object offering wasCanceled() and setValue(). When the
    latter reports cancellation the results are cleared and the method
    returns early.
    """

    def percentage(count, parent):
        # A sample without parent sequences has no defined proportion;
        # report 0.0 rather than raising ZeroDivisionError.
        return count * 100.0 / parent if parent > 0 else 0.0

    verbose = progress == 'Verbose'
    reportsProgress = progress is not None and not verbose

    results = self.results
    results.data = []
    results.test = statTest.name
    results.profile = profile

    if verbose:
        print(' Processing feature:')

    step = 0
    for feature in profile.getFeatures():
        if verbose:
            print(' ' + feature)
        elif reportsProgress:
            if progress.wasCanceled():
                results.data = []
                return
            step += 1
            progress.setValue(step)

        seqCount = profile.getActiveFeatureCounts(feature)
        parentCount = profile.getActiveParentCounts(feature)
        data = profile.getActiveFeatureProportions(feature)

        pValue, note = statTest.hypothesisTest(data)
        effectSize = effectSizeMeasure.run(data)

        # Relative frequencies per group, computed once and reused for
        # both the group summaries and the per-sample columns.
        groupPercentages = [
            [percentage(count, parent)
             for count, parent in zip(counts, parents)]
            for counts, parents in zip(seqCount, parentCount)
        ]

        row = [feature, float(pValue), float(pValue), effectSize, note]
        for percentages in groupPercentages:
            groupMean = mean(percentages)
            row.extend([groupMean, stdDev(percentages, groupMean)])

        for counts, parents, percentages in zip(seqCount, parentCount,
                                                groupPercentages):
            for sampleStats in zip(counts, parents, percentages):
                row.extend(sampleStats)

        results.data.append(row)

    headingsSampleStats = []
    for samples in profile.activeSamplesInGroups:
        for sampleName in samples:
            headingsSampleStats.extend([
                sampleName,
                sampleName + ': parent seq. count',
                sampleName + ': rel. freq. (%)',
            ])

    results.createTableHeadings(profile.activeGroupNames, headingsSampleStats)

    if len(results.data) >= 1:
        # sort results according to p-values
        results.data = TableHelper.SortTable(
            results.data, [results.dataHeadings['pValues']])

    if reportsProgress:
        step += 1
        progress.setValue(step)
# Per-method coverage values, CI lengths and display names, kept in the
# same order so that they can be walked in parallel.
results = [
    coverageListDP, coverageListDPCC, coverageListNW,
    coverageListWoolf, coverageListGart, coverageListRP,
]
lengths = [
    ciLengthDP, ciLengthDPCC, ciLengthNW,
    ciLengthWoolf, ciLengthGart, ciLengthRP,
]
methodNames = [
    'DP: Asymptotic', 'DP: Asymptotic-CC', 'Newcombe-Wilson',
    'Woolf', 'Gart', 'RP: Asympototic',
]

# For each method write its name, the coverage as mean+/-sd[min;max], the
# CI length as mean+/-sd, and a blank separator line.
for methodName, coverages, ciLengths in zip(methodNames, results, lengths):
    # All statistics are formatted before anything is written.
    coverageText = '%.2f' % mean(coverages)
    coverageText += '+/-' + '%.2f' % stdDev(coverages)
    coverageText += '[' + '%.2f' % min(coverages)
    coverageText += ';' + '%.2f' % max(coverages) + ']\n'

    lengthText = '%.2f' % mean(ciLengths)
    lengthText += '+/-' + '%.2f' % stdDev(ciLengths) + '\n'

    fout.write(methodName + '\n')
    fout.write(coverageText)
    fout.write(lengthText)
    fout.write('\n')

fout.close()
#=======================================================================
def run(self, confIntervMethod, coverage, tables, trials, bootstrapRep, progress):
    """Estimate the coverage of a confidence interval method by simulation.

    Each row of tables supplies, by position, a feature name, two sequence
    counts and their two parent sequence counts. The observed effect size
    is taken from confIntervMethod.run() on the observed counts. For every
    trial, bootstrapRep replicate count pairs are drawn with binomial()
    from the observed proportions; the coverage of a trial is the fraction
    of replicates whose confidence interval contains the observed effect
    size.

    progress is '' to disable reporting; any other value must provide
    setValue() and setLabelText().

    Returns a list holding one row per input row: feature, seq1, seq2,
    parentSeq1, parentSeq2, the two proportions, mean and standard
    deviation of the coverage over all trials, followed by the same pair
    restricted to features with min(seq1, seq2) <= 5 and to the remaining
    features ('' wherever the statistic is NaN).
    """

    def blankIfNaN(value):
        # Table cells hold an empty string rather than NaN.
        return '' if math.isnan(value) else value

    tableData = []
    progressValue = 0
    for entry in tables:
        feature, seq1, seq2, parentSeq1, parentSeq2 = (
            entry[0], entry[1], entry[2], entry[3], entry[4])

        _lower, _upper, obsEffectSize = confIntervMethod.run(
            seq1, seq2, parentSeq1, parentSeq2, coverage)

        p1 = float(seq1) / parentSeq1
        p2 = float(seq2) / parentSeq2
        isLowCount = min([seq1, seq2]) <= 5

        allCoverages = []
        lowCountCoverages = []
        highCountCoverages = []
        for trial in xrange(0, trials):
            if progress != '':
                progressValue += 1
                progress.setValue(progressValue)
                progress.setLabelText(feature + ' - Trial = ' + str(trial))

            contained = 0
            for _ in xrange(0, bootstrapRep):
                c1 = binomial(parentSeq1, p1)
                c2 = binomial(parentSeq2, p2)
                lowerCI, upperCI, _effectSize = confIntervMethod.run(
                    c1, c2, parentSeq1, parentSeq2, coverage)
                if lowerCI <= obsEffectSize <= upperCI:
                    contained += 1

            trialCoverage = float(contained) / bootstrapRep
            if isLowCount:
                lowCountCoverages.append(trialCoverage)
            else:
                highCountCoverages.append(trialCoverage)
            allCoverages.append(trialCoverage)

        tableData.append([
            feature,
            seq1,
            seq2,
            parentSeq1,
            parentSeq2,
            p1,
            p2,
            mean(allCoverages),
            stdDev(allCoverages),
            blankIfNaN(mean(lowCountCoverages)),
            blankIfNaN(stdDev(lowCountCoverages)),
            blankIfNaN(mean(highCountCoverages)),
            blankIfNaN(stdDev(highCountCoverages)),
        ])

    return tableData
def run(self, statTest, testType, confIntervMethod, coverage, profile, progress = None):
    """Run a two-group statistical test on every feature of a profile.

    Fills self.results.data with one row per feature: the feature name,
    mean and standard deviation of the relative frequencies (%) of group 1
    and of group 2, the selected p-value (stored twice), the effect size,
    the lower and upper confidence interval bounds and the test's note,
    followed by count, parent count and relative frequency (%) of every
    sample of group 1 and then of group 2. A sample whose parent count is
    not positive gets a relative frequency of 0.0. Rows are finally sorted
    on the 'pValues' column.

    testType must be 'One-sided' or 'Two-sided' and selects which of the
    p-values returned by the test is reported.

    progress may be None (silent), the string 'Verbose' (progress is
    printed) or an object offering wasCanceled() and setValue(). When the
    latter reports cancellation the results are cleared and the method
    returns early.

    Raises ValueError for any other testType. This is checked before any
    work is done or any result attribute is modified.
    """
    oneSided = testType == 'One-sided'
    twoSided = testType == 'Two-sided'
    if not (oneSided or twoSided):
        # Fail fast: carrying on would only crash later on a missing
        # p-value.
        raise ValueError(
            "Unknown test type: %r (expected 'One-sided' or 'Two-sided')"
            % (testType,))

    def percentage(count, parent):
        # Guard against samples without any parent sequences.
        return count * 100.0 / parent if parent > 0 else 0.0

    verbose = progress == 'Verbose'
    reportsProgress = progress is not None and not verbose

    results = self.results
    results.test = statTest.name
    results.testType = testType
    results.alpha = 1.0 - coverage
    results.confIntervMethod = confIntervMethod
    results.profile = profile

    if verbose:
        print(' Processing feature:')

    results.data = []
    step = 0

    # calculate statistics
    if statTest.bSingleFeatureInterface:
        # process features one at a time
        seqsGroup1 = []
        seqsGroup2 = []
        parentSeqsGroup1 = []
        parentSeqsGroup2 = []
        pValues = []
        lowerCIs = []
        upperCIs = []
        effectSizes = []
        notes = []

        for feature in profile.getFeatures():
            if verbose:
                print(' ' + feature)
            elif reportsProgress:
                if progress.wasCanceled():
                    results.data = []
                    return
                step += 1
                progress.setValue(step)

            # get statistics
            seqGroup1, seqGroup2 = profile.getFeatureCounts(feature)
            parentSeqGroup1, parentSeqGroup2 = profile.getParentFeatureCounts(feature)

            (pValueOneSided, pValueTwoSided, lowerCI, upperCI,
             effectSize, note) = statTest.run(
                seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2,
                confIntervMethod, coverage)

            # record results
            seqsGroup1.append(seqGroup1)
            seqsGroup2.append(seqGroup2)
            parentSeqsGroup1.append(parentSeqGroup1)
            parentSeqsGroup2.append(parentSeqGroup2)
            pValues.append(pValueOneSided if oneSided else pValueTwoSided)
            lowerCIs.append(lowerCI)
            upperCIs.append(upperCI)
            effectSizes.append(effectSize)
            notes.append(note)

        if reportsProgress:
            step += 1
            progress.setValue(step)
    else:
        # process all features at once
        seqsGroup1, seqsGroup2 = profile.getFeatureCountsAll()
        parentSeqsGroup1, parentSeqsGroup2 = profile.getParentFeatureCountsAll()

        (pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs,
         effectSizes, notes) = statTest.runAll(
            seqsGroup1, seqsGroup2, parentSeqsGroup1, parentSeqsGroup2,
            confIntervMethod, coverage, progress)

        if verbose:
            print(' Processing all features...')
        elif reportsProgress and progress.wasCanceled():
            results.data = []
            return

        pValues = pValuesOneSided if oneSided else pValuesTwoSided

    # record statistics
    for i, feature in enumerate(profile.getFeatures()):
        percentages1 = [percentage(count, parent) for count, parent
                        in zip(seqsGroup1[i], parentSeqsGroup1[i])]
        percentages2 = [percentage(count, parent) for count, parent
                        in zip(seqsGroup2[i], parentSeqsGroup2[i])]

        meanGroup1 = mean(percentages1)
        meanGroup2 = mean(percentages2)

        row = [
            feature,
            meanGroup1, stdDev(percentages1, meanGroup1),
            meanGroup2, stdDev(percentages2, meanGroup2),
            float(pValues[i]), float(pValues[i]),
            float(effectSizes[i]),
            float(lowerCIs[i]), float(upperCIs[i]),
            notes[i],
        ]

        # per-sample columns: count, parent count, relative frequency (%)
        for sampleStats in zip(seqsGroup1[i], parentSeqsGroup1[i], percentages1):
            row.extend(sampleStats)
        for sampleStats in zip(seqsGroup2[i], parentSeqsGroup2[i], percentages2):
            row.extend(sampleStats)

        results.data.append(row)

    headingsSampleStats = []
    for sampleName in (profile.samplesInGroup1 + profile.samplesInGroup2):
        headingsSampleStats.extend([
            sampleName,
            sampleName + ': parent seq. count',
            sampleName + ': rel. freq. (%)',
        ])

    results.createTableHeadings(profile.groupName1, profile.groupName2,
                                headingsSampleStats)

    # sort results according to p-values
    if len(results.data) >= 1:
        results.data = TableHelper.SortTable(
            results.data, [results.dataHeadings['pValues']])
def run(self, test, signLevel, statsResults, trials, bootstrapRep, progress):
    """Estimate the power of a hypothesis test by simulation.

    Each row of statsResults supplies, by position, a feature name, two
    sequence counts and their two parent sequence counts. For every trial,
    bootstrapRep replicate count pairs are simulated from the observed
    proportions and handed to test.hypothesisTest(); the power of a trial
    is the fraction of processed replicates whose two-sided p-value is
    below signLevel.

    progress is '' to disable reporting; any other value must provide
    setValue() and setLabelText().

    Returns a list holding one row per input row: feature, seq1, seq2,
    parentSeq1, parentSeq2, the two proportions, mean and standard
    deviation of the power over all trials, followed by the same pair
    restricted to features with min(seq1, seq2) <= 5 and to the remaining
    features ('' wherever the statistic is NaN).
    """

    def blankIfNaN(value):
        # Table cells hold an empty string rather than NaN.
        return '' if math.isnan(value) else value

    def simulateCount(numDraws, proportion):
        # One uniform draw per parent sequence; a draw at or below the
        # proportion counts as a hit.
        return sum(1 for _ in xrange(0, numDraws)
                   if random.random() <= proportion)

    tableData = []
    progressValue = 0
    for entry in statsResults:
        feature, seq1, seq2, parentSeq1, parentSeq2 = (
            entry[0], entry[1], entry[2], entry[3], entry[4])

        p1 = float(seq1) / parentSeq1
        p2 = float(seq2) / parentSeq2
        isLowCount = min([seq1, seq2]) <= 5

        allPowers = []
        lowCountPowers = []
        highCountPowers = []
        for trial in xrange(0, trials):
            if progress != '':
                progressValue += 1
                progress.setValue(progressValue)
                progress.setLabelText(feature + ' - Trial = ' + str(trial))

            significant = 0
            processed = 0
            for _ in xrange(0, bootstrapRep):
                c1 = simulateCount(parentSeq1, p1)
                c2 = simulateCount(parentSeq2, p2)

                if c1 == 0 and c2 == 0:
                    # Many hypothesis tests do not handle this case
                    # correctly, so the replicate is ignored. This has
                    # little effect on the calculated power of a test.
                    continue

                processed += 1
                _oneSided, pValueTwoSided = test.hypothesisTest(
                    c1, c2, parentSeq1, parentSeq2)
                if pValueTwoSided < signLevel:
                    significant += 1

            if processed <= 0:
                # Every replicate was skipped; the trial contributes nothing.
                continue

            trialPower = float(significant) / processed
            if isLowCount:
                lowCountPowers.append(trialPower)
            else:
                highCountPowers.append(trialPower)
            allPowers.append(trialPower)

        tableData.append([
            feature,
            seq1,
            seq2,
            parentSeq1,
            parentSeq2,
            p1,
            p2,
            mean(allPowers),
            stdDev(allPowers),
            blankIfNaN(mean(lowCountPowers)),
            blankIfNaN(stdDev(lowCountPowers)),
            blankIfNaN(mean(highCountPowers)),
            blankIfNaN(stdDev(highCountPowers)),
        ])

    return tableData
containedRepRP += 1 ciLengthRP.append(upperCI - lowerCI) coverageListDP.append(float(containedRepDP) / replicates) coverageListDPCC.append(float(containedRepDPCC) / replicates) coverageListNW.append(float(containedRepNW) / replicates) coverageListWoolf.append(float(containedRepWoolf) / replicates) coverageListGart.append(float(containedRepGart) / replicates) coverageListRP.append(float(containedRepRP) / replicates) results = [coverageListDP, coverageListDPCC, coverageListNW, coverageListWoolf, coverageListGart, coverageListRP] lengths = [ciLengthDP,ciLengthDPCC,ciLengthNW,ciLengthWoolf,ciLengthGart,ciLengthRP] methodNames = ['DP: Asymptotic', 'DP: Asymptotic-CC', 'Newcombe-Wilson', 'Woolf', 'Gart', 'RP: Asympototic'] for i in xrange(0, len(results)): coverageMeanStr = '%.2f' % mean(results[i]) coverageSdStr = '%.2f' % stdDev(results[i]) coverageMinStr = '%.2f' % min(results[i]) coverageMaxStr = '%.2f' % max(results[i]) lengthMeanStr = '%.2f' % mean(lengths[i]) lengthSdStr = '%.2f' % stdDev(lengths[i]) fout.write(methodNames[i] + '\n') fout.write(coverageMeanStr + '+/-' + coverageSdStr + '[' + coverageMinStr + ';' + coverageMaxStr + ']\n') fout.write(lengthMeanStr + '+/-' + lengthSdStr+ '\n') fout.write('\n') fout.close()
def run(self, statTest, testType, confIntervMethod, coverage, profile, progress=None):
    """Run a two-group statistical test on every feature of a profile.

    Fills self.results.data with one row per feature: the feature name,
    mean and standard deviation of the relative frequencies (%) of group 1
    and of group 2, the selected p-value (stored twice), the effect size,
    the lower and upper confidence interval bounds and the test's note,
    followed by count, parent count and relative frequency (%) of every
    sample of group 1 and then of group 2. A sample whose parent count is
    not positive gets a relative frequency of 0.0. Rows are finally sorted
    on the 'pValues' column.

    testType must be 'One-sided' or 'Two-sided' and selects which of the
    p-values returned by the test is reported.

    progress may be None (silent), the string 'Verbose' (progress is
    printed) or an object offering wasCanceled() and setValue(). When the
    latter reports cancellation the results are cleared and the method
    returns early.

    Raises ValueError for any other testType. This is checked before any
    work is done or any result attribute is modified.
    """
    oneSided = testType == 'One-sided'
    twoSided = testType == 'Two-sided'
    if not (oneSided or twoSided):
        # Fail fast: carrying on would only crash later on a missing
        # p-value.
        raise ValueError(
            "Unknown test type: %r (expected 'One-sided' or 'Two-sided')"
            % (testType,))

    def percentage(count, parent):
        # Guard against samples without any parent sequences.
        return count * 100.0 / parent if parent > 0 else 0.0

    verbose = progress == 'Verbose'
    reportsProgress = progress is not None and not verbose

    results = self.results
    results.test = statTest.name
    results.testType = testType
    results.alpha = 1.0 - coverage
    results.confIntervMethod = confIntervMethod
    results.profile = profile

    if verbose:
        print(' Processing feature:')

    results.data = []
    step = 0

    # calculate statistics
    if statTest.bSingleFeatureInterface:
        # process features one at a time
        seqsGroup1 = []
        seqsGroup2 = []
        parentSeqsGroup1 = []
        parentSeqsGroup2 = []
        pValues = []
        lowerCIs = []
        upperCIs = []
        effectSizes = []
        notes = []

        for feature in profile.getFeatures():
            if verbose:
                print(' ' + feature)
            elif reportsProgress:
                if progress.wasCanceled():
                    results.data = []
                    return
                step += 1
                progress.setValue(step)

            # get statistics
            seqGroup1, seqGroup2 = profile.getFeatureCounts(feature)
            parentSeqGroup1, parentSeqGroup2 = profile.getParentFeatureCounts(feature)

            (pValueOneSided, pValueTwoSided, lowerCI, upperCI,
             effectSize, note) = statTest.run(
                seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2,
                confIntervMethod, coverage)

            # record results
            seqsGroup1.append(seqGroup1)
            seqsGroup2.append(seqGroup2)
            parentSeqsGroup1.append(parentSeqGroup1)
            parentSeqsGroup2.append(parentSeqGroup2)
            pValues.append(pValueOneSided if oneSided else pValueTwoSided)
            lowerCIs.append(lowerCI)
            upperCIs.append(upperCI)
            effectSizes.append(effectSize)
            notes.append(note)

        if reportsProgress:
            step += 1
            progress.setValue(step)
    else:
        # process all features at once
        seqsGroup1, seqsGroup2 = profile.getFeatureCountsAll()
        parentSeqsGroup1, parentSeqsGroup2 = profile.getParentFeatureCountsAll()

        (pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs,
         effectSizes, notes) = statTest.runAll(
            seqsGroup1, seqsGroup2, parentSeqsGroup1, parentSeqsGroup2,
            confIntervMethod, coverage, progress)

        if verbose:
            print(' Processing all features...')
        elif reportsProgress and progress.wasCanceled():
            results.data = []
            return

        pValues = pValuesOneSided if oneSided else pValuesTwoSided

    # record statistics
    for i, feature in enumerate(profile.getFeatures()):
        percentages1 = [percentage(count, parent) for count, parent
                        in zip(seqsGroup1[i], parentSeqsGroup1[i])]
        percentages2 = [percentage(count, parent) for count, parent
                        in zip(seqsGroup2[i], parentSeqsGroup2[i])]

        meanGroup1 = mean(percentages1)
        meanGroup2 = mean(percentages2)

        row = [
            feature,
            meanGroup1, stdDev(percentages1, meanGroup1),
            meanGroup2, stdDev(percentages2, meanGroup2),
            float(pValues[i]), float(pValues[i]),
            float(effectSizes[i]),
            float(lowerCIs[i]), float(upperCIs[i]),
            notes[i],
        ]

        # per-sample columns: count, parent count, relative frequency (%)
        for sampleStats in zip(seqsGroup1[i], parentSeqsGroup1[i], percentages1):
            row.extend(sampleStats)
        for sampleStats in zip(seqsGroup2[i], parentSeqsGroup2[i], percentages2):
            row.extend(sampleStats)

        results.data.append(row)

    headingsSampleStats = []
    for sampleName in (profile.samplesInGroup1 + profile.samplesInGroup2):
        headingsSampleStats.extend([
            sampleName,
            sampleName + ': parent seq. count',
            sampleName + ': rel. freq. (%)',
        ])

    results.createTableHeadings(profile.groupName1, profile.groupName2,
                                headingsSampleStats)

    # sort results according to p-values
    if len(results.data) >= 1:
        results.data = TableHelper.SortTable(
            results.data, [results.dataHeadings['pValues']])
def run(self, confIntervMethod, coverage, tables, trials, bootstrapRep, progress):
    """Estimate the coverage of a confidence interval method by simulation.

    Each row of tables supplies, by position, a feature name, two sequence
    counts and their two parent sequence counts. The observed effect size
    is taken from confIntervMethod.run() on the observed counts. For every
    trial, bootstrapRep replicate count pairs are drawn with binomial()
    from the observed proportions; the coverage of a trial is the fraction
    of replicates whose confidence interval contains the observed effect
    size.

    progress is '' to disable reporting; any other value must provide
    setValue() and setLabelText().

    Returns a list holding one row per input row: feature, seq1, seq2,
    parentSeq1, parentSeq2, the two proportions, mean and standard
    deviation of the coverage over all trials, followed by the same pair
    restricted to features with min(seq1, seq2) <= 5 and to the remaining
    features ('' wherever the statistic is NaN).
    """

    def blankIfNaN(value):
        # Table cells hold an empty string rather than NaN.
        return '' if math.isnan(value) else value

    tableData = []
    progressValue = 0
    for entry in tables:
        feature, seq1, seq2, parentSeq1, parentSeq2 = (
            entry[0], entry[1], entry[2], entry[3], entry[4])

        _lower, _upper, obsEffectSize = confIntervMethod.run(
            seq1, seq2, parentSeq1, parentSeq2, coverage)

        p1 = float(seq1) / parentSeq1
        p2 = float(seq2) / parentSeq2
        isLowCount = min([seq1, seq2]) <= 5

        allCoverages = []
        lowCountCoverages = []
        highCountCoverages = []
        for trial in xrange(0, trials):
            if progress != '':
                progressValue += 1
                progress.setValue(progressValue)
                progress.setLabelText(feature + ' - Trial = ' + str(trial))

            contained = 0
            for _ in xrange(0, bootstrapRep):
                c1 = binomial(parentSeq1, p1)
                c2 = binomial(parentSeq2, p2)
                lowerCI, upperCI, _effectSize = confIntervMethod.run(
                    c1, c2, parentSeq1, parentSeq2, coverage)
                if lowerCI <= obsEffectSize <= upperCI:
                    contained += 1

            trialCoverage = float(contained) / bootstrapRep
            if isLowCount:
                lowCountCoverages.append(trialCoverage)
            else:
                highCountCoverages.append(trialCoverage)
            allCoverages.append(trialCoverage)

        tableData.append([
            feature,
            seq1,
            seq2,
            parentSeq1,
            parentSeq2,
            p1,
            p2,
            mean(allCoverages),
            stdDev(allCoverages),
            blankIfNaN(mean(lowCountCoverages)),
            blankIfNaN(stdDev(lowCountCoverages)),
            blankIfNaN(mean(highCountCoverages)),
            blankIfNaN(stdDev(highCountCoverages)),
        ])

    return tableData