Example #1
0
    def run(self, test, signLevel, statsResults, trials, bootstrapRep,
            progress):
        """Estimate the power of a hypothesis test by simulation.

        Each entry of statsResults is read as
        (feature, seq1, seq2, parentSeq1, parentSeq2).  For every trial,
        bootstrapRep pairs of counts are simulated by drawing parentSeq1
        (resp. parentSeq2) Bernoulli samples with success probability
        seq1 / parentSeq1 (resp. seq2 / parentSeq2).  The power of a trial
        is the fraction of processed replicates whose two-sided p-value,
        as returned by test.hypothesisTest, is below signLevel.

        progress is either '' (no reporting) or an object providing
        setValue() and setLabelText().

        Returns one row per feature:
            feature, seq1, seq2, parentSeq1, parentSeq2, p1, p2,
            mean and std. dev. of the power over all trials,
            mean and std. dev. of the power when min(seq1, seq2) <= 5,
            mean and std. dev. of the power when min(seq1, seq2) > 5
        NaN values in the last four columns are replaced by ''.
        """

        def blankIfNaN(value):
            # Table cells show an empty string instead of NaN.
            return '' if math.isnan(value) else value

        def simulateCount(draws, prob):
            # Number of successes among `draws` Bernoulli(prob) samples.
            return sum(1 for _ in xrange(0, draws) if random.random() <= prob)

        tableData = []
        step = 0
        for entry in statsResults:
            feature, seq1, seq2, parentSeq1, parentSeq2 = entry[:5]

            p1 = float(seq1) / parentSeq1
            p2 = float(seq2) / parentSeq2
            lowCount = min(seq1, seq2) <= 5

            powerAll = []
            powerLow = []
            powerHigh = []
            for trial in xrange(0, trials):
                if progress != '':
                    step += 1
                    progress.setValue(step)
                    progress.setLabelText(feature + ' - Trial = ' + str(trial))

                rejections = 0
                processed = 0
                for _ in xrange(0, bootstrapRep):
                    c1 = simulateCount(parentSeq1, p1)
                    c2 = simulateCount(parentSeq2, p2)

                    # Many hypothesis tests cannot handle two empty counts;
                    # such replicates are ignored, which has little effect
                    # on the calculated power.
                    if c1 == 0 and c2 == 0:
                        continue

                    processed += 1
                    pValueOneSided, pValueTwoSided = test.hypothesisTest(
                        c1, c2, parentSeq1, parentSeq2)
                    if pValueTwoSided < signLevel:
                        rejections += 1

                # A trial without any usable replicate yields no estimate.
                if processed == 0:
                    continue

                power = float(rejections) / processed
                (powerLow if lowCount else powerHigh).append(power)
                powerAll.append(power)

            tableData.append([
                feature, seq1, seq2, parentSeq1, parentSeq2, p1, p2,
                mean(powerAll), stdDev(powerAll),
                blankIfNaN(mean(powerLow)), blankIfNaN(stdDev(powerLow)),
                blankIfNaN(mean(powerHigh)), blankIfNaN(stdDev(powerHigh)),
            ])

        return tableData
Example #2
0
    def run(self, statTest, effectSizeMeasure, profile, progress=None):
        """Run a statistical test on every feature of a multi-group profile.

        statTest          -- provides `name` and `hypothesisTest(data)`,
                             which returns (pValue, note).
        effectSizeMeasure -- provides `run(data)`.
        profile           -- provides the features and, per feature, the
                             active counts, parent counts and proportions.
        progress          -- None (silent), 'Verbose' (print each feature)
                             or an object providing wasCanceled() and
                             setValue().

        Fills self.results.data with one row per feature:
            feature, p-value, p-value, effect size, note,
            (mean, std. dev.) of the rel. frequency (%) for each group,
            (count, parent count, rel. frequency (%)) for each sample
        The p-value is stored twice; presumably the second copy is replaced
        by a corrected p-value later on (confirm against the results table).
        Rows are sorted by p-value.  If the progress object reports a
        cancellation, the results are emptied and the method returns early.
        """
        self.results.data = []
        self.results.test = statTest.name
        self.results.profile = profile

        verbose = (progress == 'Verbose')
        reportProgress = progress is not None and not verbose

        if verbose:
            print('  Processing feature:')

        index = 0
        for feature in profile.getFeatures():
            if verbose:
                print('    ' + feature)
            elif reportProgress:
                if progress.wasCanceled():
                    self.results.data = []
                    return

                index += 1
                progress.setValue(index)

            seqCount = profile.getActiveFeatureCounts(feature)
            parentCount = profile.getActiveParentCounts(feature)
            data = profile.getActiveFeatureProportions(feature)
            pValue, note = statTest.hypothesisTest(data)
            effectSize = effectSizeMeasure.run(data)

            row = [feature, float(pValue), float(pValue), effectSize, note]

            # Each proportion is computed once and used both for the group
            # summary and for the per-sample columns.
            groupStats = []
            sampleStats = []
            for seqs, parents in zip(seqCount, parentCount):
                proportions = []
                for sc, pc in zip(seqs, parents):
                    # A sample with an empty parent category counts as 0%.
                    proportion = sc * 100.0 / pc if pc > 0 else 0.0
                    proportions.append(proportion)
                    sampleStats.extend([sc, pc, proportion])

                meanGroup = mean(proportions)
                groupStats.extend([meanGroup, stdDev(proportions, meanGroup)])

            self.results.data.append(row + groupStats + sampleStats)

        headingsSampleStats = []
        for samples in profile.activeSamplesInGroups:
            for sampleName in samples:
                headingsSampleStats.extend([
                    sampleName,
                    sampleName + ': parent seq. count',
                    sampleName + ': rel. freq. (%)',
                ])

        self.results.createTableHeadings(profile.activeGroupNames,
                                         headingsSampleStats)

        if self.results.data:
            # sort results according to p-values
            self.results.data = TableHelper.SortTable(
                self.results.data, [self.results.dataHeadings['pValues']])

        if reportProgress:
            index += 1
            progress.setValue(index)
Example #3
0
    def plot(self, profile, statsResults):
        """Draw the two-group scatter plot of mean feature proportions.

        Each feature becomes one point: its mean proportion in group 1 (x)
        against its mean proportion in group 2 (y), coloured after the
        group with the larger mean.  Depending on the widget settings the
        plot also shows spread bars (self.spreadMethod), marginal
        histograms (self.bShowHistograms) and the R^2 of a linear
        regression of the group 2 means on the group 1 means
        (self.bShowR2).  Features listed in
        self.preferences['Highlighted group features'] are drawn again
        with a red outline.

        profile      -- supplies the group names, the features and the
                        per-group feature proportions.
        statsResults -- when statsResults.profile is not None, the p-value
                        and corrected p-value of each feature are added to
                        its tooltip.

        The axis is emptied and nothing is plotted for an empty profile or
        when the user declines to plot more than 10000 features.
        """
        if len(profile.profileDict) <= 0:
            self.emptyAxis()
            return

        if len(profile.profileDict) > 10000:
            # Show a regular cursor while the confirmation dialog is open.
            QtGui.QApplication.instance().setOverrideCursor(
                QtGui.QCursor(QtCore.Qt.ArrowCursor))
            reply = QtGui.QMessageBox.question(
                self, 'Continue?', 'Profile contains ' +
                str(len(profile.profileDict)) + ' features. ' +
                'It may take several seconds to generate this plot. Exploring the data at a higher hierarchy level is recommended. '
                + 'Do you wish to continue?', QtGui.QMessageBox.Yes,
                QtGui.QMessageBox.No)
            QtGui.QApplication.instance().restoreOverrideCursor()
            if reply == QtGui.QMessageBox.No:
                self.emptyAxis()
                return

        # *** Colour of plot elements
        axesColour = str(self.preferences['Axes colour'].name())
        group1Colour = str(
            self.preferences['Group colours'][profile.groupName1].name())
        group2Colour = str(
            self.preferences['Group colours'][profile.groupName2].name())

        # *** Set sample names
        self.groupName1 = profile.groupName1
        self.groupName2 = profile.groupName2

        # *** Create lists for each quantity of interest and calculate spread of data
        groupData1, groupData2 = profile.getFeatureProportionsAll()
        features = profile.getFeatures()

        percentileSpreads = [
            ('25th and 75th percentile', [0.25, 0.75]),
            ('9th and 91st percentile', [0.09, 0.91]),
            ('2nd and 98th percentile', [0.02, 0.98]),
        ]

        def spreadInterval(values, centre):
            # [lower, upper] spread of one group for the selected method, or
            # None when no (recognised) spread method is selected.
            method = self.spreadMethod
            if method in ('standard deviation', '2 * standard deviation'):
                halfWidth = stdDev(values, centre)
                if method == '2 * standard deviation':
                    halfWidth = 2 * halfWidth
                return [max(centre - halfWidth, 0),
                        min(centre + halfWidth, 100)]

            for name, probs in percentileSpreads:
                if method == name:
                    quantiles = mquantiles(values, prob=probs)
                    return [max(quantiles[0], 0), min(quantiles[1], 100)]

            if method == 'minimum and maximum':
                return [min(values), max(values)]

            return None

        field1 = []
        field2 = []
        xSpread = []
        ySpread = []
        for values1, values2 in zip(groupData1, groupData2):
            mean1 = mean(values1)
            mean2 = mean(values2)

            field1.append(mean1)
            field2.append(mean2)

            spread1 = spreadInterval(values1, mean1)
            if spread1 is not None:
                xSpread.append(spread1)
                ySpread.append(spreadInterval(values2, mean2))

        # *** Set figure size
        self.fig.clear()
        self.fig.set_size_inches(self.figWidth, self.figHeight)

        if self.bShowHistograms:
            histogramSizeX = self.histogramSize / self.figWidth
            histogramSizeY = self.histogramSize / self.figHeight
        else:
            histogramSizeX = 0.0
            histogramSizeY = 0.0

        padding = 0.1  # inches
        xOffsetFigSpace = (0.4 + padding) / self.figWidth
        yOffsetFigSpace = (0.3 + padding) / self.figHeight
        scatterWidth = (1.0 - xOffsetFigSpace - histogramSizeX -
                        (2 * padding) / self.figWidth)
        scatterHeight = (1.0 - yOffsetFigSpace - histogramSizeY -
                         (2 * padding) / self.figHeight)
        axesScatter = self.fig.add_axes(
            [xOffsetFigSpace, yOffsetFigSpace, scatterWidth, scatterHeight])

        if self.bShowHistograms:
            axesTopHistogram = self.fig.add_axes([
                xOffsetFigSpace,
                1.0 - histogramSizeY - padding / self.figHeight,
                scatterWidth, histogramSizeY
            ])

            axesRightHistogram = self.fig.add_axes([
                1.0 - histogramSizeX - padding / self.figWidth,
                yOffsetFigSpace, histogramSizeX, scatterHeight
            ])

        # *** Handle mouse events
        showStats = statsResults.profile is not None
        tooltips = []
        for feature, mean1, mean2 in zip(features, field1, field2):
            tooltip = feature + '\n\n'
            tooltip += (self.groupName1 +
                        ' mean proportion: %.3f' % mean1) + '\n'
            tooltip += (self.groupName2 +
                        ' mean proportion: %.3f' % mean2) + '\n\n'
            tooltip += 'Difference between mean proportions (%): ' + (
                '%.3f' % (mean1 - mean2)) + '\n'

            if mean2 != 0:
                tooltip += 'Ratio of mean proportions: %.3f' % (mean1 / mean2)
            else:
                tooltip += 'Ratio of mean proportions: undefined'

            if showStats:
                pValue = statsResults.getFeatureStatisticAsStr(
                    feature, 'pValues')
                pValueCorrected = statsResults.getFeatureStatisticAsStr(
                    feature, 'pValuesCorrected')
                tooltip += '\n\n'
                tooltip += 'p-value: ' + pValue + '\n'
                tooltip += 'Corrected p-value: ' + pValueCorrected

            tooltips.append(tooltip)

        self.plotEventHandler = PlotEventHandler(field1, field2, tooltips)

        self.mouseEventCallback(self.plotEventHandler)

        # *** Plot data

        # set visual properties of all points
        highlightedFeatures = self.preferences['Highlighted group features']
        colours = []
        highlightedField1 = []
        highlightedField2 = []
        highlightColours = []
        for feature, mean1, mean2 in zip(features, field1, field2):
            colour = group1Colour if mean1 > mean2 else group2Colour
            colours.append(colour)

            if feature in highlightedFeatures:
                highlightedField1.append(mean1)
                highlightedField2.append(mean2)
                highlightColours.append(colour)

        # scatter plot
        axesScatter.scatter(field1,
                            field2,
                            c=colours,
                            s=self.markerSize,
                            zorder=5)
        if len(highlightedField1) > 0:
            axesScatter.scatter(highlightedField1,
                                highlightedField2,
                                c=highlightColours,
                                s=self.markerSize,
                                edgecolors='red',
                                linewidth=2,
                                zorder=10)

        # plot spread bars; xSpread is only populated for a recognised
        # spread method, so 'None' (or an unknown method) draws nothing
        if xSpread:
            xlist = []
            ylist = []
            for mean1, mean2, xs, ys in zip(field1, field2, xSpread, ySpread):
                # horizontal bar; the trailing None breaks the line so all
                # bars can be drawn with a single plot() call
                xlist.extend([xs[0], xs[1], None])
                ylist.extend([mean2, mean2, None])

                # vertical bar
                xlist.extend([mean1, mean1, None])
                ylist.extend([ys[0], ys[1], None])

            axesScatter.plot(xlist,
                             ylist,
                             '-',
                             color='gray',
                             antialiased=False)

        # plot y=x line
        maxProportion = max(max(field1), max(field2)) * 1.05
        axesScatter.plot([0, maxProportion], [0, maxProportion],
                         color=axesColour,
                         linestyle='dashed',
                         marker='',
                         zorder=1)

        axesScatter.set_xlabel(self.groupName1 + ' (%)')
        axesScatter.set_ylabel(self.groupName2 + ' (%)')

        if self.bShowR2:
            # *** Calculate R^2 value (only needed when it is displayed);
            # the correlation coefficient is the third value returned
            r_value = linregress(field1, field2)[2]
            axesScatter.text(0.02,
                             0.98,
                             r'R$^2$ = ' + ('%0.3f' % r_value**2),
                             horizontalalignment='left',
                             verticalalignment='top',
                             transform=axesScatter.transAxes)

        axesScatter.set_xlim(0, maxProportion)
        axesScatter.set_ylim(0, maxProportion)

        def colourTickLines(axes):
            # Draw the tick marks of both axes in the axes colour.
            for axis in (axes.yaxis, axes.xaxis):
                for line in axis.get_ticklines():
                    line.set_color(axesColour)

        def applyOpenStyle(axes):
            # Ticks on the left/bottom only; top and right spines hidden.
            for axis in (axes.yaxis, axes.xaxis):
                for tick in axis.majorTicks:
                    tick.tick1On = True
                    tick.tick2On = False

            colourTickLines(axes)

            for loc, spine in axes.spines.items():
                if loc in ['right', 'top']:
                    spine.set_color('none')
                else:
                    spine.set_color(axesColour)

        # *** Prettify scatter plot
        colourTickLines(axesScatter)
        for spine in axesScatter.spines.values():
            spine.set_color(axesColour)

        if not self.bShowHistograms:
            applyOpenStyle(axesScatter)
        else:  # show histograms
            # plot top histogram
            axesTopHistogram.xaxis.set_major_formatter(NullFormatter())
            pdf, bins, patches = axesTopHistogram.hist(field1,
                                                       bins=self.numBins,
                                                       facecolor=group1Colour)
            axesTopHistogram.set_xlim(axesScatter.get_xlim())
            axesTopHistogram.set_yticks([0, max(pdf)])
            axesTopHistogram.set_ylim([0, max(pdf) * 1.05])

            # plot right histogram
            axesRightHistogram.yaxis.set_major_formatter(NullFormatter())
            pdf, bins, patches = axesRightHistogram.hist(
                field2,
                bins=self.numBins,
                orientation='horizontal',
                facecolor=group2Colour)
            axesRightHistogram.set_ylim(axesScatter.get_ylim())
            axesRightHistogram.set_xticks([0, max(pdf)])
            axesRightHistogram.set_xlim([0, max(pdf) * 1.05])

            # *** Prettify histogram plots
            applyOpenStyle(axesTopHistogram)
            applyOpenStyle(axesRightHistogram)

        self.updateGeometry()
        self.draw()
Example #4
0
	def run(self, statTest, effectSizeMeasure, profile, progress = None):
		"""Run a statistical test on every feature of a multi-group profile.

		statTest          -- provides `name` and `hypothesisTest(data)`,
		                     which returns (pValue, note).
		effectSizeMeasure -- provides `run(data)`.
		profile           -- provides the features and, per feature, the
		                     active counts, parent counts and proportions.
		progress          -- None (silent), 'Verbose' (print each feature)
		                     or an object providing wasCanceled() and
		                     setValue().

		Fills self.results.data with one row per feature:
		    feature, p-value, p-value, effect size, note,
		    (mean, std. dev.) of the rel. frequency (%) for each group,
		    (count, parent count, rel. frequency (%)) for each sample
		The p-value is stored twice; presumably the second copy is replaced
		by a corrected p-value later on (confirm against the results table).
		Rows are sorted by p-value.  If the progress object reports a
		cancellation, the results are emptied and the method returns early.
		"""
		self.results.data = []
		self.results.test = statTest.name
		self.results.profile = profile

		verbose = (progress == 'Verbose')
		reportProgress = progress is not None and not verbose

		if verbose:
			print('  Processing feature:')

		index = 0
		for feature in profile.getFeatures():
			if verbose:
				print('    ' + feature)
			elif reportProgress:
				if progress.wasCanceled():
					self.results.data = []
					return

				index += 1
				progress.setValue(index)

			seqCount = profile.getActiveFeatureCounts(feature)
			parentCount = profile.getActiveParentCounts(feature)
			data = profile.getActiveFeatureProportions(feature)
			pValue, note = statTest.hypothesisTest(data)
			effectSize = effectSizeMeasure.run(data)

			row = [feature, float(pValue), float(pValue), effectSize, note]

			# Each proportion is computed once and used both for the group
			# summary and for the per-sample columns.
			groupStats = []
			sampleStats = []
			for seqs, parents in zip(seqCount, parentCount):
				proportions = []
				for sc, pc in zip(seqs, parents):
					# A sample with an empty parent category counts as 0%
					# instead of raising ZeroDivisionError.
					proportion = sc * 100.0 / pc if pc > 0 else 0.0
					proportions.append(proportion)
					sampleStats.extend([sc, pc, proportion])

				meanGroup = mean(proportions)
				groupStats.extend([meanGroup, stdDev(proportions, meanGroup)])

			self.results.data.append(row + groupStats + sampleStats)

		headingsSampleStats = []
		for samples in profile.activeSamplesInGroups:
			for sampleName in samples:
				headingsSampleStats.extend([
					sampleName,
					sampleName + ': parent seq. count',
					sampleName + ': rel. freq. (%)',
				])

		self.results.createTableHeadings(profile.activeGroupNames, headingsSampleStats)

		if self.results.data:
			# sort results according to p-values
			self.results.data = TableHelper.SortTable(self.results.data, [self.results.dataHeadings['pValues']])

		if reportProgress:
			index += 1
			progress.setValue(index)
Example #5
0
# Coverage and CI-length samples of each confidence-interval method.  The
# three lists below are parallel: entry k of each belongs to the same method.
results = [
    coverageListDP, coverageListDPCC, coverageListNW, coverageListWoolf,
    coverageListGart, coverageListRP
]
lengths = [
    ciLengthDP, ciLengthDPCC, ciLengthNW, ciLengthWoolf, ciLengthGart,
    ciLengthRP
]
methodNames = [
    'DP: Asymptotic', 'DP: Asymptotic-CC', 'Newcombe-Wilson', 'Woolf', 'Gart',
    'RP: Asymptotic'
]

# Write one record per method:
#   <method name>
#   <coverage mean>+/-<coverage sd>[<coverage min>;<coverage max>]
#   <CI length mean>+/-<CI length sd>
#   <blank line>
for methodName, coverages, ciLengths in zip(methodNames, results, lengths):
    # Format both lines before writing anything so that a failure (e.g.
    # min() of an empty list) cannot leave a partial record in the file.
    coverageLine = '%.2f+/-%.2f[%.2f;%.2f]\n' % (
        mean(coverages), stdDev(coverages), min(coverages), max(coverages))
    lengthLine = '%.2f+/-%.2f\n' % (mean(ciLengths), stdDev(ciLengths))

    fout.write(methodName + '\n')
    fout.write(coverageLine)
    fout.write(lengthLine)
    fout.write('\n')

fout.close()
Example #6
0
#=======================================================================
    def run(self, confIntervMethod, coverage, tables, trials, bootstrapRep,
            progress):
        """Estimate the coverage of a confidence-interval method by simulation.

        Each entry of tables is read as
        (feature, seq1, seq2, parentSeq1, parentSeq2).  The observed effect
        size is taken from confIntervMethod.run() on these counts.  For
        every trial, bootstrapRep pairs of counts are drawn with
        binomial(parentSeq, seq / parentSeq); the coverage of a trial is the
        fraction of replicates whose confidence interval contains the
        observed effect size.

        progress is either '' (no reporting) or an object providing
        setValue() and setLabelText().

        Returns one row per feature:
            feature, seq1, seq2, parentSeq1, parentSeq2, p1, p2,
            mean and std. dev. of the coverage over all trials,
            mean and std. dev. of the coverage when min(seq1, seq2) <= 5,
            mean and std. dev. of the coverage when min(seq1, seq2) > 5
        NaN values in the last four columns are replaced by ''.
        """

        def blankIfNaN(value):
            # Table cells show an empty string instead of NaN.
            return '' if math.isnan(value) else value

        tableData = []
        step = 0
        for entry in tables:
            feature, seq1, seq2, parentSeq1, parentSeq2 = entry[:5]

            # Only the observed effect size is kept from this call; the
            # interval bounds are recomputed for every replicate below.
            observedLower, observedUpper, obsEffectSize = confIntervMethod.run(
                seq1, seq2, parentSeq1, parentSeq2, coverage)

            p1 = float(seq1) / parentSeq1
            p2 = float(seq2) / parentSeq2
            lowCount = min(seq1, seq2) <= 5

            coverageAll = []
            coverageLow = []
            coverageHigh = []
            for trial in xrange(0, trials):
                if progress != '':
                    step += 1
                    progress.setValue(step)
                    progress.setLabelText(feature + ' - Trial = ' + str(trial))

                contained = 0
                for _ in xrange(0, bootstrapRep):
                    c1 = binomial(parentSeq1, p1)
                    c2 = binomial(parentSeq2, p2)

                    lowerCI, upperCI, replicateEffect = confIntervMethod.run(
                        c1, c2, parentSeq1, parentSeq2, coverage)
                    if lowerCI <= obsEffectSize <= upperCI:
                        contained += 1

                fraction = float(contained) / bootstrapRep
                (coverageLow if lowCount else coverageHigh).append(fraction)
                coverageAll.append(fraction)

            tableData.append([
                feature, seq1, seq2, parentSeq1, parentSeq2, p1, p2,
                mean(coverageAll), stdDev(coverageAll),
                blankIfNaN(mean(coverageLow)),
                blankIfNaN(stdDev(coverageLow)),
                blankIfNaN(mean(coverageHigh)),
                blankIfNaN(stdDev(coverageHigh)),
            ])

        return tableData
Example #8
0
	def run(self, statTest, testType, confIntervMethod, coverage, profile, progress = None):
		"""Run a two-group statistical test on every feature of a profile.

		statTest         -- provides `name`, `bSingleFeatureInterface` and
		                    either run() (one feature at a time) or
		                    runAll() (all features at once).
		testType         -- 'One-sided' or 'Two-sided'; selects which of
		                    the two p-values returned by the test is stored.
		confIntervMethod -- passed through to the statistical test.
		coverage         -- passed through to the statistical test;
		                    self.results.alpha is set to 1.0 - coverage.
		profile          -- provides the features and their counts.
		progress         -- None (silent), 'Verbose' (print progress) or an
		                    object providing wasCanceled() and setValue().

		Fills self.results.data with one row per feature:
		    feature, mean and std. dev. of the rel. frequency (%) of
		    group 1, the same for group 2, p-value, p-value, effect size,
		    lower CI, upper CI, note,
		    (count, parent count, rel. frequency (%)) for each sample of
		    group 1 and then of group 2
		The p-value is stored twice; presumably the second copy is replaced
		by a corrected p-value later on (confirm against the results table).
		Rows are sorted by p-value.  If the progress object reports a
		cancellation, the results are emptied and the method returns early.

		Raises ValueError if testType is not one of the two known types.
		"""
		# Fail early: continuing with an unknown test type used to end in a
		# NameError/IndexError or silently reused a previous feature's p-value.
		if testType not in ('One-sided', 'Two-sided'):
			raise ValueError("Unknown test type: %r (expected 'One-sided' or 'Two-sided')" % (testType,))
		useOneSided = (testType == 'One-sided')

		verbose = (progress == 'Verbose')
		reportProgress = progress is not None and not verbose

		self.results.test = statTest.name
		self.results.testType = testType
		self.results.alpha = 1.0 - coverage
		self.results.confIntervMethod = confIntervMethod
		self.results.profile = profile

		if verbose:
			print('  Processing feature:')

		self.results.data = []
		index = 0

		# calculate statistics
		seqsGroup1 = []
		seqsGroup2 = []
		parentSeqsGroup1 = []
		parentSeqsGroup2 = []
		pValues = []
		lowerCIs = []
		upperCIs = []
		effectSizes = []
		notes = []
		if statTest.bSingleFeatureInterface:
			# process features one at a time
			for feature in profile.getFeatures():
				if verbose:
					print('    ' + feature)
				elif reportProgress:
					if progress.wasCanceled():
						self.results.data = []
						return

					index += 1
					progress.setValue(index)

				# get statistics
				seqGroup1, seqGroup2 = profile.getFeatureCounts(feature)
				parentSeqGroup1, parentSeqGroup2 = profile.getParentFeatureCounts(feature)
				results = statTest.run(seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage)
				pValueOneSided, pValueTwoSided, lowerCI, upperCI, effectSize, note = results

				# record results
				seqsGroup1.append(seqGroup1)
				seqsGroup2.append(seqGroup2)
				parentSeqsGroup1.append(parentSeqGroup1)
				parentSeqsGroup2.append(parentSeqGroup2)
				pValues.append(pValueOneSided if useOneSided else pValueTwoSided)
				lowerCIs.append(lowerCI)
				upperCIs.append(upperCI)
				effectSizes.append(effectSize)
				notes.append(note)

			if reportProgress:
				index += 1
				progress.setValue(index)
		else:
			# process all features at once; announce it before the work starts
			if verbose:
				print('    Processing all features...')

			seqsGroup1, seqsGroup2 = profile.getFeatureCountsAll()
			parentSeqsGroup1, parentSeqsGroup2 = profile.getParentFeatureCountsAll()
			pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs, effectSizes, notes = statTest.runAll(seqsGroup1, seqsGroup2, parentSeqsGroup1, parentSeqsGroup2, confIntervMethod, coverage, progress)

			if reportProgress and progress.wasCanceled():
				self.results.data = []
				return

			pValues = pValuesOneSided if useOneSided else pValuesTwoSided

		def relativeFrequencies(seqs, parentSeqs):
			# Rel. frequency (%) of each sample; 0 when its parent count is empty.
			return [s * 100.0 / p if p > 0 else 0.0 for s, p in zip(seqs, parentSeqs)]

		# record statistics
		features = profile.getFeatures()
		for i, feature in enumerate(features):
			propGroup1 = relativeFrequencies(seqsGroup1[i], parentSeqsGroup1[i])
			propGroup2 = relativeFrequencies(seqsGroup2[i], parentSeqsGroup2[i])

			meanGroup1 = mean(propGroup1)
			meanGroup2 = mean(propGroup2)
			row = [feature, meanGroup1, stdDev(propGroup1, meanGroup1), meanGroup2, stdDev(propGroup2, meanGroup2),
				float(pValues[i]), float(pValues[i]), float(effectSizes[i]),
				float(lowerCIs[i]), float(upperCIs[i]), notes[i]]

			# per-sample columns: group 1 samples first, then group 2
			sampleGroups = [
				(seqsGroup1[i], parentSeqsGroup1[i], propGroup1),
				(seqsGroup2[i], parentSeqsGroup2[i], propGroup2),
			]
			for seqs, parentSeqs, props in sampleGroups:
				for seq, parentSeq, prop in zip(seqs, parentSeqs, props):
					row.extend([seq, parentSeq, prop])

			self.results.data.append(row)

		headingsSampleStats = []
		for sampleName in (profile.samplesInGroup1 + profile.samplesInGroup2):
			headingsSampleStats.append(sampleName)
			headingsSampleStats.append(sampleName + ': parent seq. count')
			headingsSampleStats.append(sampleName + ': rel. freq. (%)')

		self.results.createTableHeadings(profile.groupName1, profile.groupName2, headingsSampleStats)

		# sort results according to p-values
		if self.results.data:
			self.results.data = TableHelper.SortTable(self.results.data, [self.results.dataHeadings['pValues']])
Example #9
0
#=======================================================================
Example #10
0
  def run(self, test, signLevel, statsResults, trials, bootstrapRep, progress):
    """Estimate the power of a hypothesis test by simulation.

    Each entry of statsResults is read as
    (feature, seq1, seq2, parentSeq1, parentSeq2).  For every trial,
    bootstrapRep pairs of counts are simulated by drawing parentSeq1
    (resp. parentSeq2) Bernoulli samples with success probability
    seq1 / parentSeq1 (resp. seq2 / parentSeq2).  The power of a trial is
    the fraction of processed replicates whose two-sided p-value, as
    returned by test.hypothesisTest, is below signLevel.

    progress is either '' (no reporting) or an object providing
    setValue() and setLabelText().

    Returns one row per feature:
      feature, seq1, seq2, parentSeq1, parentSeq2, p1, p2,
      mean and std. dev. of the power over all trials,
      mean and std. dev. of the power when min(seq1, seq2) <= 5,
      mean and std. dev. of the power when min(seq1, seq2) > 5
    NaN values in the last four columns are replaced by ''.
    """

    def blankIfNaN(value):
      # Table cells show an empty string instead of NaN.
      return '' if math.isnan(value) else value

    def simulateCount(draws, prob):
      # Number of successes among `draws` Bernoulli(prob) samples.
      return sum(1 for _ in xrange(0, draws) if random.random() <= prob)

    tableData = []
    step = 0
    for entry in statsResults:
      feature, seq1, seq2, parentSeq1, parentSeq2 = entry[:5]

      p1 = float(seq1) / parentSeq1
      p2 = float(seq2) / parentSeq2
      lowCount = min(seq1, seq2) <= 5

      powerAll = []
      powerLow = []
      powerHigh = []
      for trial in xrange(0, trials):
        if progress != '':
          step += 1
          progress.setValue(step)
          progress.setLabelText(feature + ' - Trial = ' + str(trial))

        rejections = 0
        processed = 0
        for _ in xrange(0, bootstrapRep):
          c1 = simulateCount(parentSeq1, p1)
          c2 = simulateCount(parentSeq2, p2)

          # Many hypothesis tests cannot handle two empty counts; such
          # replicates are ignored, which has little effect on the
          # calculated power.
          if c1 == 0 and c2 == 0:
            continue

          processed += 1
          pValueOneSided, pValueTwoSided = test.hypothesisTest(c1, c2, parentSeq1, parentSeq2)
          if pValueTwoSided < signLevel:
            rejections += 1

        # A trial without any usable replicate yields no estimate.
        if processed == 0:
          continue

        power = float(rejections) / processed
        (powerLow if lowCount else powerHigh).append(power)
        powerAll.append(power)

      tableData.append([
        feature, seq1, seq2, parentSeq1, parentSeq2, p1, p2,
        mean(powerAll), stdDev(powerAll),
        blankIfNaN(mean(powerLow)), blankIfNaN(stdDev(powerLow)),
        blankIfNaN(mean(powerHigh)), blankIfNaN(stdDev(powerHigh)),
      ])

    return tableData
Example #11
0
#=======================================================================
Example #12
0
        containedRepRP += 1  
      ciLengthRP.append(upperCI - lowerCI)
            
    coverageListDP.append(float(containedRepDP) / replicates)
    coverageListDPCC.append(float(containedRepDPCC) / replicates)
    coverageListNW.append(float(containedRepNW) / replicates)
    coverageListWoolf.append(float(containedRepWoolf) / replicates)
    coverageListGart.append(float(containedRepGart) / replicates)
    coverageListRP.append(float(containedRepRP) / replicates)
     
# Coverage fractions and CI lengths per method; all three lists are index-aligned.
results = [coverageListDP, coverageListDPCC, coverageListNW, coverageListWoolf, coverageListGart, coverageListRP]
lengths = [ciLengthDP, ciLengthDPCC, ciLengthNW, ciLengthWoolf, ciLengthGart, ciLengthRP]
methodNames = ['DP: Asymptotic', 'DP: Asymptotic-CC', 'Newcombe-Wilson', 'Woolf', 'Gart', 'RP: Asympototic']

# One report block per method: its name, the coverage as mean+/-sd[min;max],
# the interval length as mean+/-sd, then a blank separator line.
for i, coverages in enumerate(results):
  coverageLine = '%.2f+/-%.2f[%.2f;%.2f]\n' % (mean(coverages), stdDev(coverages), min(coverages), max(coverages))

  ciLengths = lengths[i]
  lengthLine = '%.2f+/-%.2f\n' % (mean(ciLengths), stdDev(ciLengths))

  # All statistics are formatted before anything is written for this method.
  fout.write(methodNames[i] + '\n')
  fout.write(coverageLine)
  fout.write(lengthLine)
  fout.write('\n')

fout.close()

    
Example #13
0
    def run(self,
            statTest,
            testType,
            confIntervMethod,
            coverage,
            profile,
            progress=None):
        """Apply a statistical test to every feature of a profile and tabulate the results.

        Everything is stored on ``self.results`` (test name, test type, alpha,
        CI method, profile, table headings and one data row per feature);
        nothing is returned. If the progress object reports a cancellation,
        ``self.results.data`` is reset to an empty list and the method returns
        early.

        Each row of ``self.results.data`` is laid out as: feature, mean() and
        stdDev() of the relative frequencies (%) in group 1, the same for
        group 2, p-value, p-value (stored twice), effect size, lower CI,
        upper CI, note, followed by (count, parent count, relative frequency %)
        for every sample of group 1 and then of group 2. Rows are finally
        sorted on the 'pValues' column.

        Args:
            statTest: test object exposing ``name``, ``bSingleFeatureInterface``
                and either ``run`` (one feature per call) or ``runAll``.
            testType: 'One-sided' or 'Two-sided'; selects which p-value is kept.
            confIntervMethod: passed through unchanged to the statistical test.
            coverage: passed through to the test; ``1.0 - coverage`` is
                recorded as alpha.
            profile: source of the features, per-sample counts and the
                group/sample names used for the table headings.
            progress: None for silent operation, the string 'Verbose' to print
                progress, or an object providing ``wasCanceled()`` and
                ``setValue()`` (presumably a GUI progress dialog - confirm).

        Raises:
            ValueError: if ``testType`` is neither 'One-sided' nor 'Two-sided'.
        """

        def relFreqs(counts, parentCounts):
            # Relative frequency (%) per sample; 0.0 when the parent count is not positive.
            return [
                count * 100.0 / parentCounts[j] if parentCounts[j] > 0 else 0.0
                for j, count in enumerate(counts)
            ]

        # Fail fast. An unknown type used to print a message only and then crash
        # later on an unbound (per-feature path) or empty (all-at-once path)
        # p-value, after self.results had already been partially filled in.
        if testType not in ('One-sided', 'Two-sided'):
            raise ValueError('Unknown test type: %r' % (testType,))
        bOneSided = (testType == 'One-sided')

        bVerbose = (progress == 'Verbose')
        bProgressObj = (progress is not None and not bVerbose)

        self.results.test = statTest.name
        self.results.testType = testType
        self.results.alpha = 1.0 - coverage
        self.results.confIntervMethod = confIntervMethod
        self.results.profile = profile

        if bVerbose:
            print('  Processing feature:')

        self.results.data = []
        index = 0

        # calculate statistics
        seqsGroup1 = []
        seqsGroup2 = []
        parentSeqsGroup1 = []
        parentSeqsGroup2 = []
        pValues = []
        lowerCIs = []
        upperCIs = []
        effectSizes = []
        notes = []
        if statTest.bSingleFeatureInterface:
            # process features one at a time
            for feature in profile.getFeatures():
                if bVerbose:
                    print('    ' + feature)
                elif bProgressObj:
                    if progress.wasCanceled():
                        self.results.data = []
                        return

                    index += 1
                    progress.setValue(index)

                # get statistics
                seqGroup1, seqGroup2 = profile.getFeatureCounts(feature)
                parentSeqGroup1, parentSeqGroup2 = profile.getParentFeatureCounts(
                    feature)
                results = statTest.run(seqGroup1, seqGroup2, parentSeqGroup1,
                                       parentSeqGroup2, confIntervMethod,
                                       coverage)
                pValueOneSided, pValueTwoSided, lowerCI, upperCI, effectSize, note = results

                # record results
                seqsGroup1.append(seqGroup1)
                seqsGroup2.append(seqGroup2)
                parentSeqsGroup1.append(parentSeqGroup1)
                parentSeqsGroup2.append(parentSeqGroup2)
                pValues.append(pValueOneSided if bOneSided else pValueTwoSided)
                lowerCIs.append(lowerCI)
                upperCIs.append(upperCI)
                effectSizes.append(effectSize)
                notes.append(note)

            # one extra progress tick once every feature has been processed
            if bProgressObj:
                index += 1
                progress.setValue(index)
        else:
            # process all features at once
            seqsGroup1, seqsGroup2 = profile.getFeatureCountsAll()
            parentSeqsGroup1, parentSeqsGroup2 = profile.getParentFeatureCountsAll(
            )
            pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs, effectSizes, notes = statTest.runAll(
                seqsGroup1, seqsGroup2, parentSeqsGroup1, parentSeqsGroup2,
                confIntervMethod, coverage, progress)
            if bVerbose:
                print('    Processing all features...')
            elif bProgressObj and progress.wasCanceled():
                self.results.data = []
                return

            pValues = pValuesOneSided if bOneSided else pValuesTwoSided

        # record statistics
        features = profile.getFeatures()
        for i, feature in enumerate(features):
            counts1 = seqsGroup1[i]
            parents1 = parentSeqsGroup1[i]
            counts2 = seqsGroup2[i]
            parents2 = parentSeqsGroup2[i]

            propGroup1 = relFreqs(counts1, parents1)
            propGroup2 = relFreqs(counts2, parents2)

            meanGroup1 = mean(propGroup1)
            meanGroup2 = mean(propGroup2)

            # The p-value is deliberately stored in two columns.
            # NOTE(review): the second copy presumably backs a corrected
            # p-value column that is overwritten later - confirm.
            row = [
                feature, meanGroup1,
                stdDev(propGroup1, meanGroup1), meanGroup2,
                stdDev(propGroup2, meanGroup2),
                float(pValues[i]),
                float(pValues[i]),
                float(effectSizes[i]),
                float(lowerCIs[i]),
                float(upperCIs[i]), notes[i]
            ]

            # per-sample columns: count, parent count, relative frequency (%)
            for j, count in enumerate(counts1):
                row.extend((count, parents1[j], propGroup1[j]))

            for j, count in enumerate(counts2):
                row.extend((count, parents2[j], propGroup2[j]))

            self.results.data.append(row)

        headingsSampleStats = []
        for sampleName in (profile.samplesInGroup1 + profile.samplesInGroup2):
            headingsSampleStats.extend((sampleName,
                                        sampleName + ': parent seq. count',
                                        sampleName + ': rel. freq. (%)'))

        self.results.createTableHeadings(profile.groupName1,
                                         profile.groupName2,
                                         headingsSampleStats)

        # sort results according to p-values
        if self.results.data:
            self.results.data = TableHelper.SortTable(
                self.results.data, [self.results.dataHeadings['pValues']])
Example #14
0
  def run(self, confIntervMethod, coverage, tables, trials, bootstrapRep, progress):
    """Estimate by simulation how often a confidence interval method covers the observed effect size.

    For every row of `tables` (feature, seq1, seq2, parentSeq1, parentSeq2) the
    observed effect size is obtained from confIntervMethod.run(). Each of the
    `trials` trials then draws `bootstrapRep` pairs of counts with
    binomial(parentSeqN, seqN / parentSeqN), recomputes the interval for each
    pair and records the fraction of intervals containing the observed effect size.

    Parameters:
      confIntervMethod - object whose run(seq1, seq2, parentSeq1, parentSeq2, coverage)
                         returns (lowerCI, upperCI, effectSize)
      coverage         - passed through unchanged to confIntervMethod.run()
      tables           - iterable of rows as described above
      trials           - number of trials per feature
      bootstrapRep     - replicates per trial; trials are skipped when this is not positive
      progress         - '' to disable progress reporting, otherwise an object
                         providing setValue() and setLabelText()

    Returns a list with one row per feature:
      [feature, seq1, seq2, parentSeq1, parentSeq2, p1, p2,
       mean coverage, stdDev of coverage,
       mean and stdDev when min(seq1, seq2) <= 5,
       mean and stdDev when min(seq1, seq2) > 5]
    where any of the last four entries is '' when its statistic is NaN.
    """

    def blankIfNan(value):
      # Table cells holding a NaN statistic are reported as an empty string.
      if math.isnan(value):
        return ''
      return value

    tableData = []
    index = 0
    for row in tables:
      feature = row[0]
      seq1 = row[1]
      seq2 = row[2]
      parentSeq1 = row[3]
      parentSeq2 = row[4]

      # Only the observed effect size is needed; the observed interval itself is unused.
      _, _, obsEffectSize = confIntervMethod.run(seq1, seq2, parentSeq1, parentSeq2, coverage)

      p1 = float(seq1) / parentSeq1
      p2 = float(seq2) / parentSeq2

      # Does not depend on the trial, so decide once per feature.
      bSmallCount = min(seq1, seq2) <= 5

      coverageList = []
      for trial in xrange(0, trials):
        if progress != '':
          index += 1
          progress.setValue(index)
          progress.setLabelText(feature + ' - Trial = ' + str(trial))

        containedRep = 0
        for dummy in xrange(0, bootstrapRep):
          c1 = binomial(parentSeq1, p1)
          c2 = binomial(parentSeq2, p2)

          lowerCI, upperCI, effectSize = confIntervMethod.run(c1, c2, parentSeq1, parentSeq2, coverage)
          if lowerCI <= obsEffectSize <= upperCI:
            containedRep += 1

        # A trial without replicates has no coverage estimate; skip it instead
        # of dividing by zero.
        if bootstrapRep > 0:
          coverageList.append(float(containedRep) / bootstrapRep)

      # Every trial of a feature falls into the same count category, so one of
      # these two lists is always empty.
      if bSmallCount:
        coverageListLess5 = coverageList
        coverageListGreater5 = []
      else:
        coverageListLess5 = []
        coverageListGreater5 = coverageList

      tableData.append([
        feature,
        seq1,
        seq2,
        parentSeq1,
        parentSeq2,
        p1,
        p2,
        mean(coverageList),
        stdDev(coverageList),
        blankIfNan(mean(coverageListLess5)),
        blankIfNan(stdDev(coverageListLess5)),
        blankIfNan(mean(coverageListGreater5)),
        blankIfNan(stdDev(coverageListGreater5)),
      ])

    return tableData
Example #15
0
#=======================================================================