Esempio n. 1
0
	def equalSampleSize(self, seq1, seq2, totalSeq1, totalSeq2, alpha, beta):
		'''
		Sample size estimate for a difference between proportions test.
		
		The two observed proportions are seq1/totalSeq1 and seq2/totalSeq2.
		Returns 1 when the proportions are identical (the formula would
		otherwise divide by zero); else returns
			(zA*sqrt((p1+p2)*(q1+q2)/2) + zB*sqrt(p1*q1 + p2*q2))^2 / (p1-p2)^2
		where zA = zScore(1 - alpha) and zB = zScore(1 - beta).
		'''
		confNull = 1.0 - alpha
		confAlt = 1.0 - beta
		
		prop1 = float(seq1) / totalSeq1
		prop2 = float(seq2) / totalSeq2
		effect = prop1 - prop2
		
		# identical proportions: nothing to detect, avoid dividing by zero below
		if effect == 0:
			return 1
		
		comp1 = 1.0 - prop1
		comp2 = 1.0 - prop2
		
		nullTerm = zScore(confNull) * math.sqrt((prop1 + prop2)*(comp1 + comp2)/2)
		altTerm = zScore(confAlt) * math.sqrt((prop1*comp1) + (prop2*comp2))
		
		return (nullTerm + altTerm)**2 / (effect**2)
Esempio n. 2
0
    def power(self, seq1, seq2, totalSeq1, totalSeq2, alpha):
        '''
        Asymptotic power of a difference between proportions test.

        The observed proportions are seq1/totalSeq1 and seq2/totalSeq2.
        Returns 1.0 when the unpooled standard deviation is zero; otherwise
        returns standardNormalCDF(zLower) + (1 - standardNormalCDF(zUpper)),
        where the bounds are built from zScore(1 - alpha), the pooled
        standard deviation and the observed difference.
        '''
        # The chi-square test is equivalent to the difference between proportions
        # test as illustrated by Rivals et al., 2007. Here we use the standard
        # asymptotic power formulation for a difference between proportions test.
        oneMinusAlpha = 1.0 - alpha

        p1 = float(seq1) / totalSeq1
        p2 = float(seq2) / totalSeq2
        d = p1 - p2

        stdDev = math.sqrt((p1 * (1 - p1)) / totalSeq1 +
                           (p2 * (1 - p2)) / totalSeq2)

        # degenerate case: no variability in either sample
        if stdDev == 0:
            return 1.0

        p = float(totalSeq1 * p1 + totalSeq2 * p2) / (totalSeq1 + totalSeq2)
        q = 1 - p
        pooledStdDev = math.sqrt((p * q) / totalSeq1 + (p * q) / totalSeq2)

        # The critical value must not be bound to a local named 'zScore':
        # that would make the name local to the whole function and the call
        # itself would raise UnboundLocalError.
        z = zScore(oneMinusAlpha)
        zLower = (-z * pooledStdDev - d) / stdDev
        zUpper = (z * pooledStdDev - d) / stdDev

        return standardNormalCDF(zLower) + (1.0 - standardNormalCDF(zUpper))
Esempio n. 3
0
    def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
        '''
        Calculate ratio of proportions (relative risk) confidence interval.

        When either count is zero, the configured pseudocount is added to both
        counts (and twice the pseudocount to both totals) and a note is set.
        Returns the tuple (lower CI, upper CI, effect size, note).
        '''
        note = ''
        hasZeroCount = (seq1 == 0) or (seq2 == 0)
        if hasZeroCount:
            pc = self.preferences['Pseudocount']
            seq1 += pc
            seq2 += pc
            totalSeq1 += 2 * pc
            totalSeq2 += 2 * pc
            note = 'degenerate case: CI calculation used pseudocount'

        prop1 = float(seq1) / totalSeq1
        prop2 = float(seq2) / totalSeq2
        effectSize = prop1 / prop2
        logEffect = math.log(effectSize)

        # standard error on the log scale
        logVariance = 1.0 / seq1 - 1.0 / totalSeq1 + 1.0 / seq2 - 1.0 / totalSeq2
        logSE = math.sqrt(logVariance)

        z = zScore(coverage)
        halfWidth = z * logSE

        # interval is built on the log scale and mapped back with exp()
        lowerCI = math.exp(logEffect - halfWidth)
        upperCI = math.exp(logEffect + halfWidth)
        return lowerCI, upperCI, effectSize, note
Esempio n. 4
0
	def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
		'''
		Calculate confidence interval using Newcombe-Wilson method.
			Results are reported as percent difference.
		
		A total of zero is replaced by the configured pseudocount and a note is set.
		Returns the tuple (lower CI, upper CI, difference, note), each numeric
		value multiplied by 100.
		'''
		degenerateNote = 'degenerate case: CI calculation used pseudocount'
		note = ''
		
		if totalSeq1 == 0:
			totalSeq1 = self.preferences['Pseudocount']
			note = degenerateNote
		
		if totalSeq2 == 0:
			totalSeq2 = self.preferences['Pseudocount']
			note = degenerateNote
		
		z = zScore(coverage)
		
		rootsA = self.NewcombeWilsonFindRoots(seq1, totalSeq1, z)
		rootsB = self.NewcombeWilsonFindRoots(seq2, totalSeq2, z)
		
		def varTerm(root, total):
			return root*(1-root)/total
		
		diff = float(seq1)/totalSeq1 - float(seq2)/totalSeq2
		
		# each bound pairs the first root of one sample with the second root of the other
		lowerWidth = z*math.sqrt(varTerm(rootsA[0], totalSeq1) + varTerm(rootsB[1], totalSeq2))
		upperWidth = z*math.sqrt(varTerm(rootsA[1], totalSeq1) + varTerm(rootsB[0], totalSeq2))
		
		return (diff-lowerWidth)*100, (diff+upperWidth)*100, diff*100, note
    def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
        '''
        Calculate confidence interval using asymptotic method with a continuity correction.
        Results are reported as percent difference.

        A total of zero is replaced by the configured pseudocount and a note is set.
        Returns the tuple (lower CI, upper CI, difference, note), each numeric
        value multiplied by 100.
        '''
        degenerateNote = 'degenerate case: CI calculation used pseudocount'
        note = ''

        if totalSeq1 == 0:
            totalSeq1 = self.preferences['Pseudocount']
            note = degenerateNote

        if totalSeq2 == 0:
            totalSeq2 = self.preferences['Pseudocount']
            note = degenerateNote

        R1 = float(seq1) / totalSeq1
        R2 = float(seq2) / totalSeq2

        variance = (R1 * (1 - R1)) / totalSeq1 + (R2 * (1 - R2)) / totalSeq2
        continuity = (1.0 / totalSeq1 + 1.0 / totalSeq2) / 2

        # NOTE(review): the continuity term is folded into stdErr and therefore
        # also scaled by the z-score below; Newcombe's formulation adds it
        # unscaled. Confirm this is intended before changing.
        stdErr = math.sqrt(variance) + continuity
        offset = zScore(coverage) * stdErr

        diff = R1 - R2
        return (diff - offset) * 100, (diff + offset) * 100, diff * 100, note
Esempio n. 6
0
    def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
        '''
        Calculate confidence interval using Newcombe-Wilson method.
        Results are reported as percent difference.

        A total of zero is replaced by the configured pseudocount and a note is set.
        Returns the tuple (lower CI, upper CI, difference, note), each numeric
        value multiplied by 100.
        '''
        note = ''
        pseudocountNote = 'degenerate case: CI calculation used pseudocount'

        if totalSeq1 == 0:
            totalSeq1 = self.preferences['Pseudocount']
            note = pseudocountNote

        if totalSeq2 == 0:
            totalSeq2 = self.preferences['Pseudocount']
            note = pseudocountNote

        z = zScore(coverage)

        sample1Roots = self.NewcombeWilsonFindRoots(seq1, totalSeq1, z)
        sample2Roots = self.NewcombeWilsonFindRoots(seq2, totalSeq2, z)

        diff = float(seq1) / totalSeq1 - float(seq2) / totalSeq2

        # lower bound: first root of sample 1 with second root of sample 2
        lowerVar = (sample1Roots[0] * (1 - sample1Roots[0]) / totalSeq1 +
                    sample2Roots[1] * (1 - sample2Roots[1]) / totalSeq2)
        lowerCI = z * math.sqrt(lowerVar)

        # upper bound: second root of sample 1 with first root of sample 2
        upperVar = (sample1Roots[1] * (1 - sample1Roots[1]) / totalSeq1 +
                    sample2Roots[0] * (1 - sample2Roots[0]) / totalSeq2)
        upperCI = z * math.sqrt(upperVar)

        percentLower = (diff - lowerCI) * 100
        percentUpper = (diff + upperCI) * 100
        return percentLower, percentUpper, diff * 100, note
Esempio n. 7
0
	def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
		'''
		Calculate ratio of proportions (relative risk) confidence interval.
		
		If either count is zero, the configured pseudocount is added to both
		counts and twice the pseudocount to both totals, and a note is set.
		Returns the tuple (lower CI, upper CI, effect size, note).
		'''
		note = ''
		if 0 in (seq1, seq2):
			pseudo = self.preferences['Pseudocount']
			seq1 += pseudo
			seq2 += pseudo
			totalSeq1 += 2*pseudo
			totalSeq2 += 2*pseudo
			note = 'degenerate case: CI calculation used pseudocount'
		
		effectSize = (float(seq1) / totalSeq1) / (float(seq2) / totalSeq2)
		logRatio = math.log(effectSize)
		
		# standard error of the log ratio
		logSE = math.sqrt(1.0/seq1 - 1.0/totalSeq1 + 1.0/seq2 - 1.0/totalSeq2)
		
		margin = zScore(coverage) * logSE
		
		# bounds are computed on the log scale, then transformed back
		return math.exp(logRatio - margin), math.exp(logRatio + margin), effectSize, note
Esempio n. 8
0
	def testNormalDist(self):
		"""Check standardNormalCDF and zScore against reference values."""
		from stamp.metagenomics.stats.distributions.NormalDist import standardNormalCDF, zScore
		
		# (argument, expected CDF value)
		cdfCases = (
			(-2, 0.022750131948179209),
			(-1, 0.15865525393145705),
			(0, 0.5),
			(1, 0.84134474606854293),
			(2, 0.97724986805182079),
		)
		for x, expected in cdfCases:
			self.assertAlmostEqual(standardNormalCDF(x), expected)
		
		# symmetry about zero for very small arguments
		for eps in (1e-6, 1e-12):
			self.assertAlmostEqual(standardNormalCDF(-eps), 1.0 - standardNormalCDF(eps))
		
		# (argument, expected z-score)
		zCases = (
			(0.90, 1.6448536269514722),
			(0.95, 1.959963984540054),
			(0.98, 2.3263478740408408),
			(0.99, 2.5758293035489004),
			(0.80, 1.2815515655446004),
		)
		for area, expected in zCases:
			self.assertAlmostEqual(zScore(area), expected)
Esempio n. 9
0
  def equalSampleSize(self, seq1, seq2, totalSeq1, totalSeq2, alpha, beta):
    '''
    Sample size estimate for a difference between proportions test.

    Returns 1 when the two observed proportions (seq1/totalSeq1 and
    seq2/totalSeq2) are equal; otherwise returns the value of the equal
    sample size formula evaluated with zScore(1 - alpha) and zScore(1 - beta).
    '''
    # The chi-square test is equivalent to the difference between proportions
    # test as illustrated by Rivals et al., 2007. Here we use the standard
    # equal sample size formulation for a difference between proportions test.
    levelAlpha = 1.0 - alpha
    levelBeta = 1.0 - beta

    propA = float(seq1) / totalSeq1
    propB = float(seq2) / totalSeq2
    delta = propA - propB

    # equal proportions would put a zero in the denominator
    if delta == 0:
      return 1

    compA = 1.0 - propA
    compB = 1.0 - propB

    pooledRoot = math.sqrt((propA + propB)*(compA + compB)/2)
    numerator = zScore(levelAlpha) * pooledRoot
    numerator = numerator + zScore(levelBeta)*math.sqrt((propA*compA) + (propB*compB))

    return numerator**2 / (delta**2)
Esempio n. 10
0
    def equalSampleSize(self, seq1, seq2, totalSeq1, totalSeq2, alpha, beta):
        '''
        Sample size estimate for a difference between proportions test.

        Returns 1 when the two observed proportions (seq1/totalSeq1 and
        seq2/totalSeq2) are equal; otherwise returns the value of the equal
        sample size formula evaluated with zScore(1 - alpha) and
        zScore(1 - beta).
        '''
        # The chi-square test is equivalent to the difference between proportions
        # test as illustrated by Rivals et al., 2007. Here we use the standard
        # equal sample size formulation for a difference between proportions test.
        oneMinusAlpha = 1.0 - alpha
        oneMinusBeta = 1.0 - beta

        p1 = float(seq1) / totalSeq1
        p2 = float(seq2) / totalSeq2
        diff = p1 - p2

        # guard: the formula divides by the squared difference
        if diff == 0:
            return 1

        q1 = 1.0 - p1
        q2 = 1.0 - p2

        underNull = zScore(oneMinusAlpha) * math.sqrt((p1 + p2) * (q1 + q2) / 2)
        underAlt = zScore(oneMinusBeta) * math.sqrt((p1 * q1) + (p2 * q2))
        total = underNull + underAlt

        return total**2 / (diff**2)
Esempio n. 11
0
    def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
        '''
        Calculate odds ratio confidence interval.

        The four table entries and the note come from self.tableValues().
        Returns the tuple (lower CI, upper CI, effect size, note).
        '''
        table = self.tableValues(seq1, seq2, totalSeq1, totalSeq2)
        a, b, c, d, note = table

        crossUp = float(a) * d
        crossDown = float(b) * c
        effectSize = crossUp / crossDown
        logOdds = math.log(effectSize)

        # standard error on the log scale: root of the summed reciprocals
        logSE = math.sqrt(1.0 / a + 1.0 / b + 1.0 / c + 1.0 / d)

        halfWidth = zScore(coverage) * logSE

        # bounds are formed on the log scale and exponentiated back
        lowerCI = math.exp(logOdds - halfWidth)
        upperCI = math.exp(logOdds + halfWidth)
        return lowerCI, upperCI, effectSize, note
Esempio n. 12
0
	def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
		'''
		Calculate odds ratio confidence interval.
		
		The four table entries and the note come from self.tableValues().
		Returns the tuple (lower CI, upper CI, effect size, note).
		'''
		a, b, c, d, note = self.tableValues(seq1, seq2, totalSeq1, totalSeq2)
		
		effectSize = (float(a) * d) / (float(b) * c)
		logOR = math.log(effectSize)
		
		# standard error of the log odds ratio
		reciprocalSum = 1.0/a + 1.0/b + 1.0/c + 1.0/d
		logSE = math.sqrt(reciprocalSum)
		
		margin = zScore(coverage) * logSE
		
		# exponentiate the log-scale bounds to return to the ratio scale
		return math.exp(logOR - margin), math.exp(logOR + margin), effectSize, note
Esempio n. 13
0
	def power(self, seq1, seq2, totalSeq1, totalSeq2, alpha): 
		'''
		Asymptotic power of a difference between proportions test.
		
		The observed proportions are seq1/totalSeq1 and seq2/totalSeq2.
		Returns 1.0 when the unpooled standard deviation is zero; otherwise
		returns standardNormalCDF(zLower) + (1 - standardNormalCDF(zUpper)),
		where the bounds are built from zScore(1 - alpha), the pooled
		standard deviation and the observed difference.
		'''
		oneMinusAlpha = 1.0 - alpha
		
		p1 = float(seq1) / totalSeq1
		p2 = float(seq2) / totalSeq2
		d = p1 - p2
		
		stdDev = math.sqrt( (p1 * (1-p1)) / totalSeq1 + (p2 * (1 - p2)) / totalSeq2 )
		
		# degenerate case: no variability in either sample
		if stdDev == 0:
			return 1.0
		
		p = float(totalSeq1*p1 + totalSeq2*p2) / (totalSeq1 + totalSeq2)
		q = 1-p
		pooledStdDev = math.sqrt( (p*q) / totalSeq1 + (p*q) / totalSeq2 )
		
		# The critical value must not be bound to a local named 'zScore':
		# that would make the name local to the whole function and the call
		# itself would raise UnboundLocalError.
		z = zScore(oneMinusAlpha)
		zLower = ( -z * pooledStdDev - d ) / stdDev
		zUpper = ( z * pooledStdDev - d ) / stdDev
		
		return standardNormalCDF(zLower) + (1.0 - standardNormalCDF(zUpper))
Esempio n. 14
0
	def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
		'''
		Calculate confidence interval using standard asymptotic method.
			Results are reported as percent difference.
		
		A total of zero is replaced by the configured pseudocount and a note is set.
		Returns the tuple (lower CI, upper CI, difference, note), each numeric
		value multiplied by 100.
		'''
		degenerateNote = 'degenerate case: CI calculation used pseudocount'
		note = ''
		
		if totalSeq1 == 0:
			totalSeq1 = self.preferences['Pseudocount']
			note = degenerateNote
		
		if totalSeq2 == 0:
			totalSeq2 = self.preferences['Pseudocount']
			note = degenerateNote
		
		prop1 = float(seq1) / totalSeq1
		prop2 = float(seq2) / totalSeq2
		
		# unpooled variance of the difference between the two proportions
		variance = (prop1*(1-prop1)) / totalSeq1 + (prop2*(1-prop2)) / totalSeq2
		stdErr = math.sqrt(variance)
		halfWidth = zScore(coverage) * stdErr
		
		diff = prop1 - prop2
		return (diff - halfWidth) * 100, (diff + halfWidth) * 100, diff * 100, note
Esempio n. 15
0
  def power(self, seq1, seq2, totalSeq1, totalSeq2, alpha): 
    '''
    Asymptotic power of a difference between proportions test.

    The observed proportions are seq1/totalSeq1 and seq2/totalSeq2.
    Returns 1.0 when the unpooled standard deviation is zero; otherwise
    returns standardNormalCDF(zLower) + (1 - standardNormalCDF(zUpper)),
    where the bounds are built from zScore(1 - alpha), the pooled
    standard deviation and the observed difference.
    '''
    # The chi-square test is equivalent to the difference between proportions
    # test as illustrated by Rivals et al., 2007. Here we use the standard
    # asymptotic power formulation for a difference between proportions test.
    oneMinusAlpha = 1.0 - alpha

    p1 = float(seq1) / totalSeq1
    p2 = float(seq2) / totalSeq2
    d = p1 - p2

    stdDev = math.sqrt( (p1 * (1-p1)) / totalSeq1 + (p2 * (1 - p2)) / totalSeq2 )

    # degenerate case: no variability in either sample
    if stdDev == 0:
      return 1.0

    p = float(totalSeq1*p1 + totalSeq2*p2) / (totalSeq1 + totalSeq2)
    q = 1-p
    pooledStdDev = math.sqrt( (p*q) / totalSeq1 + (p*q) / totalSeq2 )

    # The critical value must not be bound to a local named 'zScore':
    # that would make the name local to the whole function and the call
    # itself would raise UnboundLocalError.
    z = zScore(oneMinusAlpha)
    zLower = ( -z * pooledStdDev - d ) / stdDev
    zUpper = ( z * pooledStdDev - d ) / stdDev

    return standardNormalCDF(zLower) + (1.0 - standardNormalCDF(zUpper))
Esempio n. 16
0
		 To facilitate calling this function on several different binomial random variables this is taken as a
		 parameter so it only needs to be calculated once.
		 '''
		 
		totalSeqs = max(totalSeqs, 1.0) 
		
		z = zScore
		zSqrd = z*z
		
		p = float(posSeqs) / totalSeqs
		q = 1.0 - p

		term1 = p + zSqrd / (2*totalSeqs)
		offset = z * math.sqrt(p*q / totalSeqs + zSqrd / (4*totalSeqs*totalSeqs))
		denom = 1 + zSqrd / totalSeqs
		
		lowerCI = (term1 - offset) / denom
		upperCI = (term1 + offset) / denom
		
		# Good correction, but computationally expensive
		#if posSeqs >= 1 and posSeqs <=3:
			# use one-sided Poisson approximation when probability ~= 0 (see Brown et al., 2001)
		#	lowerCI = 0.5*chi2.isf(coverage, 2*posSeqs) / totalSeqs
		
		return lowerCI, upperCI, p
	
# Ad-hoc check run only when this module is executed directly: builds a
# WilsonCI, calls run() once and prints the three returned values.
# NOTE(review): arguments are presumably (posSeqs=10, totalSeqs=100,
# coverage=0.95, precomputed z-score) -- confirm against run()'s signature.
if __name__ == "__main__": 
	wilsonCI = WilsonCI()
	lowerCI, upperCI, p = wilsonCI.run(10,100, 0.95, zScore(0.95))
	# Python 2 print statement
	print lowerCI, upperCI, p
Esempio n. 17
0
		 '''

        totalSeqs = max(totalSeqs, 1.0)

        z = zScore
        zSqrd = z * z

        p = float(posSeqs) / totalSeqs
        q = 1.0 - p

        term1 = p + zSqrd / (2 * totalSeqs)
        offset = z * math.sqrt(p * q / totalSeqs + zSqrd /
                               (4 * totalSeqs * totalSeqs))
        denom = 1 + zSqrd / totalSeqs

        lowerCI = (term1 - offset) / denom
        upperCI = (term1 + offset) / denom

        # Good correction, but computationally expensive
        #if posSeqs >= 1 and posSeqs <=3:
        # use one-sided Poisson approximation when probability ~= 0 (see Brown et al., 2001)
        #	lowerCI = 0.5*chi2.isf(coverage, 2*posSeqs) / totalSeqs

        return lowerCI, upperCI, p


# Ad-hoc check run only when this module is executed directly: builds a
# WilsonCI, calls run() once and prints the three returned values.
# NOTE(review): arguments are presumably (posSeqs=10, totalSeqs=100,
# coverage=0.95, precomputed z-score) -- confirm against run()'s signature.
if __name__ == "__main__":
    wilsonCI = WilsonCI()
    lowerCI, upperCI, p = wilsonCI.run(10, 100, 0.95, zScore(0.95))
    # Python 2 print statement
    print lowerCI, upperCI, p