Example #1
0
	def equalSampleSize(self, seq1, seq2, totalSeq1, totalSeq2, alpha, beta):
		'''
		Evaluate the equal sample size formula for a difference between proportions.

		The two proportions are seq1/totalSeq1 and seq2/totalSeq2. alpha and
		beta are handed to zScore as (1 - alpha) and (1 - beta).

		Returns 1 when both proportions are identical, otherwise the value
		of the sample size formula.
		'''
		propA = float(seq1) / totalSeq1
		propB = float(seq2) / totalSeq2
		effect = propA - propB

		# Identical proportions: leave before the division by effect**2 below.
		if effect == 0:
			return 1

		complA = 1.0 - propA
		complB = 1.0 - propB

		# Term scaled by the z-score of (1 - alpha); uses the summed proportions.
		alphaTerm = zScore(1.0 - alpha) * math.sqrt((propA + propB)*(complA + complB)/2)
		# Term scaled by the z-score of (1 - beta); uses the per-sample variances.
		betaTerm = zScore(1.0 - beta)*math.sqrt((propA*complA) + (propB*complB))

		return (alphaTerm + betaTerm)**2 / (effect**2)
Example #2
0
	def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
		'''
		Confidence interval for the ratio of proportions (relative risk).

		If either count is zero, the pseudocount stored in self.preferences
		is added to both counts and twice the pseudocount to both totals;
		the returned note records that this happened.

		Returns the tuple (lower CI, upper CI, effect size, note).
		'''
		if seq1 != 0 and seq2 != 0:
			note = ''
		else:
			pc = self.preferences['Pseudocount']
			seq1 = seq1 + pc
			seq2 = seq2 + pc
			totalSeq1 = totalSeq1 + 2*pc
			totalSeq2 = totalSeq2 + 2*pc
			note = 'degenerate case: CI calculation used pseudocount'

		ratio = (float(seq1) / totalSeq1) / (float(seq2) / totalSeq2)

		# The interval is built on the log scale and mapped back with exp().
		logRatio = math.log(ratio)
		logStdErr = math.sqrt(1.0/seq1 - 1.0/totalSeq1 + 1.0/seq2 - 1.0/totalSeq2)
		margin = zScore(coverage)*logStdErr

		return math.exp(logRatio - margin), math.exp(logRatio + margin), ratio, note
Example #3
0
	def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
		'''
		Confidence interval for a difference in proportions using the
		Newcombe-Wilson method.

		A total of zero is replaced by the pseudocount from self.preferences
		and the returned note records that this happened.

		Returns the tuple (lower CI, upper CI, difference, note); the three
		numeric values are multiplied by 100.
		'''
		pseudocountNote = 'degenerate case: CI calculation used pseudocount'
		note = ''

		if totalSeq1 == 0:
			totalSeq1 = self.preferences['Pseudocount']
			note = pseudocountNote

		if totalSeq2 == 0:
			totalSeq2 = self.preferences['Pseudocount']
			note = pseudocountNote

		z = zScore(coverage)

		# First and second element of each result returned by NewcombeWilsonFindRoots.
		bounds1 = self.NewcombeWilsonFindRoots(seq1, totalSeq1, z)
		first1, second1 = bounds1[0], bounds1[1]
		bounds2 = self.NewcombeWilsonFindRoots(seq2, totalSeq2, z)
		first2, second2 = bounds2[0], bounds2[1]

		diff = float(seq1)/totalSeq1 - float(seq2)/totalSeq2

		# The lower margin pairs the first root of sample 1 with the second
		# root of sample 2; the upper margin uses the opposite pairing.
		belowDiff = z*math.sqrt(first1*(1-first1)/totalSeq1 + second2*(1-second2)/totalSeq2)
		aboveDiff = z*math.sqrt(second1*(1-second1)/totalSeq1 + first2*(1-first2)/totalSeq2)

		return (diff-belowDiff)*100, (diff+aboveDiff)*100, diff*100, note
Example #4
0
  def equalSampleSize(self, seq1, seq2, totalSeq1, totalSeq2, alpha, beta):
    # Rivals et al., 2007 illustrate that the chi-square test is equivalent
    # to the difference between proportions test, so the standard equal
    # sample size formulation of the latter is evaluated here.
    #
    # Returns 1 when the two proportions are identical, otherwise the value
    # of the sample size formula.
    firstProp = float(seq1) / totalSeq1
    secondProp = float(seq2) / totalSeq2
    delta = firstProp - secondProp

    # No difference between the proportions: return before dividing by delta**2.
    if delta == 0:
      return 1

    firstCompl = 1.0 - firstProp
    secondCompl = 1.0 - secondProp

    # z-scores are requested for (1 - alpha) and (1 - beta) respectively.
    weightedAlpha = zScore(1.0 - alpha) * math.sqrt((firstProp + secondProp)*(firstCompl + secondCompl)/2)
    weightedBeta = zScore(1.0 - beta)*math.sqrt((firstProp*firstCompl) + (secondProp*secondCompl))

    numerator = (weightedAlpha + weightedBeta)**2
    return numerator / (delta**2)
Example #5
0
	def testNormalDist(self):
		"""Check standardNormalCDF and zScore against reference values"""
		from stamp.metagenomics.stats.distributions.NormalDist import standardNormalCDF, zScore

		# (x, expected value of standardNormalCDF(x))
		cdfReference = [
			(-2, 0.022750131948179209),
			(-1, 0.15865525393145705),
			(0, 0.5),
			(1, 0.84134474606854293),
			(2, 0.97724986805182079),
		]
		for x, expected in cdfReference:
			self.assertAlmostEqual(standardNormalCDF(x), expected)

		# Values at -x and +x must sum to one, checked very close to zero.
		for eps in (1e-6, 1e-12):
			self.assertAlmostEqual(standardNormalCDF(-eps), 1.0 - standardNormalCDF(eps))

		# (argument, expected value of zScore(argument))
		zReference = [
			(0.90, 1.6448536269514722),
			(0.95, 1.959963984540054),
			(0.98, 2.3263478740408408),
			(0.99, 2.5758293035489004),
			(0.80, 1.2815515655446004),
		]
		for area, expected in zReference:
			self.assertAlmostEqual(zScore(area), expected)
Example #6
0
	def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
		'''
		Confidence interval for the odds ratio.

		The four table cells and the note come from self.tableValues.

		Returns the tuple (lower CI, upper CI, effect size, note).
		'''
		cellA, cellB, cellC, cellD, note = self.tableValues(seq1, seq2, totalSeq1, totalSeq2)

		oddsRatio = (float(cellA) * cellD) / (float(cellB) * cellC)

		# The interval is built on the log scale and mapped back with exp().
		logOddsRatio = math.log(oddsRatio)
		logStdErr = math.sqrt(1.0/cellA + 1.0/cellB + 1.0/cellC + 1.0/cellD)
		margin = zScore(coverage)*logStdErr

		return math.exp(logOddsRatio - margin), math.exp(logOddsRatio + margin), oddsRatio, note
Example #7
0
	def power(self, seq1, seq2, totalSeq1, totalSeq2, alpha): 
		'''
		Evaluate the power formula for a two-tailed difference between proportions test.

		The two proportions are seq1/totalSeq1 and seq2/totalSeq2. alpha is
		handed to zScore as (1 - alpha).

		Returns 1.0 when the unpooled standard deviation is zero, otherwise
		the sum of the lower and upper tail values of standardNormalCDF.
		'''
		p1 = float(seq1) / totalSeq1
		p2 = float(seq2) / totalSeq2
		d = p1 - p2

		# Unpooled standard deviation of the difference.
		stdDev = math.sqrt( (p1 * (1-p1)) / totalSeq1 + (p2 * (1 - p2)) / totalSeq2 )
		if stdDev == 0:
			return 1.0

		# Standard deviation using the pooled proportion of both samples.
		p = float(totalSeq1*p1 + totalSeq2*p2) / (totalSeq1 + totalSeq2)
		q = 1-p
		pooledStdDev = math.sqrt( (p*q) / totalSeq1 + (p*q) / totalSeq2 )

		# The result must not be bound to the name zScore: assigning to that
		# name anywhere in this method makes it local, and the call itself
		# would then raise UnboundLocalError.
		zCritical = zScore(1.0 - alpha)
		zLower = ( -zCritical * pooledStdDev - d ) / stdDev
		zUpper = ( zCritical * pooledStdDev - d ) / stdDev

		return standardNormalCDF(zLower) + (1.0 - standardNormalCDF(zUpper))
	def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
		'''
		Confidence interval for a difference in proportions using the
		standard asymptotic method.

		A total of zero is replaced by the pseudocount from self.preferences
		and the returned note records that this happened.

		Returns the tuple (lower CI, upper CI, difference, note); the three
		numeric values are multiplied by 100.
		'''
		pseudocountNote = 'degenerate case: CI calculation used pseudocount'
		note = ''

		if totalSeq1 == 0:
			totalSeq1 = self.preferences['Pseudocount']
			note = pseudocountNote

		if totalSeq2 == 0:
			totalSeq2 = self.preferences['Pseudocount']
			note = pseudocountNote

		prop1 = float(seq1) / totalSeq1
		prop2 = float(seq2) / totalSeq2
		delta = prop1 - prop2

		# Half-width of the interval: z-score times the unpooled standard error.
		variance1 = (prop1*(1-prop1)) / totalSeq1
		variance2 = (prop2*(1-prop2)) / totalSeq2
		stdErr = math.sqrt(variance1 + variance2)
		halfWidth = zScore(coverage) * stdErr

		lowerCI = (delta - halfWidth) * 100
		upperCI = (delta + halfWidth) * 100
		return lowerCI, upperCI, delta * 100, note
Example #9
0
  def power(self, seq1, seq2, totalSeq1, totalSeq2, alpha): 
    # The chi-square test is equivalent to the difference between proportions
    # test as illustrated by Rivals et al., 2007. Here we use the standard
    # asymptotic power formulation for a difference between proportions test.
    #
    # The two proportions are seq1/totalSeq1 and seq2/totalSeq2; alpha is
    # handed to zScore as (1 - alpha). Returns 1.0 when the unpooled standard
    # deviation is zero, otherwise the sum of the lower and upper tail values
    # of standardNormalCDF.
    p1 = float(seq1) / totalSeq1
    p2 = float(seq2) / totalSeq2
    d = p1 - p2

    # Unpooled standard deviation of the difference.
    stdDev = math.sqrt( (p1 * (1-p1)) / totalSeq1 + (p2 * (1 - p2)) / totalSeq2 )
    if stdDev == 0:
      return 1.0

    # Standard deviation using the pooled proportion of both samples.
    p = float(totalSeq1*p1 + totalSeq2*p2) / (totalSeq1 + totalSeq2)
    q = 1-p
    pooledStdDev = math.sqrt( (p*q) / totalSeq1 + (p*q) / totalSeq2 )

    # The result must not be bound to the name zScore: assigning to that name
    # anywhere in this method makes it local, and the call itself would then
    # raise UnboundLocalError.
    zCritical = zScore(1.0 - alpha)
    zLower = ( -zCritical * pooledStdDev - d ) / stdDev
    zUpper = ( zCritical * pooledStdDev - d ) / stdDev

    return standardNormalCDF(zLower) + (1.0 - standardNormalCDF(zUpper))
Example #10
0
		 To facilitate calling this function on several different binomial random variables this is taken as a
		 parameter so it only needs to be calculated once.
		 '''
		 
		totalSeqs = max(totalSeqs, 1.0) 
		
		z = zScore
		zSqrd = z*z
		
		p = float(posSeqs) / totalSeqs
		q = 1.0 - p

		term1 = p + zSqrd / (2*totalSeqs)
		offset = z * math.sqrt(p*q / totalSeqs + zSqrd / (4*totalSeqs*totalSeqs))
		denom = 1 + zSqrd / totalSeqs
		
		lowerCI = (term1 - offset) / denom
		upperCI = (term1 + offset) / denom
		
		# Good correction, but computationally expensive
		#if posSeqs >= 1 and posSeqs <=3:
			# use one-sided Poisson approximation when probability ~= 0 (see Brown et al., 2001)
		#	lowerCI = 0.5*chi2.isf(coverage, 2*posSeqs) / totalSeqs
		
		return lowerCI, upperCI, p
	
if __name__ == "__main__": 
	# Small demo: run WilsonCI once and print the returned lower bound,
	# upper bound and proportion.
	wilsonCI = WilsonCI()
	lowerCI, upperCI, p = wilsonCI.run(10,100, 0.95, zScore(0.95))
	# A single pre-formatted argument is valid for both the Python 2 print
	# statement and the Python 3 print function, and yields the same
	# space-separated line the bare print statement produced.
	print('%s %s %s' % (lowerCI, upperCI, p))