def equalSampleSize(self, seq1, seq2, totalSeq1, totalSeq2, alpha, beta):
    '''
    Estimate the equal per-group sample size required to detect the observed
    difference between two proportions at significance level alpha with
    type II error rate beta.
    '''
    p1 = float(seq1) / totalSeq1
    p2 = float(seq2) / totalSeq2
    diff = p1 - p2

    # No observed difference: required sample size is undefined, so return
    # the degenerate value used by this module.
    if diff == 0:
        return 1

    q1 = 1.0 - p1
    q2 = 1.0 - p2

    # Pooled term uses the average proportion; unpooled term uses the
    # individual variances (standard two-proportion sample size formula).
    pooledTerm = zScore(1.0 - alpha) * math.sqrt((p1 + p2) * (q1 + q2) / 2)
    unpooledTerm = zScore(1.0 - beta) * math.sqrt(p1 * q1 + p2 * q2)

    return (pooledTerm + unpooledTerm) ** 2 / (diff ** 2)
def power(self, seq1, seq2, totalSeq1, totalSeq2, alpha):
    '''
    Calculate the statistical power of detecting the observed difference
    between two proportions at significance level alpha.
    '''
    # The chi-square test is equivalent to the difference between proportions
    # test as illustrated by Rivals et al., 2007. Here we use the standard
    # asymptotic power formulation for a difference between proportions test.
    oneMinusAlpha = 1.0 - alpha
    p1 = float(seq1) / totalSeq1
    p2 = float(seq2) / totalSeq2
    d = p1 - p2

    stdDev = math.sqrt((p1 * (1 - p1)) / totalSeq1 + (p2 * (1 - p2)) / totalSeq2)
    if stdDev == 0:
        # Degenerate case (both proportions are 0 or 1): report full power.
        return 1.0

    p = float(totalSeq1 * p1 + totalSeq2 * p2) / (totalSeq1 + totalSeq2)
    q = 1 - p
    pooledStdDev = math.sqrt((p * q) / totalSeq1 + (p * q) / totalSeq2)

    # BUG FIX: the local variable was previously named 'zScore', which made
    # the name local to this function and shadowed the module-level zScore()
    # helper, so the call below raised UnboundLocalError.
    zCritical = zScore(oneMinusAlpha)
    zLower = (-zCritical * pooledStdDev - d) / stdDev
    zUpper = (zCritical * pooledStdDev - d) / stdDev
    return standardNormalCDF(zLower) + (1.0 - standardNormalCDF(zUpper))
def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
    '''
    Calculate ratio of proportions (relative risk) confidence interval.
    '''
    note = ''
    if seq1 == 0 or seq2 == 0:
        # A zero count makes the ratio (or its log) undefined; shift every
        # cell by a pseudocount so an interval can still be reported.
        pc = self.preferences['Pseudocount']
        seq1 += pc
        seq2 += pc
        totalSeq1 += 2 * pc
        totalSeq2 += 2 * pc
        note = 'degenerate case: CI calculation used pseudocount'

    effectSize = (float(seq1) / totalSeq1) / (float(seq2) / totalSeq2)

    # The interval is symmetric on the log scale (log-normal approximation).
    logEffect = math.log(effectSize)
    logSE = math.sqrt(1.0 / seq1 - 1.0 / totalSeq1 + 1.0 / seq2 - 1.0 / totalSeq2)
    halfWidth = zScore(coverage) * logSE

    lowerCI = math.exp(logEffect - halfWidth)
    upperCI = math.exp(logEffect + halfWidth)

    return lowerCI, upperCI, effectSize, note
def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
    '''
    Calculate confidence interval using Newcombe-Wilson method. Results
    are reported as percent difference.
    '''
    note = ''
    if totalSeq1 == 0:
        totalSeq1 = self.preferences['Pseudocount']
        note = 'degenerate case: CI calculation used pseudocount'
    if totalSeq2 == 0:
        totalSeq2 = self.preferences['Pseudocount']
        note = 'degenerate case: CI calculation used pseudocount'

    z = zScore(coverage)
    rootsA = self.NewcombeWilsonFindRoots(seq1, totalSeq1, z)
    rootsB = self.NewcombeWilsonFindRoots(seq2, totalSeq2, z)

    diff = float(seq1) / totalSeq1 - float(seq2) / totalSeq2

    # The lower limit pairs group 1's lower root with group 2's upper root;
    # the upper limit pairs them the other way around.
    lowerOffset = z * math.sqrt(rootsA[0] * (1 - rootsA[0]) / totalSeq1
                                + rootsB[1] * (1 - rootsB[1]) / totalSeq2)
    upperOffset = z * math.sqrt(rootsA[1] * (1 - rootsA[1]) / totalSeq1
                                + rootsB[0] * (1 - rootsB[0]) / totalSeq2)

    return (diff - lowerOffset) * 100, (diff + upperOffset) * 100, diff * 100, note
def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
    '''
    Calculate confidence interval using asymptotic method with a
    continuity correction. Results are reported as percent difference.
    '''
    note = ''
    if totalSeq1 == 0:
        totalSeq1 = self.preferences['Pseudocount']
        note = 'degenerate case: CI calculation used pseudocount'
    if totalSeq2 == 0:
        totalSeq2 = self.preferences['Pseudocount']
        note = 'degenerate case: CI calculation used pseudocount'

    R1 = float(seq1) / totalSeq1
    R2 = float(seq2) / totalSeq2
    diff = R1 - R2

    stdErr = math.sqrt((R1 * (1 - R1)) / totalSeq1 + (R2 * (1 - R2)) / totalSeq2)

    # BUG FIX: the continuity correction term (1/n1 + 1/n2)/2 was previously
    # folded into stdErr and therefore multiplied by the z critical value.
    # In the standard formulation (see Newcombe 1998, method 2) the
    # correction is added to the half-width AFTER scaling by z.
    offset = zScore(coverage) * stdErr + (1.0 / totalSeq1 + 1.0 / totalSeq2) / 2

    return (diff - offset) * 100, (diff + offset) * 100, diff * 100, note
def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
    '''
    Calculate confidence interval using Newcombe-Wilson method. Results
    are reported as percent difference.
    '''
    note = ''
    if totalSeq1 == 0:
        totalSeq1 = self.preferences['Pseudocount']
        note = 'degenerate case: CI calculation used pseudocount'
    if totalSeq2 == 0:
        totalSeq2 = self.preferences['Pseudocount']
        note = 'degenerate case: CI calculation used pseudocount'

    zCritical = zScore(coverage)
    wilson1 = self.NewcombeWilsonFindRoots(seq1, totalSeq1, zCritical)
    wilson2 = self.NewcombeWilsonFindRoots(seq2, totalSeq2, zCritical)

    observedDiff = float(seq1) / totalSeq1 - float(seq2) / totalSeq2

    # Helper for the Wilson variance contribution of one root.
    def varTerm(root, total):
        return root * (1 - root) / total

    # Lower bound combines root pairs (low1, high2); upper bound (high1, low2).
    lowerHalf = zCritical * math.sqrt(varTerm(wilson1[0], totalSeq1) + varTerm(wilson2[1], totalSeq2))
    upperHalf = zCritical * math.sqrt(varTerm(wilson1[1], totalSeq1) + varTerm(wilson2[0], totalSeq2))

    return (observedDiff - lowerHalf) * 100, (observedDiff + upperHalf) * 100, observedDiff * 100, note
def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
    '''
    Calculate ratio of proportions (relative risk) confidence interval.
    '''
    note = ''
    degenerate = (seq1 == 0 or seq2 == 0)
    if degenerate:
        # Pseudocounts keep the log of the ratio defined when a count is zero.
        pseudocount = self.preferences['Pseudocount']
        seq1 += pseudocount
        seq2 += pseudocount
        totalSeq1 += 2 * pseudocount
        totalSeq2 += 2 * pseudocount
        note = 'degenerate case: CI calculation used pseudocount'

    ratio = (float(seq1) / totalSeq1) / (float(seq2) / totalSeq2)

    # Standard error of log(relative risk).
    seLog = math.sqrt(1.0 / seq1 - 1.0 / totalSeq1 + 1.0 / seq2 - 1.0 / totalSeq2)
    margin = zScore(coverage) * seLog

    lowerCI = math.exp(math.log(ratio) - margin)
    upperCI = math.exp(math.log(ratio) + margin)

    return lowerCI, upperCI, ratio, note
def testNormalDist(self):
    """Verify computation of normal distribution methods"""
    from stamp.metagenomics.stats.distributions.NormalDist import standardNormalCDF, zScore

    # Known CDF values of the standard normal distribution.
    for x, expected in [(-2, 0.022750131948179209),
                        (-1, 0.15865525393145705),
                        (0, 0.5),
                        (1, 0.84134474606854293),
                        (2, 0.97724986805182079)]:
        self.assertAlmostEqual(standardNormalCDF(x), expected)

    # The CDF should be symmetric about zero, even very close to it.
    for eps in (1e-6, 1e-12):
        self.assertAlmostEqual(standardNormalCDF(-eps), 1.0 - standardNormalCDF(eps))

    # Critical values for common coverage levels.
    for coverage, expected in [(0.90, 1.6448536269514722),
                               (0.95, 1.959963984540054),
                               (0.98, 2.3263478740408408),
                               (0.99, 2.5758293035489004),
                               (0.80, 1.2815515655446004)]:
        self.assertAlmostEqual(zScore(coverage), expected)
def equalSampleSize(self, seq1, seq2, totalSeq1, totalSeq2, alpha, beta):
    '''
    Estimate the equal per-group sample size needed to detect the observed
    difference at significance level alpha with type II error rate beta.
    '''
    # The chi-square test is equivalent to the difference between proportions
    # test as illustrated by Rivals et al., 2007. Here we use the standard
    # equal sample size formulation for a difference between proportions test.
    p1 = float(seq1) / totalSeq1
    p2 = float(seq2) / totalSeq2
    d = p1 - p2
    if d == 0:
        return 1

    zAlpha = zScore(1.0 - alpha)
    zBeta = zScore(1.0 - beta)

    pooledVar = (p1 + p2) * ((1.0 - p1) + (1.0 - p2)) / 2
    unpooledVar = p1 * (1.0 - p1) + p2 * (1.0 - p2)

    numerator = (zAlpha * math.sqrt(pooledVar) + zBeta * math.sqrt(unpooledVar)) ** 2
    return numerator / (d * d)
def equalSampleSize(self, seq1, seq2, totalSeq1, totalSeq2, alpha, beta):
    '''
    Estimate the equal per-group sample size needed to detect the observed
    difference at significance level alpha with type II error rate beta.
    '''
    # Equivalence to the difference-between-proportions test follows
    # Rivals et al., 2007; this is the standard equal-sample-size formula.
    prop1 = float(seq1) / totalSeq1
    prop2 = float(seq2) / totalSeq2
    delta = prop1 - prop2
    if delta == 0:
        # Identical proportions: sample size is undefined; use the
        # module's degenerate value.
        return 1

    comp1 = 1.0 - prop1
    comp2 = 1.0 - prop2
    term = (zScore(1.0 - alpha) * math.sqrt((prop1 + prop2) * (comp1 + comp2) / 2)
            + zScore(1.0 - beta) * math.sqrt(prop1 * comp1 + prop2 * comp2))
    return term ** 2 / delta ** 2
def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
    '''
    Calculate odds ratio confidence interval.
    '''
    a, b, c, d, note = self.tableValues(seq1, seq2, totalSeq1, totalSeq2)

    # Odds ratio of the 2x2 table; the interval is symmetric on the log
    # scale (Woolf's logit method).
    effectSize = (float(a) * d) / (float(b) * c)
    logOR = math.log(effectSize)
    halfWidth = zScore(coverage) * math.sqrt(1.0 / a + 1.0 / b + 1.0 / c + 1.0 / d)

    return math.exp(logOR - halfWidth), math.exp(logOR + halfWidth), effectSize, note
def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
    '''
    Calculate odds ratio confidence interval.
    '''
    a, b, c, d, note = self.tableValues(seq1, seq2, totalSeq1, totalSeq2)

    oddsRatio = (float(a) * d) / (float(b) * c)

    # Woolf's method: SE of log(OR) is sqrt of summed reciprocal cells.
    seLogOR = math.sqrt(1.0 / a + 1.0 / b + 1.0 / c + 1.0 / d)
    margin = zScore(coverage) * seLogOR
    logOdds = math.log(oddsRatio)

    lowerCI = math.exp(logOdds - margin)
    upperCI = math.exp(logOdds + margin)
    return lowerCI, upperCI, oddsRatio, note
def power(self, seq1, seq2, totalSeq1, totalSeq2, alpha):
    '''
    Calculate the statistical power of detecting the observed difference
    between two proportions at significance level alpha, using the
    asymptotic power formulation for a difference between proportions test.
    '''
    oneMinusAlpha = 1.0 - alpha
    p1 = float(seq1) / totalSeq1
    p2 = float(seq2) / totalSeq2
    d = p1 - p2

    stdDev = math.sqrt((p1 * (1 - p1)) / totalSeq1 + (p2 * (1 - p2)) / totalSeq2)
    if stdDev == 0:
        # Degenerate case (both proportions are 0 or 1): report full power.
        return 1.0

    p = float(totalSeq1 * p1 + totalSeq2 * p2) / (totalSeq1 + totalSeq2)
    q = 1 - p
    pooledStdDev = math.sqrt((p * q) / totalSeq1 + (p * q) / totalSeq2)

    # BUG FIX: the local variable was previously named 'zScore', which made
    # the name local to this function and shadowed the module-level zScore()
    # helper, so the call below raised UnboundLocalError.
    zCritical = zScore(oneMinusAlpha)
    zLower = (-zCritical * pooledStdDev - d) / stdDev
    zUpper = (zCritical * pooledStdDev - d) / stdDev
    return standardNormalCDF(zLower) + (1.0 - standardNormalCDF(zUpper))
def run(self, seq1, seq2, totalSeq1, totalSeq2, coverage):
    '''
    Calculate confidence interval using standard asymptotic method.
    Results are reported as percent difference.
    '''
    note = ''
    if totalSeq1 == 0:
        totalSeq1 = self.preferences['Pseudocount']
        note = 'degenerate case: CI calculation used pseudocount'
    if totalSeq2 == 0:
        totalSeq2 = self.preferences['Pseudocount']
        note = 'degenerate case: CI calculation used pseudocount'

    prop1 = float(seq1) / totalSeq1
    prop2 = float(seq2) / totalSeq2
    diff = prop1 - prop2

    # Wald interval: variance is the sum of the per-group binomial variances.
    variance = prop1 * (1 - prop1) / totalSeq1 + prop2 * (1 - prop2) / totalSeq2
    margin = zScore(coverage) * math.sqrt(variance)

    return (diff - margin) * 100, (diff + margin) * 100, diff * 100, note
def power(self, seq1, seq2, totalSeq1, totalSeq2, alpha):
    '''
    Calculate the statistical power of detecting the observed difference
    between two proportions at significance level alpha.
    '''
    # The chi-square test is equivalent to the difference between proportions
    # test as illustrated by Rivals et al., 2007. Here we use the standard
    # asymptotic power formulation for a difference between proportions test.
    oneMinusAlpha = 1.0 - alpha
    p1 = float(seq1) / totalSeq1
    p2 = float(seq2) / totalSeq2
    d = p1 - p2

    stdDev = math.sqrt((p1 * (1 - p1)) / totalSeq1 + (p2 * (1 - p2)) / totalSeq2)
    if stdDev == 0:
        # Degenerate case (both proportions are 0 or 1): report full power.
        return 1.0

    p = float(totalSeq1 * p1 + totalSeq2 * p2) / (totalSeq1 + totalSeq2)
    q = 1 - p
    pooledStdDev = math.sqrt((p * q) / totalSeq1 + (p * q) / totalSeq2)

    # BUG FIX: the local variable was previously named 'zScore', which made
    # the name local to this function and shadowed the module-level zScore()
    # helper, so the call below raised UnboundLocalError.
    zCritical = zScore(oneMinusAlpha)
    zLower = (-zCritical * pooledStdDev - d) / stdDev
    zUpper = (zCritical * pooledStdDev - d) / stdDev
    return standardNormalCDF(zLower) + (1.0 - standardNormalCDF(zUpper))
To facilitate calling this function on several different binomial random variables this is taken as a parameter so it only needs to be calculated once. ''' totalSeqs = max(totalSeqs, 1.0) z = zScore zSqrd = z*z p = float(posSeqs) / totalSeqs q = 1.0 - p term1 = p + zSqrd / (2*totalSeqs) offset = z * math.sqrt(p*q / totalSeqs + zSqrd / (4*totalSeqs*totalSeqs)) denom = 1 + zSqrd / totalSeqs lowerCI = (term1 - offset) / denom upperCI = (term1 + offset) / denom # Good correction, but computationally expensive #if posSeqs >= 1 and posSeqs <=3: # use one-sided Poisson approximation when probability ~= 0 (see Brown et al., 2001) # lowerCI = 0.5*chi2.isf(coverage, 2*posSeqs) / totalSeqs return lowerCI, upperCI, p if __name__ == "__main__": wilsonCI = WilsonCI() lowerCI, upperCI, p = wilsonCI.run(10,100, 0.95, zScore(0.95)) print lowerCI, upperCI, p
''' totalSeqs = max(totalSeqs, 1.0) z = zScore zSqrd = z * z p = float(posSeqs) / totalSeqs q = 1.0 - p term1 = p + zSqrd / (2 * totalSeqs) offset = z * math.sqrt(p * q / totalSeqs + zSqrd / (4 * totalSeqs * totalSeqs)) denom = 1 + zSqrd / totalSeqs lowerCI = (term1 - offset) / denom upperCI = (term1 + offset) / denom # Good correction, but computationally expensive #if posSeqs >= 1 and posSeqs <=3: # use one-sided Poisson approximation when probability ~= 0 (see Brown et al., 2001) # lowerCI = 0.5*chi2.isf(coverage, 2*posSeqs) / totalSeqs return lowerCI, upperCI, p if __name__ == "__main__": wilsonCI = WilsonCI() lowerCI, upperCI, p = wilsonCI.run(10, 100, 0.95, zScore(0.95)) print lowerCI, upperCI, p