def detect_differentially_abundant_features(seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2,
                                            coverage, B, preferences, progress):
    """Compute per-feature statistics for two groups of samples.

    Counts are converted to proportions of their parent counts, a two-sample
    t-statistic is calculated for every feature, and p-values and confidence
    intervals are obtained from ``permuted_statistics``. Features that are
    sparse in both groups are afterwards re-evaluated with Fisher's exact test
    on the counts summed over each group.

    Args:
        seqGroup1, seqGroup2: one row per feature, one column per sample,
            holding the counts for that feature. The number of samples in a
            group is taken from the length of its first row.
        parentSeqGroup1, parentSeqGroup2: parent counts with the same layout;
            each entry is the divisor for the matching count. A parent count
            of zero is not guarded against.
        coverage: forwarded to ``permuted_statistics`` and to
            ``DiffBetweenPropAsymptoticCC.run``.
        B: forwarded to ``permuted_statistics`` (presumably the number of
            permutations -- confirm against that function).
        preferences: forwarded to the ``Fishers`` and
            ``DiffBetweenPropAsymptoticCC`` constructors.
        progress: object exposing ``wasCanceled()``, or None.

    Returns:
        Tuple ``(pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs,
        effectSizes, notes)`` with one entry per feature. Six empty lists are
        returned when there are no features or when ``progress`` reports a
        cancellation.
    """
    numFeatures = len(seqGroup1)
    if numFeatures == 0:
        # nothing to analyse; indexing the first row below would fail
        return [], [], [], [], [], []

    n1 = len(seqGroup1[0])
    n2 = len(seqGroup2[0])

    # convert to proportions
    propGroup1 = [[float(seqGroup1[r][c]) / parentSeqGroup1[r][c] for c in range(n1)]
                  for r in range(numFeatures)]
    propGroup2 = [[float(seqGroup2[r][c]) / parentSeqGroup2[r][c] for c in range(n2)]
                  for r in range(numFeatures)]

    # calculate t-statistics for unpooled variances for each feature
    T_statistics, effectSizes, notes = calc_twosample_ts(propGroup1, propGroup2)

    # generate statistics using non-parametric t-test based on permutations of the t-statistic
    pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs = permuted_statistics(
        propGroup1, propGroup2, seqGroup1, seqGroup2, T_statistics, coverage, B, progress)

    if progress is not None and progress.wasCanceled():
        return [], [], [], [], [], []

    # generate p values for sparse data using fisher's exact test
    fishers = Fishers(preferences)
    diffBetweenProp = DiffBetweenPropAsymptoticCC(preferences)
    for r in range(numFeatures):
        total1 = sum(seqGroup1[r])
        total2 = sum(seqGroup2[r])

        # heuristic: a feature is sparse when, in both groups, its summed
        # count is smaller than the number of samples in that group
        if total1 < n1 and total2 < n2:
            parentTotal1 = sum(parentSeqGroup1[r])
            parentTotal2 = sum(parentSeqGroup2[r])

            p1, p2, _ = fishers.hypothesisTest(total1, total2, parentTotal1, parentTotal2)
            lower, upper, es, _ = diffBetweenProp.run(total1, total2, parentTotal1, parentTotal2, coverage)

            # overwrite the permutation-based results for this feature
            pValuesOneSided[r] = p1
            pValuesTwoSided[r] = p2
            lowerCIs[r] = lower
            upperCIs[r] = upper
            effectSizes[r] = es
            notes[r] = "heuristic: statistics calculated with Fisher's test"

    return pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs, effectSizes, notes
def testFishers(self):
    """Verify computation of Fisher's exact test (minimum-likelihood approach)

    Every case passes four values to ``Fishers.hypothesisTest`` and compares
    the returned one-sided and two-sided p-values with reference values using
    ``assertAlmostEqual``. ``preferences``, ``table1`` and ``table2`` are not
    defined in this method and are taken from the enclosing scope.
    """
    from stamp.plugins.samples.statisticalTests.Fishers import Fishers

    fisherTest = Fishers(preferences)

    # Ground truth obtained from R version 2.10
    # (arguments, expected one-sided p-value, expected two-sided p-value)
    cases = (
        ((table1[0], table1[1], table1[2], table1[3]), 0.16187126209690825, 0.2715543327789185),
        ((table2[0], table2[1], table2[2], table2[3]), 2.220446049e-16, 2.220446049e-16),
        ((0.0, 0.0, 920852.999591, 953828.994346), 1.0, 1.0),
    )

    for args, expectedOneSided, expectedTwoSided in cases:
        oneSided, twoSided, _ = fisherTest.hypothesisTest(*args)
        self.assertAlmostEqual(oneSided, expectedOneSided)
        self.assertAlmostEqual(twoSided, expectedTwoSided)
def detect_differentially_abundant_features(seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2,
                                            coverage, B, preferences, progress):
    """Compute per-feature statistics for two groups of samples.

    Counts are converted to proportions of their parent counts, a two-sample
    t-statistic is calculated for every feature, and p-values and confidence
    intervals are obtained from ``permuted_statistics``. Features that are
    sparse in both groups are afterwards re-evaluated with Fisher's exact test
    on the counts summed over each group.

    Args:
        seqGroup1, seqGroup2: one row per feature, one column per sample,
            holding the counts for that feature. The number of samples in a
            group is taken from the length of its first row.
        parentSeqGroup1, parentSeqGroup2: parent counts with the same layout;
            each entry is the divisor for the matching count. A parent count
            of zero is not guarded against.
        coverage: forwarded to ``permuted_statistics`` and to
            ``DiffBetweenPropAsymptoticCC.run``.
        B: forwarded to ``permuted_statistics`` (presumably the number of
            permutations -- confirm against that function).
        preferences: forwarded to the ``Fishers`` and
            ``DiffBetweenPropAsymptoticCC`` constructors.
        progress: object exposing ``wasCanceled()``, or None.

    Returns:
        Tuple ``(pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs,
        effectSizes, notes)`` with one entry per feature. Six empty lists are
        returned when there are no features or when ``progress`` reports a
        cancellation.
    """
    numFeatures = len(seqGroup1)
    if numFeatures == 0:
        # nothing to analyse; indexing the first row below would fail
        return [], [], [], [], [], []

    n1 = len(seqGroup1[0])
    n2 = len(seqGroup2[0])

    # convert to proportions
    propGroup1 = [[float(seqGroup1[r][c]) / parentSeqGroup1[r][c] for c in range(n1)]
                  for r in range(numFeatures)]
    propGroup2 = [[float(seqGroup2[r][c]) / parentSeqGroup2[r][c] for c in range(n2)]
                  for r in range(numFeatures)]

    # calculate t-statistics for unpooled variances for each feature
    T_statistics, effectSizes, notes = calc_twosample_ts(propGroup1, propGroup2)

    # generate statistics using non-parametric t-test based on permutations of the t-statistic
    pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs = permuted_statistics(
        propGroup1, propGroup2, seqGroup1, seqGroup2, T_statistics, coverage, B, progress)

    if progress is not None and progress.wasCanceled():
        return [], [], [], [], [], []

    # generate p values for sparse data using fisher's exact test
    fishers = Fishers(preferences)
    diffBetweenProp = DiffBetweenPropAsymptoticCC(preferences)
    for r in range(numFeatures):
        total1 = sum(seqGroup1[r])
        total2 = sum(seqGroup2[r])

        # heuristic: a feature is sparse when, in both groups, its summed
        # count is smaller than the number of samples in that group
        if total1 < n1 and total2 < n2:
            parentTotal1 = sum(parentSeqGroup1[r])
            parentTotal2 = sum(parentSeqGroup2[r])

            p1, p2, _ = fishers.hypothesisTest(total1, total2, parentTotal1, parentTotal2)
            lower, upper, es, _ = diffBetweenProp.run(total1, total2, parentTotal1, parentTotal2, coverage)

            # overwrite the permutation-based results for this feature
            pValuesOneSided[r] = p1
            pValuesTwoSided[r] = p2
            lowerCIs[r] = lower
            upperCIs[r] = upper
            effectSizes[r] = es
            notes[r] = "heuristic: statistics calculated with Fisher's test"

    return pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs, effectSizes, notes
class GTestFisher(AbstractSampleStatsTestPlugin):
    '''
    Hypothesis test that delegates either to Fisher's exact test or to the
    G-test w/ Yates' correction, depending on the counts it is given.
    '''

    def __init__(self, preferences):
        '''
        Initialise the base plugin, create the two delegate tests and set
        the plugin name.
        '''
        AbstractSampleStatsTestPlugin.__init__(self, preferences)

        # both delegates are built from self.preferences (presumably stored
        # by the base initialiser -- confirm against the base class)
        self.fishers = Fishers(self.preferences)
        self.gTestYates = GTestYates(self.preferences)

        self.name = 'G-test (w/ Yates\') + Fisher\'s'

    def hypothesisTest(self, seq1, seq2, totalSeq1, totalSeq2):
        '''
        Run Fisher's exact test when any of seq1, seq2, totalSeq1 - seq1 or
        totalSeq2 - seq2 is below 20; otherwise run the G-test w/ Yates'
        correction. The result of the chosen test is returned unchanged.
        '''
        cells = (seq1, seq2, totalSeq1 - seq1, totalSeq2 - seq2)

        if any(cell < 20 for cell in cells):
            chosenTest = self.fishers
        else:
            chosenTest = self.gTestYates

        return chosenTest.hypothesisTest(seq1, seq2, totalSeq1, totalSeq2)
def __init__(self, preferences):
    '''
    Initialise the base plugin, create the Fisher's and G-test (w/ Yates')
    delegate tests and set the plugin name.
    '''
    AbstractSampleStatsTestPlugin.__init__(self, preferences)

    # both delegates are built from self.preferences (presumably stored by
    # the base initialiser -- confirm against the base class)
    self.fishers = Fishers(self.preferences)
    self.gTestYates = GTestYates(self.preferences)

    self.name = 'G-test (w/ Yates\') + Fisher\'s'