def _test_hypergeom(m, n, n1, n2):
    """
    Checks that R's phyper and the fisher module give the same right-tail
    p-value, printing both values and asserting that they match.

    The R value is phyper(min(n1, n2), ...) - phyper(m - 1, ...), which with
    phyper's default lower tail is the probability mass between m and
    min(n1, n2) inclusive. The fisher value is the right_tail attribute of
    fisher.pvalue applied to _fury_table(m, n, n1, n2).

    @arg m: Observed count; the tail starts here (phyper is called with m - 1).
    @arg n: Total count; n - n1 is passed to phyper as its third argument.
    @arg n1: Passed to phyper as its second argument.
    @arg n2: Passed to phyper as its fourth argument.
    """
    # Both phyper calls share everything except the quantile.
    shared_args = (n1, n - n1, n2)
    upper_cdf = r.phyper(min(n1, n2), *shared_args)[0]
    lower_cdf = r.phyper(m - 1, *shared_args)[0]
    R_pval = upper_cdf - lower_cdf
    f_pval = fisher.pvalue(*_fury_table(m, n, n1, n2)).right_tail

    # Agreement is required to 10 decimal places (not significant figures):
    # both values are rendered with '%.10f' and compared as text.
    R_str = '%.10f' % R_pval
    f_str = '%.10f' % f_pval

    # A single parenthesised string prints identically whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    print(' '.join(['R:', R_str, 'Fisher:', f_str]))
    assert R_str == f_str, \
        'R (%s) and Fisher (%s) p-values differ' % (R_str, f_str)
def test_enrichment(test_set, gene_set, universe): """ Tests the test_set for enrichment relative to the gene_set using the hypergeometric test. Returns the probability of seeing at least this many draws in the test set that come from the gene set. @arg test_set: The gene set we wish to test for enrichment. @arg gene_set: The background set of interest, e.g. genes in a GO category. @arg universe: The genes we have selected our gene set from. """ from rpy2.robjects import r assert test_set <= universe assert gene_set <= universe white_drawn = len(test_set.intersection(gene_set)) white = len(gene_set) black = len(universe) - len(gene_set) draws = len(test_set) #import IPython; IPython.Debugger.Pdb().set_trace() kwargs = {'lower.tail': False} logging.debug('%4d black; %4d white; %4d draws; %4d white draws', black, white, draws, white_drawn) return white_drawn, white, black, draws, r.phyper(white_drawn - 1, white, black, draws, **kwargs)[0]