def test_benchmark():
    """Print the throughput of ``fishers_vec`` and ``_scipy_fishers_vec``.

    Throughput is reported as individual tests per second: the number of
    2 x 2 tables evaluated by one vectorized call (``len(a)``) divided by the
    elapsed time of that call, averaged over the repetitions. Both
    implementations are therefore reported on the same scale regardless of
    how many times each one is repeated.

    Nothing is asserted; the results are only printed.
    """
    a, b, c, d = _gen_rand_abcd()
    # One vectorized call performs one test per element of the count vectors.
    n_tests = len(a)

    def _tests_per_second(func, n_reps):
        """Return the mean tests/second of ``func`` over ``n_reps`` calls."""
        rates = np.zeros(n_reps)
        for i in range(n_reps):
            # perf_counter is monotonic and high-resolution, so short calls
            # do not produce a zero-length interval.
            start = time.perf_counter()
            func(a, b, c, d, alternative='two-sided')
            rates[i] = n_tests / (time.perf_counter() - start)
        return np.mean(rates)

    print('Imported test: %1.2f tests per second'
          % _tests_per_second(fishersapi.fishers_vec, 100))
    # A single repetition is used here, presumably because this
    # implementation is much slower -- TODO confirm.
    print('scipy test: %1.2f tests per second'
          % _tests_per_second(fishersapi._scipy_fishers_vec, 1))
def test_fishers_vec():
    """Compare ``fishers_vec`` with ``scipy.stats.fisher_exact`` on random data.

    For each alternative hypothesis the vectorized odds-ratios and p-values
    must match, to a relative tolerance of 1e-4, the values obtained by
    running scipy on every 2 x 2 table one at a time.
    """
    a, b, c, d = _gen_rand_abcd()
    n_tables = len(a)
    for alternative in ['two-sided', 'less', 'greater']:
        vec_ORs, vec_pvalues = fishersapi.fishers_vec(
            a, b, c, d, alternative=alternative)

        # Reference results, computed table by table with scipy.
        ref_pvalues = np.zeros(n_tables)
        ref_ORs = np.zeros(n_tables)
        for idx in range(n_tables):
            table = [[a[idx], b[idx]],
                     [c[idx], d[idx]]]
            ref_ORs[idx], ref_pvalues[idx] = stats.fisher_exact(
                table, alternative=alternative)

        npt.assert_allclose(vec_ORs, ref_ORs, rtol=1e-4)
        npt.assert_allclose(vec_pvalues, ref_pvalues, rtol=1e-4)
def test_fishers_vec_minn():
    """Check that ``min_n`` controls which tables receive a p-value.

    The median of the total counts ``a + b + c + d`` is passed as ``min_n``.
    For every alternative hypothesis, the number of non-NaN p-values returned
    by ``fishers_vec`` must equal the number of tables whose total count is
    at least that threshold.
    """
    a, b, c, d = _gen_rand_abcd()
    counts = a + b + c + d
    # The threshold and the expected count do not depend on the alternative,
    # so compute them once outside the loop.
    min_n = np.median(counts)
    n_expected = np.sum(counts >= min_n)
    for alt in ['two-sided', 'less', 'greater']:
        _, pvalues = fishersapi.fishers_vec(a, b, c, d,
                                            alternative=alt, min_n=min_n)
        npt.assert_equal(n_expected, (~np.isnan(pvalues)).sum())
def _fisherNBR(res_df, ct_cols):
    """Run a two-sided Fisher's exact test on every row of res_df.

    The four columns named in ct_cols hold, for each row, the cells of a
    2 x 2 contingency table in "flattened" order:

        a    X+/MEM+
        b    X+/MEM-
        c    X-/MEM+
        d    X-/MEM-

    where X+ indicates the second level of x_col (e.g. 1 for [0, 1]).
    With this ordering an OR > 1 = [(a / c) / (b / d)] indicates enrichment
    of X within the cluster. The relative-rate of X in vs. out of the
    cluster, (a / (a + c)) / (b / (b + d)), is returned as well. Zero
    denominators are not guarded against here.

    Parameters
    ----------
    res_df : pd.DataFrame [ntests x 4]
        Each row contains a set of 4 counts to be tested.
    ct_cols : list
        Names of the four count columns, in the order a, b, c, d above.

    Returns
    -------
    res : dict
        Keys 'RR', 'OR' and 'pvalue', each a vector with one entry per
        row of res_df."""
    # Pull the four count vectors out in a, b, c, d order.
    a, b, c, d = (res_df[ct_cols[pos]].values for pos in range(4))

    odds_ratio, pvalue = fishers_vec(a, b, c, d, alternative='two-sided')

    rate_in_cluster = a / (a + c)
    rate_out_cluster = b / (b + d)
    rel_rate = rate_in_cluster / rate_out_cluster

    return {'RR': rel_rate, 'OR': odds_ratio, 'pvalue': pvalue}
def test_integers():
    """Plain integer counts should produce scalar outputs from ``fishers_vec``."""
    odds_ratio, p_value = fishersapi.fishers_vec(10, 2, 15, 3)
    for value in (odds_ratio, p_value):
        assert np.isscalar(value)