def test_fdr_bky():
    # Regression test for fdrcorrection_twostage, using the example from BKY.
    # NOTE(review): an identical test_fdr_bky definition follows directly
    # after this one; if both live in the same module, the later definition
    # shadows this one -- confirm and deduplicate.
    bky_pvalues = [
        0.0001, 0.0004, 0.0019, 0.0095, 0.0201,
        0.0278, 0.0298, 0.0344, 0.0459, 0.3240,
        0.4262, 0.5719, 0.6528, 0.7590, 1.000,
    ]

    # Corrected p-values are not checked here (they are inherited).
    # Number of rejections reported in the BKY paper:
    #   single step-up: 4, two-stage: 8, iterated two-step: 9
    # Only the two-stage (TST) figures are verified below.
    expected_rejections = 8
    expected_alpha_star = [0.047619, 0.0649]

    outcome = fdrcorrection_twostage(bky_pvalues, alpha=0.05, iter=False)
    alpha_stages = outcome[-1]
    rejected = outcome[0]

    # alpha_star for stage 2 has to agree with the paper to 3 decimals
    assert_almost_equal(expected_alpha_star, alpha_stages[:2], 3)
    # same number of rejections as in the paper for the two-stage procedure
    assert_equal(expected_rejections, rejected.sum())
def test_fdr_bky():
    # Regression test for fdrcorrection_twostage, using the example from BKY.
    # NOTE(review): an identical test_fdr_bky definition appears directly
    # before this one; if both live in the same module, this definition
    # shadows the earlier one -- confirm and deduplicate.
    bky_pvalues = [
        0.0001, 0.0004, 0.0019, 0.0095, 0.0201,
        0.0278, 0.0298, 0.0344, 0.0459, 0.3240,
        0.4262, 0.5719, 0.6528, 0.7590, 1.000,
    ]

    # Corrected p-values are not checked here (they are inherited).
    # Number of rejections reported in the BKY paper:
    #   single step-up: 4, two-stage: 8, iterated two-step: 9
    # Only the two-stage (TST) figures are verified below.
    expected_rejections = 8
    expected_alpha_star = [0.047619, 0.0649]

    outcome = fdrcorrection_twostage(bky_pvalues, alpha=0.05, iter=False)
    alpha_stages = outcome[-1]
    rejected = outcome[0]

    # alpha_star for stage 2 has to agree with the paper to 3 decimals
    assert_almost_equal(expected_alpha_star, alpha_stages[:2], 3)
    # same number of rejections as in the paper for the two-stage procedure
    assert_equal(expected_rejections, rejected.sum())
df_comments_sorted_T5.head() # %% # compare expected and observed using chi-square test for idx in df_comments_sorted_T5.index: ## use expected = df_comments_sorted.sum(0) * (df_comments_sorted.loc[idx,:].sum() / all_sum) observed = df_comments_sorted_T5.loc[idx,:] chi_result.append([idx,chisquare(observed,f_exp=expected)]) # %% df_chi = pd.DataFrame([[a[0],a[1].statistic,a[1].pvalue] for a in chi_result]) df_chi.columns = ['condition','statistic','pvalue'] df_chi['adj_pvalue'] = fdrcorrection_twostage(df_chi['pvalue'])[1] df_chi.head() # %% df_chi_sorted = df_chi[df_chi['adj_pvalue'] < 0.05].sort_values('pvalue') df_chi_sorted.head(10) # %% plt.figure(figsize=(12,12)) i =1 for idx in df_chi_sorted['condition'][:10]: