def test_pairwiseproptest(self):
    """Compare pairwise chi-square proportion tests with stored references.

    Three checks are made on ``self.n_success`` / ``self.nobs``:

    * raw p-values of the all-pairs test (no multiple-testing correction)
      match ``self.res_ppt_pvals_raw``;
    * corrected p-values of the all-pairs test with
      ``multitest_method='h'`` match ``self.res_ppt_pvals_holm``;
    * raw p-values of the pairs-versus-control test equal the leading
      ``len(self.nobs) - 1`` raw p-values of the all-pairs test, to 13
      decimals.
    """
    successes = self.n_success
    totals = self.nobs

    # All pairs, without any multiple-testing adjustment.
    uncorrected = smprop.proportions_chisquare_allpairs(
        successes, totals, multitest_method=None
    )
    assert_almost_equal(uncorrected.pvals_raw, self.res_ppt_pvals_raw)

    # All pairs again, this time requesting the 'h' correction.
    all_pairs = smprop.proportions_chisquare_allpairs(
        successes, totals, multitest_method='h'
    )
    assert_almost_equal(all_pairs.pval_corrected(), self.res_ppt_pvals_holm)

    # Every group against the control only; there is one such pair per
    # non-control group, hence len(nobs) - 1 values to compare.
    vs_control = smprop.proportions_chisquare_pairscontrol(
        successes, totals, multitest_method='hommel'
    )
    n_control_pairs = len(totals) - 1
    assert_almost_equal(
        vs_control.pvals_raw,
        all_pairs.pvals_raw[:n_control_pairs],
        decimal=13,
    )
def test_number_pairs_1493(self):
    """Check the all-pairs test on only the first three groups.

    With three groups the result must hold exactly three raw p-values,
    and those must match entries 0, 1 and 3 of
    ``self.res_ppt_pvals_raw``.
    """
    first_three = smprop.proportions_chisquare_allpairs(
        self.n_success[:3], self.nobs[:3], multitest_method=None
    )
    raw_pvalues = first_three.pvals_raw

    assert_equal(len(raw_pvalues), 3)

    # NOTE(review): presumably these are the positions of the pairs
    # (0, 1), (0, 2) and (1, 2) inside the full-data reference vector --
    # confirm against how res_ppt_pvals_raw is ordered.
    reference_positions = [0, 1, 3]
    assert_almost_equal(
        raw_pvalues, self.res_ppt_pvals_raw[reference_positions]
    )
'N/A - No control group (comparing every group against all other variants)' ) control_type = st.radio('Select Control Group', control_lst) st.markdown( '*__Example:__*<br /> _In our weekday example, if Monday is the control group, you want to compare email open rate for:_ <br /> - Monday vs Wednesday <br /> - Monday vs Friday <br />', unsafe_allow_html=True) if st.button('Calculate '): if type(try_data_tbl(g_1, g_2, g_3)) == type(pd.DataFrame()): st.write(try_data_tbl(g_1, g_2, g_3)) st.text( 'The calculation uses the holm-sidak method to adjust p-values in multiple tests.' ) results = proportions_chisquare_allpairs(np.array(g_2), np.array(g_3)) #st.write(results) p_values = results.pval_corrected() pair_names = results.all_pairs_names sig = '' nsig = '' if control_type == 'N/A - No control group (comparing every group against all other variants)': for i in range(len(p_values)): t = f'__{g_1[int(pair_names[i][1])]}__ ({str(round(g_2[int(pair_names[i][1])]/g_3[int(pair_names[i][1])]*100,4))}%) and __{g_1[int(pair_names[i][4])]}__ ({str(round(g_2[int(pair_names[i][4])]/g_3[int(pair_names[i][4])]*100,4))}%) are' if float(p_values[i]) < a: sig = sig + ( '\n' + f'{t}' + ' <span style="color:green">significantly different</span> <br />' ) elif float(p_values[i]) >= a: nsig = nsig + (
# --- Count tables ----------------------------------------------------------
# Replace missing (NaN) cells with 0, in place.
ad_cats_table[ad_cats_table.isnull()] = 0

# "no_rem" is short for "no remarketing"; remarketing is what happens when
# you are shown ads for a site you have already visited.
ad_cats_table_no_rem = pd.DataFrame(all_ad_cats_no_remarketing).T
ad_cats_table_no_rem[ad_cats_table_no_rem.isnull()] = 0

# --- Relative frequencies --------------------------------------------------
# Divide every row by its own total so each row sums to 1.
ad_cats_freq = ad_cats_table.apply(lambda row: row / sum(row), axis=1)
ad_cats_freq_no_rem = ad_cats_table_no_rem.apply(
    lambda row: row / sum(row), axis=1
)

# --- Export (tab-separated) ------------------------------------------------
ad_cats_table.to_csv('ad_cats_counts.tsv', sep='\t')
ad_cats_freq.to_csv('ad_cats_freq.tsv', sep='\t')
ad_cats_table_no_rem.to_csv('ad_cats_counts_no_rem.tsv', sep='\t')
ad_cats_freq_no_rem.to_csv('ad_cats_freq_no_rem.tsv', sep='\t')

# --- Statistical tests (chi-square?) ---------------------------------------
# NOTE(review): the whole count table is passed as `count` and the row totals
# as `nobs` -- confirm proportions_chisquare_allpairs accepts a 2-D table
# here and that this is the intended comparison.
chi2_cats = proportions_chisquare_allpairs(
    ad_cats_table, ad_cats_table.sum(axis=1)
)
chi2_cats_no_rem = proportions_chisquare_allpairs(
    ad_cats_table_no_rem, ad_cats_table_no_rem.sum(axis=1)
)

# --- Correlation coefficients ----------------------------------------------
# corr() works column-wise, so transpose first to correlate the table rows
# with one another.
ad_cats_corr = ad_cats_freq.transpose().corr()
ad_cats_corr_no_rem = ad_cats_freq_no_rem.transpose().corr()

# --- Bar plots of the ads targeted in each category? -----------------------
# One bar position per column of the count table.
xlab = np.arange(len(ad_cats_table.columns))
for category in ad_cats_table.index:
    plt.clf()  # start from an empty figure for every row
    plt.bar(xlab, ad_cats_freq.loc[category])
    plt.ylim(0, 0.3)
    plt.xticks(xlab + 0.5, ad_cats_table.columns, rotation='vertical')
    # Title is the row label up to (not including) its first underscore.
    plt.title(category.partition('_')[0])
    plt.subplots_adjust()