def wilcoxon_test_err_and_dt(dt_exp=-19, err_exp=-13):
    """Run paired Wilcoxon tests comparing 'dt' against 'err' data per variable.

    For every variable in FIELDS_MAP, the 'dt' frame at exponent *dt_exp*
    is tested against the 'err' frame at exponent *err_exp*, once per
    field column 1..5.

    Returns a DataFrame of p-values: rows are field columns (1..5),
    columns are the variables.
    """
    pvalues = defaultdict(list)
    for variable in FIELDS_MAP.keys():
        frame_dt = get_svd('dt', 2, variable)[dt_exp]
        frame_err = get_svd('err', 2, variable)[err_exp]
        for column in range(1, 6):
            outcome = pg.wilcoxon(frame_dt[column], frame_err[column])
            pvalues[variable].append(outcome['p-val'].values[0])
    return pd.DataFrame(pvalues, index=range(1, 6))
def wilcoxon_test(err_or_dt, base, min_exp_bound=-float('inf'), ignore_exp=()):
    """Run Wilcoxon tests between consecutive step levels for each variable.

    For every variable in FIELDS_MAP the step/value frames are fetched via
    get_svd, then each adjacent pair of steps is compared column-by-column
    (field columns 1..5).

    Returns a dict mapping each (step_a, step_b) pair to a DataFrame of
    p-values (rows = field columns 1..5, columns = variables).
    """
    pair_results = defaultdict(lambda: defaultdict(list))
    for variable in FIELDS_MAP.keys():
        steps = get_svd(err_or_dt, base, variable, min_exp_bound, ignore_exp)
        items = tuple(steps.items())
        # Slide over adjacent (step, frame) pairs.
        for (step_a, frame_a), (step_b, frame_b) in zip(items, items[1:]):
            bucket = pair_results[(step_a, step_b)]
            for column in range(1, 6):
                outcome = pg.wilcoxon(frame_a[column], frame_b[column])
                bucket[variable].append(outcome['p-val'].values[0])
    return {pair: pd.DataFrame(data, index=range(1, 6))
            for pair, data in pair_results.items()}
def nonparamsignificance(self, *arrays):
    """Run non-parametric significance tests on the first two arrays.

    Applies Mann-Whitney U, Kruskal-Wallis H, and Wilcoxon signed-rank
    tests to ``arrays[0]`` and ``arrays[1]`` (stored as ``self.d1`` /
    ``self.d2``), prints 5-number summaries for both samples, the result
    table, and pingouin's two-sided Wilcoxon output.

    Returns
    -------
    pd.DataFrame
        Columns: Method, Statistic, p-value, Conclusion.
    """
    from scipy.stats import mannwhitneyu, kruskal, wilcoxon
    import pingouin as pg

    self.d1 = arrays[0]
    self.d2 = arrays[1]

    def keterangan(pval):
        # Interpret the p-value at the conventional 5% significance level.
        alpha = 0.05
        if pval > alpha:
            return 'Same distribution (fail to reject H0)'
        else:
            return 'Different distribution (reject H0)'

    metode = []
    p = []
    stat = []
    ket = []

    metode.append('Mann-Whitney U test')
    a, b = mannwhitneyu(self.d1, self.d2)
    stat.append(a)
    p.append(b)
    ket.append(keterangan(b))

    metode.append('Kruskal-Wallis H Test')
    a, b = kruskal(self.d1, self.d2)
    stat.append(a)
    p.append(b)
    ket.append(keterangan(b))

    metode.append('Wilcoxon')
    # correction=True enables scipy's continuity correction.
    a, b = wilcoxon(self.d1, self.d2, correction=True)
    stat.append(a)
    p.append(b)
    ket.append(keterangan(b))

    results = {
        'Method': metode,
        'Statistic': stat,
        'p-value': p,
        'Conclusion': ket,
    }
    results = pd.DataFrame(results)
    print('5-Number of statistic for D1:')
    self.fivenumberplus(self.d1)
    print('5-Number of statistic for D2:')
    self.fivenumberplus(self.d2)
    print(results)
    # BUG FIX: pingouin renamed the `tail` keyword to `alternative`
    # (pingouin >= 0.4.0); passing `tail=` raises TypeError on current
    # versions. Behavior (two-sided test) is unchanged.
    print(pg.wilcoxon(self.d1, self.d2, alternative='two-sided'))
    return results
def perform_kappa_ttest(self):
    """Compare Cohen's Kappa between HIGH and LOW activity groups.

    Runs an unpaired t-test (pingouin, Welch correction decided
    automatically), derives Hedges' g from Cohen's d, and runs a
    Wilcoxon test as a non-parametric counterpart. Results are stored
    on ``self.kappa_ttest`` / ``self.kappa_wilcoxon`` and printed.
    """
    print(
        "\nPerforming unpaired T-test between activity groups on Cohen's Kappa values."
    )

    high = self.df_kappa_long.groupby("Group").get_group("HIGH")["Kappa"]
    low = self.df_kappa_long.groupby("Group").get_group("LOW")["Kappa"]

    self.kappa_ttest = pg.ttest(high, low, paired=False, correction='auto')

    # Approximates Hedges' g via g = d * (1 - 3 / (4*(n1 + n2) - 9)).
    # BUG FIX: use the actual sizes of BOTH groups (n1 + n2); the previous
    # 4 * 2 * n1 - 9 silently assumed equal group sizes.
    n_total = high.shape[0] + low.shape[0]
    self.kappa_ttest["hedges-g"] = self.kappa_ttest["cohen-d"] * (
        1 - (3 / (4 * n_total - 9)))
    print(self.kappa_ttest)

    # Non-parametric counterpart (paired signed-rank; requires equal n).
    self.kappa_wilcoxon = pg.wilcoxon(high, low)
    print(self.kappa_wilcoxon)
'''New November stuff''' #anterior hippopcamus df = pd.read_csv('extracted_mem_apriori_gPPI.csv') df = df.set_index(['target', 'seed', 'cope', 'subject']).sort_index() df = (df.loc['hc_head'] - df.loc['rACC']) # df = (df.loc['CS+E'] - df.loc['CS+A']) df = df.loc[('amyg_bla', ['CS+A', 'CS+E']), ].reset_index() df['group'] = df.subject.apply(lgroup) fig, ax = plt.subplots() sns.barplot(data=df, x='group', y='conn', hue='cope', palette=['darkmagenta', 'seagreen'], ax=ax) ax.set_ylabel('vmPFC > Amyg. BLA connectivity') ax.set_title('gPPI seed = Hc Head (anterior)') pg.wilcoxon(df.conn[df.cope == 'CS+E'][df.group == 'healthy'], df.conn[df.cope == 'CS+A'][df.group == 'healthy']) stats = pd.read_csv('extracted_mem_apriori_gPPI.csv') stats['group'] = stats.subject.apply(lgroup) stats = stats.set_index(['target', 'seed', 'cope', 'group', 'subject']) pg.wilcoxon(stats.loc[('sgACC', 'hc_head', 'CS+E', 'healthy'), 'conn'], stats.loc[('amyg_bla', 'hc_head', 'CS+E', 'healthy'), 'conn']) out = stats.loc[(['sgACC', 'amyg_bla'], 'hc_head', ['CS+E', 'CS+A']), 'conn']
def wilcoxon_test(self, value_col, group_col, condition=False, baseline=None,
                  display_result=True, file=None, display_info=True):
    """Run pairwise Wilcoxon tests on *value_col* between groups of *group_col*.

    If *baseline* is None, every unordered pair of groups is compared and a
    long-format table (A, B, W, p, bonf, RBC, CLES) is produced. Otherwise
    each group is compared only against the baseline, producing a wide
    table with one column per non-baseline group and rows p/bonf/W/r.
    Bonferroni correction divides by the number of comparisons actually
    performed. Results are optionally displayed and written to *file*.
    """
    # Collect data, optionally restricted by a condition filter.
    df = self.__get_condition(self.df, condition)
    # Collect group values to compare.
    groups = self.__ordered_values(group_col)
    # Baseline gets special treatment: move it to the end of the list so
    # every other group is paired with it exactly once below.
    if baseline is not None:  # FIX: identity check instead of `== None`
        groups = [x for x in groups if x != baseline]
        groups.append(baseline)
    # Set up the dict used to construct the result dataframe.
    if baseline is None:
        results = {
            'A': [], 'B': [], 'W': [], 'p': [],
            'bonf': [], 'RBC': [], 'CLES': []
        }
    else:
        results = {group: [] for group in groups}
    # Collect all unordered pairs to compare (each pair once).
    to_compare = []
    for g1 in groups:
        for g2 in groups:
            if g1 != g2 and (g2, g1) not in to_compare:
                to_compare.append((g1, g2))
    # Compute results.
    if baseline is None:
        # Compare all groups to each other.
        for (g1, g2) in to_compare:
            s1 = df[df[group_col] == g1][value_col]
            s2 = df[df[group_col] == g2][value_col]
            stats = wilcoxon(s1, s2)
            W = stats['W-val'].values[0]
            p = stats['p-val'].values[0]
            bonf = self.__apply_bonferroni(p, len(to_compare))
            rbc = stats['RBC'].values[0]
            cles = stats['CLES'].values[0]
            results['A'].append(g1)
            results['B'].append(g2)
            results['W'].append(W)
            results['p'].append(self.__check_p(p))
            results['bonf'].append(self.__check_p(bonf))
            results['RBC'].append(round(rbc, 5))
            results['CLES'].append(round(cles, 5))
        df_res = pd.DataFrame(results)
    else:
        # Only compare each group against the baseline.
        for (g1, g2) in to_compare:
            if g2 != baseline:
                continue
            s1 = df[df[group_col] == g1][value_col]
            s2 = df[df[group_col] == g2][value_col]
            stats = wilcoxon(s1, s2)
            W = stats['W-val'].values[0]
            p = stats['p-val'].values[0]
            # Correct over the number of baseline comparisons only.
            bonf = self.__apply_bonferroni(p, len(groups) - 1)
            rbc = stats['RBC'].values[0]
            # Row order must match the index labels below: p, bonf, W, r.
            results[g1].append(self.__check_p(p))
            results[g1].append(self.__check_p(bonf))
            results[g1].append(W)
            results[g1].append(round(rbc, 5))
        # groups[:-1] excludes the baseline column (its list stays empty;
        # pandas ignores dict keys not listed in `columns`).
        df_res = pd.DataFrame(results,
                              index=pd.Index(['p', 'bonf', 'W', 'r'],
                                             name='value'),
                              columns=pd.Index(groups[:-1], name='group'))
    if display_result:
        if display_info:
            print("################")
            print("### Wilcoxon ###")
            print("################")
        if condition is not False:  # FIX: `not x is False` -> `x is not False`
            print(self.__condition_to_string(condition))
        display(df_res)
        print("")
    if file is not None:
        df_res.to_csv(file)
def _sig_prefix(p):
    """Return the significance marker for p: '***', '** ', '* ', or spaces."""
    if p < .001:
        return '***'
    if p < .01:
        return '** '
    if p < .05:
        return '* '
    return '   '


def test_posthoc(df, dep_var, ind_vars, is_non_normal=None):
    """Pairwise post-hoc tests of *dep_var* between condition pairs.

    Unless *is_non_normal* is supplied, normality is checked via
    test_normality (non-normal when any p < 0.01). Non-normal data get a
    two-sided Wilcoxon test; otherwise a t-test is used (paired when both
    samples have equal length). One line per pair is printed, prefixed
    with the usual */**/*** significance markers.
    """
    ind_vars = sorted(ind_vars)
    if is_non_normal is None:  # FIX: identity comparison with None
        normality_p = test_normality(df, dep_var, ind_vars)
        is_non_normal = any(p < 0.01 for p in normality_p)
    # All unordered pairs of conditions, each once.
    iv_combinations = []
    for iv in ind_vars:
        for iv1 in ind_vars:
            if (iv != iv1 and (iv, iv1) not in iv_combinations
                    and (iv1, iv) not in iv_combinations):
                iv_combinations.append((iv, iv1))
    for comb in iv_combinations:
        x = df.loc[df['Condition number'] == comb[0]][dep_var]
        y = df.loc[df['Condition number'] == comb[1]][dep_var]
        try:
            if is_non_normal:
                results = pg.wilcoxon(x, y, alternative='two-sided')
                results = results.round(4)
                t = list(results['W-val'])[0]
                p = list(results['p-val'])[0]
                prefix = _sig_prefix(p)
                print(
                    f'{prefix}{comb} Wilco: W={round(t, 2)}, p={round(p, 3)}')
            else:
                # Paired t-test is only possible with equal sample sizes.
                paired = len(x) == len(y)
                results = pg.ttest(x, y, paired=paired,
                                   alternative='two-sided')
                results = results.round(4)
                t = list(results['T'])[0]
                p = list(results['p-val'])[0]
                prefix = _sig_prefix(p)
                print(
                    f'{prefix}{comb} Ttest: t={round(t, 2)}, p={round(p, 3)}')
        except Exception as e:
            # Best-effort: report the failing pair and continue.
            print(f'Error in {comb}: {e}')
    return
def qualOrdinalPaired(imgDir, sheetName, sheetDf, sheetScale, silent=True):
    """Analyze paired ordinal (Likert-style) data for one questionnaire sheet.

    Builds a contingency table per scale step, runs an omnibus Friedman
    test when there are more than two modalities, runs pairwise Wilcoxon
    tests between modalities with Holm correction of the pairwise
    p-values, and renders a stacked-bar plot annotated with the stats.

    Parameters
    ----------
    imgDir : str          output directory for the plot image
    sheetName : str       sheet title (also the image filename stem)
    sheetDf : DataFrame   one column per modality, one row per subject
    sheetScale : int      number of scale steps (0..sheetScale-1)
    silent : bool         suppress the banner print when True
    """
    if not silent:
        print("######################################## ", sheetName,
              " ########################################")
    meltedSheetDf = sheetDf.melt(var_name='Factor', value_name='Variable')
    contingencySheetDf = pd.crosstab(index=meltedSheetDf['Variable'],
                                     columns=meltedSheetDf['Factor'])
    statDf = pd.DataFrame(columns=[
        'COMPARISON', 'TEST', 'STATISTICS', 'P-VALUE', 'EFFECT SIZE'
    ])
    # Fill rows for scale values that never occur so the axis is complete.
    for sheetStep in range(sheetScale):
        if sheetStep not in contingencySheetDf.index.values:
            contingencySheetDf.loc[sheetStep] = [
                0 for _ in range(len(contingencySheetDf.columns.values))
            ]
    contingencySheetDf.sort_index(inplace=True)
    # ALL MODALITY: omnibus Friedman test (only meaningful for > 2 groups).
    if len(contingencySheetDf.columns) > 2:
        sheetDf_long = sheetDf.melt(ignore_index=False).reset_index()
        friedman_stats = pg.friedman(data=sheetDf_long, dv="value",
                                     within="variable", subject="index")
        source, wvalue, ddof1, qvalue, pvalue = friedman_stats.values[0]
        # FIX: DataFrame.append was removed in pandas 2.0 -> use pd.concat.
        statDf = pd.concat([statDf, pd.DataFrame([{
            'COMPARISON': 'ALL',
            'TEST': "Friedman",
            'STATISTICS': qvalue,
            'P-VALUE': pvalue,
            'EFFECT SIZE': wvalue
        }])], ignore_index=True)
    # BETWEEN MODALITY: pairwise Wilcoxon signed-rank tests.
    modality_names = sheetDf.columns.values
    uncorrectedStatIndex = len(statDf.index)
    for i in range(len(modality_names)):
        for j in range(i + 1, len(modality_names)):
            stats_wilcoxon = pg.wilcoxon(sheetDf.loc[:, modality_names[i]],
                                         sheetDf.loc[:, modality_names[j]],
                                         correction=False,
                                         alternative='two-sided')
            wvalue, alternative, pvalue, RBC, CLES = stats_wilcoxon.values[0]
            statDf = pd.concat([statDf, pd.DataFrame([{
                'COMPARISON': modality_names[i] + '|' + modality_names[j],
                'TEST': "Wilcoxon",
                'STATISTICS': wvalue,
                'P-VALUE': pvalue,
                'EFFECT SIZE': RBC
            }])], ignore_index=True)
    # Holm correction applied to the pairwise (uncorrected) p-values only;
    # the omnibus Friedman row (if any) is excluded from correction.
    reject, statDf.loc[uncorrectedStatIndex::, 'P-VALUE'] = pg.multicomp(
        statDf.loc[uncorrectedStatIndex::, 'P-VALUE'].values,
        alpha=0.05,
        method="holm")
    StackedBarPlotter.StackedBarPlotter(filename=imgDir + '/' + sheetName +
                                        '.png',
                                        title=sheetName,
                                        dataDf=sheetDf,
                                        histDf=contingencySheetDf,
                                        statDf=statDf)