def build_partial_corr(corr_df, target, covar, method='pearson', padjust='fdr_bh', pval=0.05, covar_name=None):
    """
    Build a styled table of significant partial correlations with the target.

    Pairwise correlations are computed on ``corr_df`` while controlling for
    ``covar``; their uncorrected p-values are adjusted with ``padjust``. Only
    pairs whose adjusted p-value is below ``pval``, whose ``Y`` column name
    matches ``target`` and whose ``X`` column name does not, are kept. The raw
    (uncontrolled) correlations of the same pairs are merged in for comparison.

    Args:
        corr_df (pd.DataFrame): data frame, assuming each row is an observation
        target (str): target column pattern, can be a string prefix or suffix.
            It is passed to ``Series.str.contains``, so it is interpreted as a
            regular expression.
        covar (list): a list of covariate columns to control for
        method (str): correlation method forwarded to ``pg.pairwise_corr``
        padjust (str): multiple-comparison correction method
        pval (float): significance threshold applied to the adjusted p-values
        covar_name (str): optional name for covariates in the display

    Returns:
        A pandas ``Styler`` (captioned with ``method``) over the columns
        ``X``, ``Y``, ``covar``, ``r_ctl``, ``p_ctl``, ``r_unctl``,
        ``p_unctl`` and ``n``, sorted by ``p_ctl``.
    """
    partial_corr = pg.pairwise_corr(data=corr_df, covar=covar, method=method)
    _, p_adj = pg.multicomp(partial_corr['p-unc'].values, alpha=pval, method=padjust)
    partial_corr['p-corr'] = p_adj

    is_significant = partial_corr['p-corr'] < pval
    x_is_not_target = ~partial_corr['X'].str.contains(target)
    y_is_target = partial_corr['Y'].str.contains(target)
    # Take an explicit copy: the columns added below must not be written into
    # a slice of the unfiltered frame (avoids SettingWithCopyWarning).
    partial_corr = partial_corr.loc[is_significant & x_is_not_target & y_is_target].copy()

    partial_corr['r_ctl'] = partial_corr['r']
    partial_corr['p_ctl'] = partial_corr['p-corr']
    if covar_name is not None:
        partial_corr['covar'] = covar_name
    partial_corr = partial_corr[['X', 'Y', 'covar', 'r_ctl', 'p_ctl']]

    # drop the controlling covars for the raw pairwise correlation
    pairwise_corr = pg.pairwise_corr(
        data=corr_df.drop(covar, axis='columns'), method=method, padjust=padjust)
    pairwise_corr['r_unctl'] = pairwise_corr['r']
    pairwise_corr['p_unctl'] = pairwise_corr['p-corr']

    partial_corr = partial_corr.merge(
        pairwise_corr[['X', 'Y', 'r_unctl', 'p_unctl', 'n']],
        on=['X', 'Y'],
        how='left',
    ).sort_values('p_ctl')
    return partial_corr.style.set_caption(method)
# Put every facet on a symlog y-axis starting at zero and shorten its title:
# titles of the form "child/parent" are reduced to the child population name.
for ax in grid.axes.flat:
    ax.set(yscale="symlog")
    ax.set_ylim(bottom=0)
    var = ax.get_title().replace("population = ", "")
    matches = re.findall(r"(.*)/(.*)", var)
    if matches:
        child, parent = matches[0]
        ax.set_title(child)
        ax.set_ylabel("Cells / uL")
    else:
        # title carries no "child/parent" pair: keep it as is
        ax.set_title(var)
grid.savefig(figfile)
plt.close(grid.fig)

import pingouin as pg

# Non-parametric pairwise tests between severity groups, one per variable;
# the grouping column is joined last, hence the `[:-1]` below.
m = matrix.join(meta[["severity_group"]])
m["severity_group"] = m["severity_group"].cat.remove_unused_categories()
per_variable = [
    pg.pairwise_ttests(
        data=m, dv=column, between="severity_group", parametric=False
    ).assign(variable=column)
    for column in m.columns[:-1]
]
res = pd.concat(per_variable).drop(columns=["Contrast"])
# FDR (Benjamini-Hochberg) correction across all tests of all variables
res["p-cor"] = pg.multicomp(res["p-unc"].values, method="fdr_bh")[1]
res.to_csv("diff.absolute.csv", index=False)
plt.hist(x=dataFrame.slope) #plt.show() if withImage_anova == 'yes': # 2-way ANOVA aov = pg.mixed_anova(dv='slope', between='group', within='eye', subject='subject', data=dataFrame) aov.round(3) aov # Bonferroni correction pvals = [aov['p-unc'][0], aov['p-unc'][1], aov['p-unc'][2]] reject, pvals_corr = pg.multicomp(pvals, method='fdr_bh') print(reject, pvals_corr) for sub in subjects: data = cs_data.loc[cs_data.id == sub] y, slope, eye = getRegressionCoeff(data) x = data.test_num.unique().tolist() # Assign color for plots if sub[0:3] == 'ASW': plot_palette = plot_colors[3] elif sub[0:2] == 'AS' or sub[0:2] == 'AM': plot_palette = plot_colors[0] elif sub[0:2] == 'AA':
def _stats_concat(frames):
    """Stack per-target result frames; an empty list gives an empty frame."""
    if not frames:
        return pandas.DataFrame()
    return pandas.concat(frames, ignore_index=True)


def _stats_first_two_groups(data):
    """Return the distinct non-null values of the 'Group' column, in order."""
    return data['Group'].dropna().drop_duplicates(keep='first').tolist()


def _stats_parametric_two_groups(data, targets, mean, paired):
    """Run one t-test per target between the first two groups."""
    groups = _stats_first_two_groups(data)
    frames = []
    for item in targets:
        df = data[data['Target Name'].eq(item)]
        group1 = df[df['Group'].eq(groups[0])][mean]
        group2 = df[df['Group'].eq(groups[1])][mean]
        t_test = ttest(group1, group2, paired=paired)
        t_test['paired'] = 'TRUE' if paired else 'FALSE'
        t_test['Target Name'] = item
        frames.append(t_test)

    # reformat output table
    stats_dfs = _stats_concat(frames).rename(columns={
        'cohen-d': 'effect size',
        'BF10': 'Bayes factor',
        'dof': 'DF'
    })
    cols = [
        'Target Name', 'DF', 'T', 'tail', 'paired', 'p-val', 'effect size',
        'power', 'Bayes factor'
    ]
    return stats_dfs.reindex(columns=cols), pandas.DataFrame()


def _stats_parametric_multi_groups(data, targets, mean, repeated, one_way):
    """Run one (repeated-measures) ANOVA plus pairwise t-tests per target."""
    aov_frames = []
    posthoc_frames = []
    pvals = []
    for item in targets:
        subset = data[data['Target Name'].eq(item)]
        if repeated:
            if one_way:
                # repeated measure anova
                aov = pg.rm_anova(dv=mean, data=subset, within='Group',
                                  subject='Sample Name', detailed=True)
                pvals.append(aov['p-unc'].iloc[0])
                aov = aov.drop([1])
                aov['measures'] = 'dependent'
            else:
                aov = pg.rm_anova(dv=mean, data=subset,
                                  within=['Group1', 'Group2'],
                                  subject='Sample Name', detailed=True)
                _, pval_corr_tw = pg.multicomp(list(aov['p-unc']),
                                               alpha=0.05, method='bonf')
                aov['p-value corrected'] = pval_corr_tw
                aov['measures'] = 'dependent'
                # extra columns such as 'eps' are discarded by the final
                # reindex, so no explicit drop is needed here
            # NOTE(review): the post-hoc test uses within='Group' even in the
            # two-way case (ANOVA uses 'Group1'/'Group2') - kept as in the
            # original, confirm this is intended.
            ph = pairwise_ttests(data=subset, dv=mean, within='Group',
                                 subject='Sample Name', padjust='fdr_bh')
        else:
            if one_way:
                aov = pg.anova(dv=mean, between='Group', data=subset,
                               detailed=True)
                pvals.append(aov['p-unc'].iloc[0])
                aov = aov.drop([1])
                aov['measures'] = 'independent'
                ph = pairwise_ttests(data=subset, dv=mean, between='Group',
                                     padjust='fdr_bh')
            else:
                aov = pg.anova(dv=mean, between=['Group1', 'Group2'],
                               data=subset, detailed=False)
                aov = aov.drop([3])
                _, pval_corr_tw = pg.multicomp(list(aov['p-unc']),
                                               alpha=0.05, method='bonf')
                aov['p-value corrected'] = pval_corr_tw
                aov['measures'] = 'independent'
                ph = pairwise_ttests(data=subset, dv=mean,
                                     between=['Group1', 'Group2'],
                                     padjust='fdr_bh')
        aov['Target Name'] = item
        ph['Test'] = 'T-Test'
        ph['Target Name'] = item
        aov_frames.append(aov)
        posthoc_frames.append(ph)

    # reformat output tables
    stats_dfs = _stats_concat(aov_frames).rename(columns={
        'p-unc': 'p-value',
        'np2': 'effect size'
    })
    if one_way and pvals:
        # Bonferroni across targets; the two-way case is corrected per target
        _, pvals_corr = pg.multicomp(pvals, alpha=0.05, method='bonf')
        stats_dfs['p-value corrected'] = pvals_corr
    stats_dfs['distribution'] = 'parametric'
    stats_dfs['test'] = 'ANOVA'
    stats_dfs['statistic'] = 'NA'
    cols = [
        'Target Name', 'Source', 'DF', 'F', 'MS', 'SS', 'p-value',
        'p-value corrected', 'measures', 'distribution', 'test', 'statistic',
        'effect size'
    ]
    stats_dfs = stats_dfs.reindex(columns=cols)

    posthoc_dfs = _stats_concat(posthoc_frames).rename(
        columns={
            'hedges': 'effect size',
            'p-corr': 'p-value corrected',
            'p-unc': 'p-value',
            'p-adjust': 'correction method',
            'BF10': 'Bayes factor',
            'dof': 'DF'
        })
    if one_way:
        cols2 = [
            'Target Name', 'A', 'B', 'DF', 'p-value corrected', 'p-value',
            'correction method', 'Paired', 'Parametric', 'Test',
            'effect size', 'Bayes factor'
        ]
    else:
        cols2 = [
            'Target Name', 'Contrast', 'Group1', 'A', 'B', 'DF',
            'p-value corrected', 'p-value', 'correction method', 'Paired',
            'Parametric', 'Test', 'effect size', 'Bayes factor'
        ]
    # reindex keeps only the listed columns ('T', and 'Contrast' in the
    # one-way case, are dropped by it)
    return stats_dfs, posthoc_dfs.reindex(columns=cols2)


def _stats_nonparametric_two_groups(data, targets, mean, paired):
    """Run one rank test per target between the first two groups."""
    groups = _stats_first_two_groups(data)
    frames = []
    for item in targets:
        df = data[data['Target Name'].eq(item)]
        # plain arrays: paired differences must be positional, not index-aligned
        group1 = df[df['Group'].eq(groups[0])][mean].to_numpy()
        group2 = df[df['Group'].eq(groups[1])][mean].to_numpy()
        if paired:
            # Wilcoxon signed-rank test for dependent samples
            test = wilcoxon(group1, group2)
        else:
            # Mann-Whitney U test for independent samples
            test = mannwhitneyu(group1, group2)
        frames.append(pandas.DataFrame(
            {
                'Target Name': item,
                'pvalue': test.pvalue,
                'statistic': test.statistic
            },
            index=[0]))
    return _stats_concat(frames), pandas.DataFrame()


def _stats_nonparametric_multi_groups(data, targets, mean, repeated):
    """Run Friedman or Kruskal-Wallis plus pairwise rank tests per target."""
    test_frames = []
    posthoc_frames = []
    pvals = []
    for item in targets:
        subset = data[data['Target Name'].eq(item)]
        if repeated:
            # friedman test for repeated measurements
            df = pg.friedman(dv=mean, within='Group', subject='Sample Name',
                             data=subset)
            test_name, measures, stat_col = 'Friedman Q', 'dependent', 'Q'
            ph = pairwise_ttests(data=subset, dv=mean, within='Group',
                                 subject='Sample Name', padjust='fdr_bh',
                                 parametric=False)
            ph['Test'] = 'Wilcoxon'
        else:
            # Kruskal-Wallis H test
            df = pg.kruskal(dv=mean, between='Group', data=subset)
            test_name, measures, stat_col = 'Kruskal-Wallis H', 'independent', 'H'
            ph = pairwise_ttests(data=subset, dv=mean, between='Group',
                                 padjust='fdr_bh', parametric=False)
            ph['Test'] = 'Mann-Whitney U'
        # positional access: the result row is not labelled 0
        pvals.append(df['p-unc'].iloc[0])
        df['test'] = test_name
        df['measures'] = measures
        df = df.rename(columns={stat_col: 'statistic'})
        df['Target Name'] = item
        df['DF'] = 'NA'
        ph['Target Name'] = item
        ph['DF'] = 'NA'
        ph['Bayes factor'] = 'NA'
        test_frames.append(df)
        posthoc_frames.append(ph)

    # reformat output tables
    stats_dfs = _stats_concat(test_frames).rename(columns={
        'dof': 'DF',
        'p-unc': 'p-value'
    })
    if pvals:
        _, pvals_corr = pg.multicomp(pvals, alpha=0.05, method='bonf')
        stats_dfs['p-value corrected'] = pvals_corr
    stats_dfs['distribution'] = 'non-parametric'
    stats_dfs['MS'] = 'NA'
    stats_dfs['SS'] = 'NA'
    stats_dfs['effect size'] = 'NA'
    cols = [
        'Target Name', 'DF', 'MS', 'SS', 'p-value', 'p-value corrected',
        'measures', 'distribution', 'test', 'statistic', 'effect size'
    ]
    stats_dfs = stats_dfs.reindex(columns=cols)

    posthoc_dfs = _stats_concat(posthoc_frames).rename(
        columns={
            'hedges': 'effect size',
            'p-corr': 'p-value corrected',
            'p-unc': 'p-value',
            'p-adjust': 'correction method',
            'BF10': 'Bayes factor'
        })
    cols2 = [
        'Target Name', 'A', 'B', 'DF', 'p-value corrected', 'p-value',
        'correction method', 'Paired', 'Parametric', 'Test', 'effect size',
        'Bayes factor'
    ]
    # reindex keeps only the listed columns ('Contrast' is dropped by it)
    return stats_dfs, posthoc_dfs.reindex(columns=cols2)


def stats(model, quantity, data, targets, tw, rm, nd):
    """Run per-target group statistics and return result and post-hoc tables.

    Parameters
    ----------
    model : str
        ``'absolute'`` analyses the ``'NormMean'`` column (dropping
        ``'NormQuant'``); any other value analyses ``'rqMean'`` (dropping
        ``'rq'``).
    quantity : int
        Number of groups: ``2`` runs a two-group test, ``>= 3`` runs an
        ANOVA-type test with pairwise post-hoc tests.
    data : pandas.DataFrame
        Intermediate table with at least the columns ``'Target Name'``,
        ``'Outliers'`` and the analysed mean column, plus ``'Group'`` (or
        ``'Group1'``/``'Group2'`` for two-way designs) and ``'Sample Name'``
        for repeated measures. The caller's frame is not modified.
    targets : iterable
        Values of ``'Target Name'`` to analyse, one test per target.
    tw, rm, nd : str
        String flags compared against literals: ``tw == 'False'`` selects a
        one-way design (otherwise two-way), ``rm == 'True'`` selects
        repeated measures / paired tests, ``nd == 'True'`` selects
        parametric tests (otherwise rank-based tests).

    Returns
    -------
    tuple of pandas.DataFrame
        ``(stats_dfs, posthoc_dfs)``; ``posthoc_dfs`` is empty for two-group
        comparisons.

    Raises
    ------
    ValueError
        If ``quantity`` is smaller than 2.
    """
    if model == 'absolute':
        data = data.drop(['NormQuant'], axis=1)
        mean = 'NormMean'
    else:
        data = data.drop(['rq'], axis=1)
        mean = 'rqMean'
    data[mean] = data[mean].astype(float)

    # prepare data from intermediate dataframe
    data = data[data['Outliers'].eq(False)]
    data = data.drop_duplicates(keep='first')

    # The flags arrive as strings; bool('False') would be True, so compare
    # against the literal instead of coercing.
    repeated = rm == 'True'
    one_way = tw == 'False'
    parametric = nd == 'True'

    if quantity == 2:
        if parametric:
            return _stats_parametric_two_groups(data, targets, mean, repeated)
        return _stats_nonparametric_two_groups(data, targets, mean, repeated)
    if quantity >= 3:
        if parametric:
            return _stats_parametric_multi_groups(
                data, targets, mean, repeated, one_way)
        return _stats_nonparametric_multi_groups(
            data, targets, mean, repeated)
    raise ValueError(
        'quantity must be 2 or greater, got {!r}'.format(quantity))
def check_mcnemar_significance(mcnemar_pvals):
    """Apply a Holm correction (alpha = 0.05) to a set of McNemar p-values.

    Returns the pair produced by ``pingouin.multicomp``: the reject
    decisions followed by the corrected p-values.
    """
    import pingouin as pg

    rejected, corrected_pvals = pg.multicomp(
        mcnemar_pvals,
        alpha=0.05,
        method="holm",
    )
    return rejected, corrected_pvals
def swarmboxenplot(
    data: DataFrame,
    x: str,
    y: tp.Union[str, Iterables],
    hue: str = None,
    swarm: bool = True,
    boxen: bool = True,
    bar: bool = False,
    ax: tp.Union[Axis, tp.Sequence[Axis]] = None,
    test: tp.Union[bool, str] = "mann-whitney",
    multiple_testing: tp.Union[bool, str] = "fdr_bh",
    test_upper_threshold: float = 0.05,
    test_lower_threshold: float = 0.01,
    plot_non_significant: bool = False,
    plot_kws: tp.Dict[str, tp.Any] = None,
    test_kws: tp.Dict[str, tp.Any] = None,
    fig_kws: tp.Dict[str, tp.Any] = None,
) -> tp.Optional[tp.Union[Figure, DataFrame, tp.Tuple[Figure, DataFrame]]]:
    """
    A categorical plot that overlays individual observations
    as a swarm plot and summary statistics about them in a boxen plot.

    In addition, this plot will test differences between observation
    groups and add lines representing a significant difference between
    them.

    Parameters
    ----------
    data: pd.DataFrame
        A dataframe with data where the rows are the observations and
        columns are the variables to group them by.
    x: str
        The categorical variable.
    y: str | list[str]
        The continuous variable to plot.
        If more than one is given (list, Series or Index), one subplot is
        drawn per `y` variable and the function calls itself once per
        variable.
    hue: str, optional
        An optional categorical variable to further group observations by.
    swarm: bool
        Whether to plot individual observations as a swarmplot.
    boxen: bool
        Whether to plot summary statistics as a boxenplot.
        Mutually exclusive with `bar`.
    bar: bool
        Whether to plot summary statistics as a barplot.
        Mutually exclusive with `boxen`.
    ax: matplotlib.axes.Axes | sequence of Axes, optional
        An optional axes (or, for several `y`, collection of axes) to draw in.
        If not given, a new figure is created.
    test: bool | str
        Whether to test differences between observation groups.
        If `False`, will not return a dataframe as well.

        If a string is passed, will perform test accordingly.
        Available tests:
            - 't-test'
            - 'mann-whitney'
            - 'kruskal' (requires `hue` to be None)
        Default is a pairwise 'mann-whitney' test with p-value adjustment.
    multiple_testing: bool | str
        Method for multiple testing correction, or `False` to use
        uncorrected p-values.
    test_upper_threshold: float
        Upper threshold to consider p-values significant.
        Will be marked with "*".
    test_lower_threshold: float
        Secondary threshold to consider p-values highly significant.
        Will be marked with "**".
    plot_non_significant: bool
        Whether to add a "n.s." sign to p-values above `test_upper_threshold`.
    plot_kws: dict
        Additional values to pass to seaborn.boxenplot, seaborn.barplot
        or seaborn.swarmplot. The passed dictionary is not modified.
    test_kws: dict
        Additional values to pass to the test function
        (pingouin.pairwise_ttests or pingouin.kruskal).
        For test='mann-whitney', `parametric=False` is enforced.
        The passed dictionary is not modified.
    fig_kws: dict
        Additional values to pass to matplotlib.pyplot.subplots when several
        `y` variables are given and `ax` is None.

    Returns
    -------
    tuple[Figure, pandas.DataFrame]:
        if `ax` is None and `test` is True.
    pandas.DataFrame:
        if `ax` is not None.
    Figure:
        if `test` is False.
    None:
        if `test` is False and `ax` is not None.

    Raises
    ------
    ValueError:
        If either the `x` or `hue` column in `data` are not
        Category, string or object type, if `y` is not numeric,
        or if `test` is not a recognized test name.
    AssertionError:
        If both `boxen` and `bar` are requested, or if `test` is 'kruskal'
        and `hue` is given.
    """
    for var, name in [(x, "x"), (hue, "hue")]:
        if var is not None:
            if data[var].dtype.name not in ["category", "string", "object"]:
                raise ValueError(
                    f"`{name}` variable must be categorical, string or object."
                )

    # Work on private copies: keys are added below and the caller's
    # dictionaries must not change as a side effect.
    test_kws = dict(test_kws or {})
    plot_kws = dict(plot_kws or {})

    group_cols = [x] + ([hue] if hue is not None else [])
    data = data.sort_values(group_cols)

    def _significance_symbol(p):
        # NaN compares False with `<=`, so missing p-values end up as "n.s."
        if p <= test_lower_threshold:
            return "**"
        if (p > test_upper_threshold) or pd.isnull(p):
            return "n.s."
        return "*"

    if isinstance(y, (list, pd.Series, pd.Index)):
        # TODO: display only one legend for hue
        if ax is None:
            n, m = get_grid_dims(y)
            default_fig_kws = dict(
                nrows=n, ncols=m, figsize=(m * 4, n * 4),
                sharex=True, squeeze=False,
            )
            default_fig_kws.update(fig_kws or {})
            fig, axes = plt.subplots(**default_fig_kws)
            axes = axes.flatten()
        elif isinstance(ax, matplotlib.axes.Axes):
            axes = np.asarray([ax])
        else:
            # numpy arrays as well as plain sequences (list/tuple) of axes
            axes = np.asarray(ax).flatten()

        _stats = list()
        idx = -1
        for idx, _var in enumerate(y):
            _ax = axes[idx]
            s: DataFrame = swarmboxenplot(
                data=data,
                x=x,
                y=_var,
                hue=hue,
                swarm=swarm,
                boxen=boxen,
                bar=bar,
                ax=_ax,
                test=test,
                multiple_testing=multiple_testing,
                test_upper_threshold=test_upper_threshold,
                test_lower_threshold=test_lower_threshold,
                plot_non_significant=plot_non_significant,
                plot_kws=plot_kws,
                test_kws=test_kws,
            )
            _ax.set(title=_var + _ax.get_title(), xlabel=None, ylabel=None)
            if test is not False:
                _stats.append(s.assign(Variable=_var))
        # "close" excess subplots
        for _ax in axes[idx + 1:]:
            _ax.axis("off")
        if test is not False:
            stats = pd.concat(_stats).reset_index(drop=True)
            cols = [c for c in stats.columns if c != "Variable"]
            stats = stats.reindex(["Variable"] + cols, axis=1)

            # If there is just one test per `y` (no hue), correct p-values
            # across variables. Compare the number of rows, not the shape
            # tuple, which can never equal an integer.
            if stats.shape[0] == len(y):
                stats["p-cor"] = pg.multicomp(
                    stats["p-unc"].values, method=multiple_testing
                )[1]
        if ax is None:
            return (fig, stats) if test else fig
        return stats if test else None

    if data[y].dtype.name in ["category", "string", "object"]:
        raise ValueError("`y` variable must be numeric.")

    if ax is None:
        fig, _ax = plt.subplots(1, 1, figsize=(4, 4))
    else:
        _ax = ax

    # Plot vanilla seaborn
    if boxen:
        assert not bar
        # Tmp fix for lack of support for Pandas Int64 in boxenplot:
        if data[y].dtype.name == "Int64":
            data[y] = data[y].astype(float)

        boxen_kws = filter_kwargs_by_callable(plot_kws, sns.boxenplot)
        sns.boxenplot(data=data, x=x, y=y, hue=hue, ax=_ax, **boxen_kws)
    if bar:
        assert not boxen
        bar_kws = filter_kwargs_by_callable(plot_kws, sns.barplot)
        sns.barplot(data=data, x=x, y=y, hue=hue, ax=_ax, **bar_kws)
    if (boxen or bar) and swarm:
        _add_transparency_to_plot(_ax, kind="bar" if bar else "boxen")
    if swarm:
        swarm_kws = filter_kwargs_by_callable(plot_kws, sns.swarmplot)
        if hue is not None and "dodge" not in swarm_kws:
            swarm_kws["dodge"] = True
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=UserWarning)
            sns.swarmplot(data=data, x=x, y=y, hue=hue, ax=_ax, **swarm_kws)
    _ax.set_xticklabels(_ax.get_xticklabels(), rotation=90, ha="right")

    if test is False:
        return fig if ax is None else None

    if test in [True, "t-test", "mann-whitney"]:
        test_function = pg.pairwise_ttests
        if test == "mann-whitney":
            test_kws["parametric"] = False
    elif test in ["kruskal"]:
        test_function = pg.kruskal
        assert hue is None, "If test is 'kruskal', 'hue' must be None."
    else:
        raise ValueError(f"Test type '{test}' not recognized.")

    if not data.index.is_unique:
        print("Warning: dataframe contains a duplicated index.")

    # remove NaNs
    datat = data.dropna(subset=[x, y] + ([hue] if hue is not None else []))
    # remove categories with only one element
    group_sizes = datat.groupby(x).size()
    keep = group_sizes[group_sizes > 1].index
    # explicit copy: a column of the filtered frame is reassigned below
    datat = datat.loc[datat[x].isin(keep), :].copy()
    if datat[x].dtype.name == "category":
        datat[x] = datat[x].cat.remove_unused_categories()
    ylim = _ax.get_ylim()  # save original axis boundaries for later
    ylength = abs(ylim[1]) + (abs(ylim[0]) if ylim[0] < 0 else 0)

    # Now calculate stats
    # # get empty dataframe in case nothing can be calculated
    stat = _get_empty_stat_results(datat, x, y, hue, add_median=True)
    # # mirror groups to account for own pingouin order
    tats = stat.rename(columns={
        "B": "A",
        "A": "B",
        "median_A": "median_B",
        "median_B": "median_A",
    })
    stat = (
        pd.concat([stat, tats])
        .sort_values(["Contrast", "A", "B"])
        .reset_index(drop=True)
    )
    try:
        _stat = test_function(
            data=datat,
            dv=y,
            between=x if hue is None else [x, hue],
            **test_kws,
        )
    except (AssertionError, ValueError) as e:
        print(str(e))
        _stat = stat
    except KeyError:
        print("Only one category with values!")
        _stat = stat

    if test == "kruskal":
        # single omnibus test: report it in the title instead of drawing bars
        p = _stat.squeeze()["p-unc"]
        _ax.set_title(_significance_symbol(p))
        return (fig, _stat) if ax is None else _stat

    stat = _stat.merge(
        stat[["Contrast", "A", "B", "median_A", "median_B"]
             + ([x] if hue is not None else [])],
        how="left",
    )
    if multiple_testing is not False:
        if "p-unc" not in stat.columns:
            stat["p-unc"] = np.nan
        stat["p-cor"] = pg.multicomp(
            stat["p-unc"].values, method=multiple_testing
        )[1]
        pcol = "p-cor"
    else:
        pcol = "p-unc"

    # This ensures there is a point for each `x` class and keep the order
    # correct for below
    mm = data.groupby(group_cols)[y].median()
    if hue is None:
        order = {k: float(i) for i, k in enumerate(mm.index)}
    else:
        nhues = data[hue].drop_duplicates().dropna().shape[0]
        order = {
            k: (float(i) / nhues) - (1 / nhues) - 0.05
            for i, k in enumerate(mm.index)
        }
    _ax.scatter(list(order.values()), mm, alpha=0, color="white")

    # Plot significance bars
    # start at top of the plot and progressively decrease sig. bar downwards
    py = data[y].max()
    incr = ylength / 100  # divide yaxis in 100 steps
    red_fact = 0.95  # make the end position shorter
    for idx, row in stat.iterrows():
        p = row[pcol]
        not_significant = pd.isnull(p) or (p > test_upper_threshold)
        if not_significant and (not plot_non_significant):
            py -= incr
            continue
        symbol = _significance_symbol(p)
        if hue is not None:
            if row[x] != "-":
                xx = (order[(row[x], row["A"])], order[(row[x], row["B"])])
            else:
                try:
                    # TODO: get more accurate middle of group
                    xx = (
                        order[(row["A"], stat["A"].iloc[-1])] - (1 / nhues),
                        order[(row["B"], stat["B"].iloc[-1])] - (1 / nhues),
                    )
                except KeyError:
                    # These are the hue groups without contrasting on 'x'
                    continue
        else:
            xx = (order[row["A"]], order[row["B"]])
        _ax.plot(
            (xx[0], xx[1] * red_fact),
            (py, py),
            color="black",
            linewidth=1.2,
        )
        _ax.text(xx[1] * red_fact, py, s=symbol, color="black", ha="center")
        py -= incr
    _ax.set_ylim(ylim)
    return (fig, stat) if ax is None else stat
def qualOrdinalUnpaired(imgDir, sheetName, sheetDf, sheetScale, silent=False):
    """Test unpaired ordinal ratings between modalities and plot the result.

    Each column of ``sheetDf`` is treated as one modality. When there are
    more than two modalities a Kruskal-Wallis test is run over all of them;
    every pair of modalities is then compared with a two-sided Mann-Whitney
    U test, and the pairwise p-values are Holm-corrected (alpha = 0.05).
    The statistics table and a contingency table of rating counts are handed
    to ``StackedBarPlotter.StackedBarPlotter``.

    Parameters
    ----------
    imgDir : str
        Directory part of the output file name (``imgDir/sheetName.png``).
    sheetName : str
        Sheet name, used for the plot title, file name and console banner.
    sheetDf : pandas.DataFrame
        One column per modality, one rating per cell.
    sheetScale : int
        Number of scale steps; steps ``0 .. sheetScale - 1`` that never occur
        are added to the contingency table with zero counts.
    silent : bool
        If True, the console banner is not printed.
    """
    from itertools import combinations

    if not silent:
        print("######################################## ", sheetName,
              " ########################################")

    meltedSheetDf = sheetDf.melt(var_name='Factor', value_name='Variable')
    contingencySheetDf = pd.crosstab(index=meltedSheetDf['Variable'],
                                     columns=meltedSheetDf['Factor'])

    # fill empty scale value
    column_count = len(contingencySheetDf.columns.values)
    for sheetStep in range(sheetScale):
        if sheetStep not in contingencySheetDf.index.values:
            contingencySheetDf.loc[sheetStep] = [0] * column_count
    contingencySheetDf.sort_index(inplace=True)

    # Result rows are collected in a list and turned into a frame once;
    # DataFrame.append is no longer available in pandas >= 2.0.
    stat_columns = [
        'COMPARISON', 'TEST', 'STATISTICS', 'P-VALUE', 'EFFECT SIZE'
    ]
    stat_rows = []

    # ALL MODALITY
    if len(contingencySheetDf.columns) > 2:
        sheetDf_long = sheetDf.melt(ignore_index=False).reset_index()
        kruskal_stats = pg.kruskal(data=sheetDf_long,
                                   dv="value",
                                   between="variable")
        source, ddof1, hvalue, pvalue = kruskal_stats.values[0]
        stat_rows.append({
            'COMPARISON': 'ALL',
            'TEST': "Kruskal-Wallis",
            'STATISTICS': hvalue,
            'P-VALUE': pvalue,
            'EFFECT SIZE': -1
        })

    # BETWEEN MODALITY
    modality_names = sheetDf.columns.values
    # rows from this position on are pairwise tests and get Holm-corrected
    uncorrectedStatIndex = len(stat_rows)
    for first, second in combinations(modality_names, 2):
        stats_mannwhitney = pg.mwu(x=sheetDf.loc[:, first],
                                   y=sheetDf.loc[:, second],
                                   alternative='two-sided')
        uvalue, alternative, pvalue, RBC, CLES = stats_mannwhitney.values[0]
        stat_rows.append({
            'COMPARISON': first + '|' + second,
            'TEST': "Mann-Whitney",
            'STATISTICS': uvalue,
            'P-VALUE': pvalue,
            'EFFECT SIZE': RBC
        })

    statDf = pd.DataFrame(stat_rows, columns=stat_columns)

    # nothing to correct when there is no pairwise comparison at all
    if len(statDf.index) > uncorrectedStatIndex:
        reject, corrected = pg.multicomp(
            statDf.loc[uncorrectedStatIndex:, 'P-VALUE'].values,
            alpha=0.05,
            method="holm")
        statDf.loc[uncorrectedStatIndex:, 'P-VALUE'] = corrected

    StackedBarPlotter.StackedBarPlotter(filename=imgDir + '/' + sheetName +
                                        '.png',
                                        title=sheetName,
                                        dataDf=sheetDf,
                                        histDf=contingencySheetDf,
                                        statDf=statDf)