コード例 #1
0
ファイル: utils.py プロジェクト: GeorgeWebberley/surveyApp
def kruskal_wallis(df, independent_variable, dependent_variable):
    """Run a Kruskal-Wallis test and package the outcome for display.

    Args:
        df: DataFrame holding the data to analyse.
        independent_variable: Name of the grouping column (passed as
            ``between``).
        dependent_variable: Name of the measured column (passed as ``dv``).

    Returns:
        A ``(p_value, result)`` tuple. ``p_value`` is the uncorrected
        p-value rounded to 4 decimal places; ``result`` is a dict with the
        test name, the p-value, both variable names, the null hypothesis
        and a description of the test's assumptions.
    """
    kruskal_result = kruskal(data=df,
                             dv=dependent_variable,
                             between=independent_variable)
    # Read the first row by position: the result row is not guaranteed to be
    # labelled 0, and integer keys on a labelled Series are deprecated in
    # pandas. The value is then rounded to 4 decimal places.
    p_value = float("%.4f" % kruskal_result["p-unc"].iloc[0])
    result = {
        "test":
        "Kruskall Wallis Test",
        "p_value":
        p_value,
        "variable_1":
        independent_variable,
        "variable_2":
        dependent_variable,
        "null":
        f"The distribution of '{dependent_variable}' is the same across groups of '{independent_variable}'",
        "info":
        """Assumes that dependent variable ('{0}') is ordinal or continuous,
                    that the independent variable ('{1}') consists of more than 2 groups
                    and that these groups follow the same distribution (the shape on a histogram).\n
                    NOTE: It is also possible to perform this test on categories containing just 2 groups,
                    however we have not done so as it could conflict with results from Mann-Whitney U test
                    (performed on categories with 2 groups only).""".format(
            dependent_variable, independent_variable)
    }
    return p_value, result
コード例 #2
0
def calculate_kruskalwallish(dataframe, col_name="", p_adj_method="none", stats_id="", save_files=False,
                             save_dir="/Users/kyleweber/Desktop/"):
    """Run a Kruskal-Wallis test across "Cohort" plus pairwise follow-ups.

    Args:
        dataframe: Data containing ``col_name``, "Cohort" and "Participant"
            columns.
        col_name: Name of the dependent-variable column.
        p_adj_method: p-value adjustment method forwarded to the pairwise
            tests.
        stats_id: Prefix used for the CSV file names.
        save_files: When true, write both result tables to ``save_dir``.
        save_dir: Directory prefix the CSV names are appended to (plain
            string concatenation, so it must end with a path separator).

    Returns:
        ``(stats_main, stats_pairwise_t)``: the omnibus table and the
        pairwise table, each with an added "Sig." column holding "*" where
        the uncorrected p-value is below .05 and " " otherwise.
    """
    omnibus = pg.kruskal(data=dataframe, dv=col_name, between="Cohort", detailed=True)
    if omnibus["p-unc"].iloc[0] < .05:
        omnibus_flag = "*"
    else:
        omnibus_flag = " "
    omnibus["Sig."] = [omnibus_flag]

    # The pairwise comparisons run regardless of the omnibus result.
    posthoc = pg.pairwise_ttests(data=dataframe, dv=col_name, between="Cohort",
                                 within=None, subject="Participant", parametric=False,
                                 marginal=True, alpha=.05, tail="two-sided",
                                 padjust=p_adj_method, effsize="cohen", correction='auto')

    # Flag each comparison from its uncorrected p-value.
    flags = []
    for p_uncorrected in posthoc["p-unc"]:
        flags.append("*" if p_uncorrected < .05 else " ")
    posthoc["Sig."] = flags

    if save_files:
        print("Saving files to {}".format(save_dir))

        omnibus.to_csv(save_dir + stats_id + "_Main.csv")
        posthoc.to_csv(save_dir + stats_id + "_PairwiseT.csv", index=False)

    return omnibus, posthoc
コード例 #3
0
    def apply(self,
              alpha=0.05,
              plot=True,
              filename="kruskal",
              use_latex=False):
        """Run a Kruskal-Wallis test and optionally save a box/strip plot.

        Args:
            alpha: Significance threshold for running the post-hoc analysis.
            plot: When true, draw the groups and save the figure to disk.
            filename: Output path without extension.
            use_latex: Save as ``.pgf`` when true, ``.pdf`` otherwise.

        Returns:
            ``(kruskal, post_hoc)`` where ``kruskal`` is the frame returned
            by ``pg.kruskal`` and ``post_hoc`` is the list
            ``[nemenyi, multiple_comparisons, VD_A]`` when the p-value is
            below ``alpha``, otherwise ``None``.
        """
        kruskal = pg.kruskal(dv=self.val_col,
                             between=self.group_col,
                             data=self.df)
        # Positional access: integer keys on a labelled Series are deprecated.
        pvalue = kruskal['p-unc'].iloc[0]

        if plot:
            chi_squared = kruskal['H'].iloc[0]
            degree_freed = kruskal['ddof1'].iloc[0]

            # Report the p-value as a threshold label when it is significant,
            # otherwise as the value rounded to 3 decimals.
            if pvalue < 0.001:
                p = "< 0.001"
            elif pvalue < 0.01:
                p = "< 0.01"
            elif pvalue < 0.05:
                p = "< 0.05"
            else:
                p = round(pvalue, 3)

            fig = plt.figure(figsize=(70, 8))
            sns.boxplot(x=self.group_col, y=self.val_col, data=self.df)
            # Jittered BoxPlots
            sns.stripplot(x=self.group_col,
                          y=self.val_col,
                          data=self.df,
                          size=4,
                          jitter=True,
                          edgecolor="gray")
            # Add mean and median lines
            plt.axhline(y=self.df[self.val_col].mean(),
                        color='r',
                        linestyle='--',
                        linewidth=1.5)
            plt.axhline(y=self.df[self.val_col].median(),
                        color='b',
                        linestyle='--',
                        linewidth=2)

            plt.title("")
            plt.suptitle("")
            plt.xlabel(
                f"\nKruskal-Wallis chi-squared = {chi_squared}, df = {degree_freed}, p = {p}",
                labelpad=20)
            plt.ylabel('')
            plt.savefig(filename + ('.pgf' if use_latex else '.pdf'),
                        bbox_inches='tight')
            # Close the figure instead of only clearing it, so repeated calls
            # do not accumulate open figures.
            plt.close(fig)

        # If the Kruskal-Wallis test is significant, a post-hoc analysis can be performed
        # to determine which levels of the independent variable differ from each other level.
        if pvalue < alpha:
            eff = effect_size(self.df, self.val_col, self.group_col)
            return kruskal, [
                self._post_hoc_nemenyi(),
                self._kruskal_multiple_comparisons(),
                eff.VD_A()
            ]

        return kruskal, None
コード例 #4
0
def kw_test(err_or_dt, var, min_exp_bound=-float('inf'), ignore_exp=()):
    """Print a Kruskal-Wallis test for every pair of consecutive steps.

    Loads the per-step DataFrames through ``get_err_or_dt_dict``, tags each
    one with a ``'step'`` column, and for each pair of neighbouring steps
    (in the dictionary's iteration order) prints the two step keys followed
    by the result of ``pg.kruskal`` with ``var`` as the dependent variable
    and ``'step'`` as the grouping factor.

    Args:
        err_or_dt: Selector forwarded to ``get_err_or_dt_dict`` and used to
            build the results path ``results/diff_dens/diff_<err_or_dt>``.
        var: Name of the column to test.
        min_exp_bound: Forwarded to ``get_err_or_dt_dict``.
        ignore_exp: Forwarded to ``get_err_or_dt_dict``.

    Returns:
        None. The results are only printed.
    """
    # Local import: the visible code only shows ``np`` and ``pg`` in scope.
    import pandas as pd

    err_or_dt_dict, _min_exp, _max_exp = get_err_or_dt_dict(f'results/diff_dens/diff_{err_or_dt}', err_or_dt,
                                                            min_exp_bound=min_exp_bound, ignore_exp=ignore_exp)
    step_items = tuple(err_or_dt_dict.items())
    # Label every row with its step so the frames can be pooled and grouped.
    for s, df in step_items:
        df['step'] = np.repeat(s, len(df))
    for (s1, df1), (s2, df2) in zip(step_items[:-1], step_items[1:]):
        print(s1, s2)
        # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
        data = pd.concat([df1, df2])
        print(pg.kruskal(data, dv=var, between='step'))
        print()
コード例 #5
0
    def apply(self, ax, alpha=0.05, plot=True, ylabel=''):
        """Run a Kruskal-Wallis test and optionally draw the groups on ``ax``.

        Args:
            ax: Matplotlib axes that receives the box/strip plot.
            alpha: Significance threshold for running the post-hoc analysis.
            plot: When true, draw the plot on ``ax``.
            ylabel: Label applied to the y-axis of ``ax``.

        Returns:
            ``(kruskal, post_hoc)`` where ``kruskal`` is the frame returned
            by ``pg.kruskal`` and ``post_hoc`` is ``[nemenyi, VD_A_DF]`` when
            the p-value is below ``alpha``. ``post_hoc`` is ``None`` when the
            test is not significant or the result has no ``'p-unc'`` column.
        """
        kruskal = pg.kruskal(dv=self.val_col,
                             between=self.group_col,
                             data=self.df)

        # Without a p-value there is nothing to plot or to follow up on.
        if 'p-unc' not in kruskal.columns:
            return kruskal, None

        # Positional access: integer keys on a labelled Series are deprecated.
        pvalue = kruskal['p-unc'].iloc[0]

        if plot:
            # Report the p-value as a threshold label when it is significant,
            # otherwise as the value rounded to 3 decimals.
            if pvalue < 0.001:
                p = "< 0.001"
            elif pvalue < 0.01:
                p = "< 0.01"
            elif pvalue < 0.05:
                p = "< 0.05"
            else:
                p = round(pvalue, 3)

            sns.boxplot(x=self.group_col,
                        y=self.val_col,
                        data=self.df,
                        ax=ax)

            # Jittered BoxPlots
            sns.stripplot(x=self.group_col,
                          y=self.val_col,
                          data=self.df,
                          size=4,
                          jitter=True,
                          edgecolor="gray",
                          ax=ax)

            # Add mean and median lines
            ax.axhline(y=self.df[self.val_col].mean(),
                       color='r',
                       linestyle='--',
                       linewidth=1.5)
            ax.axhline(y=self.df[self.val_col].median(),
                       color='b',
                       linestyle='--',
                       linewidth=2)

            ax.set_ylabel(ylabel)
            ax.set_xlabel(f"\nKruskal-Wallis p-value = {p}", labelpad=15)

        # If the Kruskal-Wallis test is significant, a post-hoc analysis can be performed
        # to determine which levels of the independent variable differ from
        # each other level.
        if pvalue < alpha:
            return kruskal, [
                self._post_hoc_nemenyi(),
                VD_A_DF(self.df, self.val_col, self.group_col)
            ]

        return kruskal, None
コード例 #6
0
ファイル: routes.py プロジェクト: GeorgeWebberley/surveyApp
def kruskall_wallis(survey_id, df, independent_variable, dependent_variable,
                    form):
    """Run a Kruskal-Wallis test and redirect to the result page.

    Args:
        survey_id: Identifier forwarded to the result URL as ``survey``.
        df: DataFrame holding the survey data.
        independent_variable: Name of the grouping column.
        dependent_variable: Name of the measured column; must not be of a
            string dtype.
        form: Form object used to re-render the analysis page on error.

    Returns:
        The re-rendered analysis page (with a flashed error) when the
        dependent variable is a string column, otherwise a redirect to
        ``analysis.result`` carrying the p-value formatted to 4 decimals.
    """
    if is_string_dtype(df[dependent_variable]):
        flash(
            "Dependent Variable '" + dependent_variable + "' is not numeric.",
            "danger")
        return render_template("analysis/analysedata.html", form=form)
    kruskal_result = kruskal(data=df,
                             dv=dependent_variable,
                             between=independent_variable)
    # Take the uncorrected p-value (p-unc) by position, since integer keys on
    # a labelled Series are deprecated, and format it to 4 decimal places.
    p_value = "%.4f" % kruskal_result["p-unc"].iloc[0]
    return redirect(
        url_for('analysis.result',
                survey=survey_id,
                test="Kruskall Wallis Test",
                p_value=p_value,
                independent_variable=independent_variable,
                dependent_variable=dependent_variable))
コード例 #7
0
 def kruskal_test(df, dependent_variable, between):
     """Run a Kruskal-Wallis one-way analysis of variance.

     ``df`` is the data, ``dependent_variable`` the measured column and
     ``between`` the grouping column. Returns whatever ``kruskal`` returns.
     """
     outcome = kruskal(data=df, dv=dependent_variable, between=between)
     return outcome
コード例 #8
0
def analyse(survey_id):
    """Handle the statistical-test form for one survey.

    Loads the survey, checks that it belongs to the current user, fills the
    variable dropdowns from the data file's columns and, on a valid
    submission, runs the selected test and redirects to the result page.

    Args:
        survey_id: String identifier of the survey document.

    Returns:
        A redirect to ``analysis.chi_goodness`` or ``analysis.result`` on
        success; otherwise the analysis page, re-rendered with a flashed
        error message where one applies. Aborts with 403 when the survey
        belongs to another user.
    """
    form = StatisticalTestForm()
    survey = mongo.db.surveys.find_one_or_404({"_id": ObjectId(survey_id)})
    if survey["user"] != current_user._id:
        flash("You do not have access to that page", "danger")
        abort(403)
    df = read_file(survey["fileName"])
    # Populate the select options in the form with all the variables
    for variable in df.columns.values:
        form.independent_variable.choices.append((variable, variable))
        form.dependent_variable.choices.append((variable, variable))

    if not form.validate_on_submit():
        return render_template("analysis/analysedata.html", form=form)

    independent_variable = form.independent_variable.data
    dependent_variable = form.dependent_variable.data
    # Ensure the user hasn't selected the same variable for both
    if independent_variable == dependent_variable:
        flash("You can't select the same variable for both.", "danger")
        return render_template("analysis/analysedata.html", form=form)

    test = form.test.data
    # If the user selects Chi-Square goodness fit then they are redirected to a separate URL
    if test == "Chi-Square goodness of fit":
        return redirect(
            url_for('analysis.chi_goodness',
                    variable=independent_variable,
                    survey_id=survey_id))

    # The other tests all require a dependent variable
    if dependent_variable == "":
        flash("You must select a dependent variable for this test.",
              "danger")
        return render_template("analysis/analysedata.html", form=form)

    # Both rank-based tests need a non-string dependent variable.
    rank_based_tests = ("Kruskall Wallis Test", "Mann-Whitney U Test")
    if test in rank_based_tests and is_string_dtype(df[dependent_variable]):
        flash(
            "Dependent Variable '" + dependent_variable +
            "' is not numeric.", "danger")
        return render_template("analysis/analysedata.html", form=form)

    if test == "Kruskall Wallis Test":
        kruskal_result = kruskal(data=df,
                                 dv=dependent_variable,
                                 between=independent_variable)
        # Take the uncorrected p-value (p-unc) by position, since integer
        # keys on a labelled Series are deprecated; format to 4 decimals.
        p_value = "%.4f" % kruskal_result["p-unc"].iloc[0]
    # AT THE MOMENT, THIS TEST IS 2 TAILED. MAY WANT TO ADD OPTIONS FOR 1 TAILED TESTS
    elif test == "Mann-Whitney U Test":
        group_by = df.groupby(independent_variable)
        group_array = [group_by.get_group(x) for x in group_by.groups]
        # The test compares exactly two samples; fewer is as invalid as more.
        if len(group_array) != 2:
            flash(
                "Independent variable '" + independent_variable +
                "' must have exactly 2 groups for Mann-Whitney U Test.",
                "danger")
            return render_template("analysis/analysedata.html", form=form)
        x = group_array[0][dependent_variable].values
        y = group_array[1][dependent_variable].values
        mwu_result = mwu(x, y)
        p_value = "%.4f" % mwu_result['p-val'].values[0]
    elif test == "Chi-Square Test":
        contingency_table = pd.crosstab(df[independent_variable],
                                        df[dependent_variable])
        _, p_value, _, _ = chi2_contingency(contingency_table,
                                            correction=False)
    else:
        # Without this branch an unrecognised test would reach the redirect
        # with p_value unbound and fail with an UnboundLocalError.
        flash("The selected statistical test is not supported.", "danger")
        return render_template("analysis/analysedata.html", form=form)

    return redirect(
        url_for('analysis.result',
                survey=survey_id,
                test=test,
                p_value=p_value,
                independent_variable=independent_variable,
                dependent_variable=dependent_variable))
コード例 #9
0
ファイル: status.py プロジェクト: lostmachine18/Status
    color_palette = expand.selectbox(
        "Choose color palette",
        ("Set2", "Accent", "Blues", "BrBG", "Dark2", "GnBu", "Greys",
         "Oranges", "Paired", "Pastel1", "Purples", "Set1", "Set3", "Spectral",
         "Wistia", "autumn", "binary", "cividis", "cool", "coolwarm",
         "icefire", "inferno", "magma", "ocean", "plasma", "rainbow", "summer",
         "twilight", "viridis", "winter"))

    st.header("Difference in means between groups results")
    st.success("Descriptive statistics are being calculated")
    function_dict = {x_var: ["mean", "std", "sem", "count"]}

    new = pd.DataFrame(df.groupby(y_var).aggregate(function_dict))
    st.write(new)

    results = pg.kruskal(data=df, dv=x_var, between=y_var, detailed=True)
    st.success("Kruskal-Wallis non-parametric ANOVA results")
    st.write(results)

    st.success("Games-Howell multiple comparisons")
    games_howell = pg.pairwise_gameshowell(dv=x_var, between=y_var,
                                           data=df).round(3)
    st.write(games_howell)
    df_filtered = games_howell[games_howell['pval'] < 0.05][['A', 'B']]
    tuples = [tuple(x) for x in df_filtered.to_numpy()]

    st.markdown("## ")

    st.success("Bar plots with errors are being generated")
    fig = plt.figure(figsize=(12, 6))
    error = None
# Load the spreadsheet (first row as header, first column as index), then
# re-index the rows by their 'condition' value so they can be selected and
# ordered through `groups`.
df = pd.read_excel(file, header=0, index_col=0)

df.index = df['condition']

df = df.loc[groups]

# One panel per side: 'Local' is drawn on ax[0], 'Distal' on ax[1].
for cat, indx in zip(['Local', 'Distal'], range(2)):

    print('')
    print('{} side'.format(cat))

    # Box plot per condition with the individual points overlaid in grey.
    sn.boxplot(x='condition', y=cat, data=df, ax=ax[indx], palette=colors)
    sn.swarmplot(x='condition', y=cat, data=df, ax=ax[indx], color='0.5')

    # Kruskal-Wallis test of this side's values across conditions.
    print(pg.kruskal(data=df, dv=cat, between='condition'))

    print('')

    # Disabled scipy-based variant of the same test (hard-coded to 7 groups):
    #    kw = stats.kruskal(df.loc[groups[0],cat].values,
    #                       df.loc[groups[1],cat].values,
    #                       df.loc[groups[2],cat].values,
    #                       df.loc[groups[3],cat].values,
    #                       df.loc[groups[4],cat].values,
    #                       df.loc[groups[5],cat].values,
    #                       df.loc[groups[6],cat].values)

    #    print (kw)

    for group in groups:
コード例 #11
0
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pingouin as pg

# Load the raw nesting data and keep only the rows for the 1 mg/Kg dose.
nest_raw = pd.read_csv(
    '/Users/labc02/Documents/PDCB_data/MK-project/Nesting Data.csv')

nest_1mg = nest_raw[nest_raw['Dose (mg/Kg)'] == 1.0]
# Bare expression: displays the frame when run as an interactive cell.
nest_1mg

# The variable is ordinal, so use Kruskal-Wallis.
# Check homoscedasticity across genotypes first (result is not stored).
pg.homoscedasticity(data=nest_1mg, dv='Nesting Score', group='Genotype')
nest_kw = pg.kruskal(data=nest_1mg, dv='Nesting Score', between='Genotype')
nest_kw
#%%
# Box plot of nesting score per treatment, split by genotype, with the group
# means marked by a black '+'.
nest_fig = plt.figure(figsize=(4, 3))
sns.boxplot(x='Tx',
            y='Nesting Score',
            hue='Genotype',
            data=nest_1mg,
            palette=['forestgreen', 'royalblue'],
            showmeans=True,
            meanprops={
                'marker': '+',
                'markeredgecolor': 'k'
            },
            width=.5)
plt.legend(frameon=False, loc='lower right')
plt.xlabel('Treatment')
コード例 #12
0
                    test=test,
                    text_format='star',
                    loc='inside',
                    verbose=2,
                    comparisons_correction=correction)

# Box plot of the distributions on the fifth panel.
sn.boxplot(data=distributionsDf, palette=pal, ax=box[4])
#sn.swarmplot(data=distributionsDf,ax=box[4],color='0.25')

# Optionally export the pooled data to Excel.
if saveData:
    allData.to_excel('{}/Average_Amplitudes.xlsx'.format(saveDir))

if statsToDo:

    # Kruskal-Wallis across conditions for each measure. The p-value is read
    # by position because integer keys on a labelled Series are deprecated.
    kruskalAvg = pingouin.kruskal(data=allData,
                                  dv='Average amplitudes (pA)',
                                  between='Condition')
    print('Avg Amp')
    print(kruskalAvg)
    box[0].set_title('p(KW)={}'.format(
        round(kruskalAvg['p-unc'].iloc[0], 7)))

    kruskalTotal = pingouin.kruskal(data=allData,
                                    dv='Total amplitudes (pA)',
                                    between='Condition')
    print('Total Amp')
    print(kruskalTotal)
    box[1].set_title('p(KW)={}'.format(
        round(kruskalTotal['p-unc'].iloc[0], 7)))

    kruskalProportion = pingouin.kruskal(data=allData,
                                         dv='Propotion (%)',
                                         between='Condition')
コード例 #13
0
                    print('    {} vs {} p-val={}'.format(
                        colA, colB, mwu_test[1]))

    else:
        print('KW test failed')

plt.tight_layout()

# Inter-group statistics: pool the collected frames row-wise (globalDf is
# rebound from the collection to the concatenated frame).
print('')
print('------Inter Group Statistics-------')
globalDf = pd.concat(globalDf, axis=0)

for col in deepValues:

    # Kruskal-Wallis test of this column across conditions.
    interKruskal = pg.kruskal(data=globalDf, dv=col, between='condition')
    print('Range:{}'.format(col))
    print(interKruskal)
    print('')

    # Post hoc Mann-Whitney U tests between conditions.
    # NOTE(review): both loops run over all of `groups`, so every ordered
    # pair is visited, including each group against itself.
    for groupA in groups:
        serieA = globalDf[col].loc[(globalDf['condition'] == groupA)]

        for groupB in groups:
            serieB = globalDf[col].loc[(globalDf['condition'] == groupB)]

            interPostMwu = stats.mannwhitneyu(serieA,
                                              serieB,
                                              alternative='two-sided')
コード例 #14
0
    def qualOrdinalUnpaired(imgDir,
                            sheetName,
                            sheetDf,
                            sheetScale,
                            silent=False):
        """Compare unpaired ordinal ratings between the columns of a sheet.

        Builds a contingency table of rating counts per column, runs a
        Kruskal-Wallis test over all columns when there are more than two,
        runs a two-sided Mann-Whitney U test for every pair of columns
        (p-values Holm-corrected across the pairs), and hands the data to
        ``StackedBarPlotter.StackedBarPlotter``.

        NOTE(review): there is no ``self`` parameter, so this is presumably
        used as a static method; confirm against the enclosing class.

        Args:
            imgDir: Directory the plot file name is built from.
            sheetName: Sheet title; also used as the plot file stem.
            sheetDf: DataFrame with one column per modality.
            sheetScale: Number of scale steps; steps ``0..sheetScale-1``
                missing from the data are added with zero counts.
            silent: When true, suppress the banner print.
        """
        if not silent:
            print("######################################## ", sheetName,
                  " ########################################")
        meltedSheetDf = sheetDf.melt(var_name='Factor', value_name='Variable')
        contingencySheetDf = pd.crosstab(index=meltedSheetDf['Variable'],
                                         columns=meltedSheetDf['Factor'])
        statColumns = [
            'COMPARISON', 'TEST', 'STATISTICS', 'P-VALUE', 'EFFECT SIZE'
        ]
        # Rows are collected in a list and turned into a frame once:
        # DataFrame.append was removed in pandas 2.0.
        statRows = []
        # fill empty scale value
        for sheetStep in range(sheetScale):
            if sheetStep not in contingencySheetDf.index.values:
                contingencySheetDf.loc[sheetStep] = (
                    [0] * len(contingencySheetDf.columns.values))
        contingencySheetDf.sort_index(inplace=True)
        # ALL MODALITY
        if len(contingencySheetDf.columns) > 2:
            sheetDf_long = sheetDf.melt(ignore_index=False).reset_index()
            kruskal_stats = pg.kruskal(data=sheetDf_long,
                                       dv="value",
                                       between="variable")
            # The row is unpacked as (source, ddof1, H, p-value).
            _, _, hvalue, pvalue = kruskal_stats.values[0]
            statRows.append({
                'COMPARISON': 'ALL',
                'TEST': "Kruskal-Wallis",
                'STATISTICS': hvalue,
                'P-VALUE': pvalue,
                'EFFECT SIZE': -1
            })

        # BETWEEN MODALITY
        modality_names = sheetDf.columns.values
        # Rows from this position on are the pairwise tests to be corrected.
        uncorrectedStatIndex = len(statRows)
        for i, nameA in enumerate(modality_names):
            for nameB in modality_names[i + 1:]:
                stats_mannwhitney = pg.mwu(x=sheetDf.loc[:, nameA],
                                           y=sheetDf.loc[:, nameB],
                                           alternative='two-sided')
                # The row is unpacked as (U, alternative, p-value, RBC, CLES).
                uvalue, _, pvalue, RBC, _ = stats_mannwhitney.values[0]
                statRows.append({
                    'COMPARISON': nameA + '|' + nameB,
                    'TEST': "Mann-Whitney",
                    'STATISTICS': uvalue,
                    'P-VALUE': pvalue,
                    'EFFECT SIZE': RBC
                })
        statDf = pd.DataFrame(statRows, columns=statColumns)
        # Holm-correct the pairwise p-values only; skip when there are none.
        if len(statDf.index) > uncorrectedStatIndex:
            _, correctedPvalues = pg.multicomp(
                statDf.loc[uncorrectedStatIndex:, 'P-VALUE'].values,
                alpha=0.05,
                method="holm")
            statDf.loc[uncorrectedStatIndex:, 'P-VALUE'] = correctedPvalues

        StackedBarPlotter.StackedBarPlotter(filename=imgDir + '/' + sheetName +
                                            '.png',
                                            title=sheetName,
                                            dataDf=sheetDf,
                                            histDf=contingencySheetDf,
                                            statDf=statDf)
コード例 #15
0
# Input workbook and the colour list for the figure.
file = 'E:/03_FORMATED_DATA/BEHAVIOR/Catwalk_Norm_Profiles_Cuff_Sham_Ctrl.xlsx'
palette = ['royalblue','0.5','lightcoral']

# One row of four panels sharing the y-axis.
fig, ax = plt.subplots(1,4, sharex=False, sharey=True)

df = pd.read_excel(file, header=0)

# Start with the simplest measure: peak amplitude at D15.
postOp15 = df[['post_op_15','Condition']]

# NOTE(review): the box plot passes its own colour list (reverse order of
# `palette`) rather than the `palette` variable; confirm this is intended.
sn.boxplot(x='Condition',y='post_op_15',data=df,ax=ax[0],palette=['lightcoral','0.5','royalblue'])
#sn.swarmplot(x='Condition',y='post_op_15',data=postOp15,ax=ax[0],color='black')

# [1] Stats on the post-op day 15 peak: Kruskal-Wallis and one-way ANOVA
# across conditions.
postOp15_KW = pg.kruskal(data=postOp15,dv='post_op_15',between='Condition')
postOp15_Anova = pg.anova(data=postOp15,dv='post_op_15',between='Condition')
print('Analysis of behavioral features')
print ('[1]--------------------- Post Op 15 ------------------------')

# Per-condition mean and standard deviation.
print('Average values')
print(postOp15.groupby('Condition').mean())
print('')
print('STD')
print(postOp15.groupby('Condition').std())
print('')

print('Multi condition test')
print ('Kruskal Wallis')
print (postOp15_KW)
if postOp15_KW['p-unc'].values <= 0.05:
コード例 #16
0
ファイル: hfo_deltastatetests.py プロジェクト: mdnunez/sozhfo
    # Stack the delta values of both states into one vector and label each
    # value with the state it came from (1 for state 1, 2 for state 2).
    stackeddelta = np.hstack((np.squeeze(summarydelta['state1delta']),
                              np.squeeze(summarydelta['state2delta'])))
    whichstate = np.ones(stackeddelta.shape[0]) * 2
    whichstate[0:(np.squeeze(summarydelta['state1delta']).size)] = 1
    delta_df = pd.DataFrame({
        'standarddelta': stackeddelta,
        'brainstate': whichstate
    })
    # One-way ANOVA and Kruskal-Wallis of delta between the two states; the
    # statistic and p-value of each are stored at index p. Values are read by
    # position because integer keys on a labelled Series are deprecated.
    aov = anova(dv='standarddelta',
                between='brainstate',
                data=delta_df,
                detailed=True)
    allFstat[p] = aov['F'].iloc[0]
    ANOVApvals[p] = aov['p-unc'].iloc[0]
    kw = kruskal(dv='standarddelta',
                 between='brainstate',
                 data=delta_df,
                 detailed=True)
    allHstat[p] = kw['H'].iloc[0]
    KWpvals[p] = kw['p-unc'].iloc[0]

# Count the ANOVA p-values that fall below each significance threshold and
# report the count out of npatients.
nANOVAsig001 = np.sum(ANOVApvals < .001)
print(
    f'There were {nANOVAsig001} significant differences by ANOVA (alpha = .001) of {npatients} patients between mean standardized delta across both brain-derived states'
)
nANOVAsig01 = np.sum(ANOVApvals < .01)
print(
    f'There were {nANOVAsig01} significant differences by ANOVA (alpha = .01) of {npatients} patients between mean standardized delta across both brain-derived states'
)
nANOVAsig05 = np.sum(ANOVApvals < .05)
print(
    f'There were {nANOVAsig05} significant differences by ANOVA (alpha = .05) of {npatients} patients between mean standardized delta across both brain-derived states'
コード例 #17
0
def stats(model, quantity, data, targets, tw, rm, nd):
    if model == 'absolute':
        data = data.drop(['NormQuant'], axis=1)
        data['NormMean'] = data['NormMean'].astype(float)
        mean = 'NormMean'
    else:
        data = data.drop(['rq'], axis=1)
        data['rqMean'] = data['rqMean'].astype(float)
        mean = 'rqMean'

    # prepare data from intermediate dataframe
    data = data[data['Outliers'].eq(False)]
    data = data.drop_duplicates(keep='first')

    # t-test and anova for normally distributed data
    if nd == 'True':
        if quantity == 2:
            # T-Test between 2 groups
            stats_dfs = pandas.DataFrame()
            posthoc_dfs = pandas.DataFrame()
            group = data['Group'].dropna()
            group = group.drop_duplicates(keep='first').values.tolist()
            for item in targets:
                df = data[data['Target Name'].eq(item)]
                group1 = df[df['Group'].eq(group[0])][mean]
                group2 = df[df['Group'].eq(group[1])][mean]
                t_test = ttest(group1, group2, paired=bool(rm))

                if rm == 'True':
                    t_test['paired'] = 'TRUE'
                else:
                    t_test['paired'] = 'FALSE'
                t_test['Target Name'] = item
                if stats_dfs is None:
                    stats_dfs = t_test
                else:
                    stats_dfs = stats_dfs.append(t_test, ignore_index=True)
            # reformat output table
            stats_dfs = stats_dfs.rename(columns={
                'cohen-d': 'effect size',
                'BF10': 'Bayes factor',
                'dof': 'DF'
            })
            cols = [
                'Target Name', 'DF', 'T', 'tail', 'paired', 'p-val',
                'effect size', 'power', 'Bayes factor'
            ]
            stats_dfs = stats_dfs.reindex(columns=cols)
        elif quantity >= 3:
            # ANOVA test
            stats_dfs = pandas.DataFrame()
            posthoc_dfs = pandas.DataFrame()
            # tukey_dfs = pandas.DataFrame()
            pvals = []
            for item in targets:
                if rm == 'True':
                    # one-way
                    if tw == 'False':
                        # repeated measure anova
                        aov = pg.rm_anova(
                            dv=mean,
                            data=data[data['Target Name'].eq(item)],
                            within='Group',
                            subject='Sample Name',
                            detailed=True)
                        pvals.append(aov['p-unc'][0])
                        aov = aov.drop([1])
                        aov['measures'] = ['dependent']
                        aov['Target Name'] = item
                    # two-way
                    else:
                        aov = pg.rm_anova(
                            dv=mean,
                            data=data[data['Target Name'].eq(item)],
                            within=['Group1', 'Group2'],
                            subject='Sample Name',
                            detailed=True)
                        reject_tw, pval_corr_tw = pg.multicomp(list(
                            aov['p-unc']),
                                                               alpha=0.05,
                                                               method='bonf')
                        aov['p-value corrected'] = pval_corr_tw
                        aov['measures'] = ['dependent'] * 3
                        aov['Target Name'] = [item] * 3
                    aov.drop(['eps'], axis=1)
                    ph = pairwise_ttests(
                        data=data[data['Target Name'].eq(item)],
                        dv=mean,
                        within='Group',
                        subject='Sample Name',
                        padjust='fdr_bh')
                    ph['Target Name'] = item
                    ph['Test'] = 'T-Test'
                else:
                    # one-way
                    if tw == 'False':
                        aov = pg.anova(dv=mean,
                                       between='Group',
                                       data=data[data['Target Name'].eq(item)],
                                       detailed=True)
                        pvals.append(aov['p-unc'][0])
                        aov = aov.drop([1])
                        aov['measures'] = ['independent']
                        aov['Target Name'] = item
                        ph = pairwise_ttests(
                            data=data[data['Target Name'].eq(item)],
                            dv=mean,
                            between='Group',
                            padjust='fdr_bh')
                        ph['Test'] = 'T-Test'
                    # two-way
                    else:
                        aov = pg.anova(dv=mean,
                                       between=['Group1', 'Group2'],
                                       data=data[data['Target Name'].eq(item)],
                                       detailed=False)
                        aov = aov.drop([3])
                        reject_tw, pval_corr_tw = pg.multicomp(list(
                            aov['p-unc']),
                                                               alpha=0.05,
                                                               method='bonf')
                        aov['p-value corrected'] = pval_corr_tw
                        aov['measures'] = ['independent'] * 3
                        aov['Target Name'] = [item] * 3
                        ph = pairwise_ttests(
                            data=data[data['Target Name'].eq(item)],
                            dv=mean,
                            between=['Group1', 'Group2'],
                            padjust='fdr_bh')
                        ph['Test'] = 'T-Test'
                    ph['Target Name'] = item
                if stats_dfs is None:
                    stats_dfs = aov
                else:
                    stats_dfs = stats_dfs.append(aov, ignore_index=True)
                if posthoc_dfs is None:
                    posthoc_dfs = ph
                else:
                    posthoc_dfs = posthoc_dfs.append(ph, ignore_index=True)

            reject, pvals_corr = pg.multicomp(pvals, alpha=0.05, method='bonf')

            # reformat output tables
            stats_dfs = stats_dfs.rename(columns={
                'p-unc': 'p-value',
                'np2': 'effect size'
            })
            if tw == 'False':
                stats_dfs['p-value corrected'] = pvals_corr
                stats_dfs['distribution'] = ['parametric'] * len(targets)
                stats_dfs['test'] = ['ANOVA'] * len(targets)
                stats_dfs['statistic'] = ['NA'] * len(targets)
            else:
                stats_dfs['distribution'] = ['parametric'] * (len(targets) * 3)
                stats_dfs['test'] = ['ANOVA'] * (len(targets) * 3)
                stats_dfs['statistic'] = ['NA'] * (len(targets) * 3)
            cols = [
                'Target Name', 'Source', 'DF', 'F', 'MS', 'SS', 'p-value',
                'p-value corrected', 'measures', 'distribution', 'test',
                'statistic', 'effect size'
            ]
            stats_dfs = stats_dfs.reindex(columns=cols)
            if tw == 'False':
                posthoc_dfs = posthoc_dfs.drop(['Contrast', 'T'], axis=1)
            else:
                posthoc_dfs = posthoc_dfs.drop(['T'], axis=1)
            posthoc_dfs = posthoc_dfs.rename(
                columns={
                    'hedges': 'effect size',
                    'p-corr': 'p-value corrected',
                    'p-unc': 'p-value',
                    'p-adjust': 'correction method',
                    'BF10': 'Bayes factor',
                    'dof': 'DF'
                })
            if tw == 'False':
                cols2 = [
                    'Target Name', 'A', 'B', 'DF', 'p-value corrected',
                    'p-value', 'correction method', 'Paired', 'Parametric',
                    'Test', 'effect size', 'Bayes factor'
                ]
            else:
                cols2 = [
                    'Target Name', 'Contrast', 'Group1', 'A', 'B', 'DF',
                    'p-value corrected', 'p-value', 'correction method',
                    'Paired', 'Parametric', 'Test', 'effect size',
                    'Bayes factor'
                ]
            posthoc_dfs = posthoc_dfs.reindex(columns=cols2)

    # nonparametric tests for not normally distributed data
    else:
        if quantity == 2:
            stats_dfs = pandas.DataFrame()
            posthoc_dfs = pandas.DataFrame()
            group = data['Group'].dropna()
            group = group.drop_duplicates(keep='first').values.tolist()
            for item in targets:
                df = data[data['Target Name'].eq(item)]
                group1 = df[df['Group'].eq(group[0])][mean]
                group2 = df[df['Group'].eq(group[1])][mean]
                if rm == 'True':
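                    # Mann-Whitney U test (treats the two groups as
                    # independent samples).
                    # NOTE(review): the >= 3 groups branch below treats
                    # rm == 'True' as repeated measures (Friedman test), so
                    # an unpaired test under rm == 'True' looks inverted
                    # with the Wilcoxon branch -- confirm the intent.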
                    test = mannwhitneyu(group1, group2)
                    test = pandas.DataFrame(
                        {
                            'Target Name': item,
                            'pvalue': test.pvalue,
                            'statistic': test.statistic
                        },
                        index=[0])
                else:
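                    # Wilcoxon signed-rank test (paired samples; group1 and
                    # group2 are compared element-wise).
                    # NOTE(review): this paired test runs when rm != 'True',
                    # i.e. apparently for independent groups -- confirm.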
                    test = wilcoxon(group1, group2)
                    test = pandas.DataFrame(
                        {
                            'Target Name': item,
                            'pvalue': test.pvalue,
                            'statistic': test.statistic
                        },
                        index=[0])
                if stats_dfs is None:
                    stats_dfs = test
                else:
                    stats_dfs = stats_dfs.append(test, ignore_index=True)

        elif quantity >= 3:
            stats_dfs = pandas.DataFrame()
            posthoc_dfs = pandas.DataFrame()

            pvals = []
            for item in targets:
                if rm == 'True':
                    # Friedman test for repeated measures
                    df = pg.friedman(dv=mean,
                                     within='Group',
                                     subject='Sample Name',
                                     data=data[data['Target Name'].eq(item)])
                    pvals.append(df['p-unc'][0])
                    df['test'] = ['Friedman Q']
                    df['measures'] = ['dependent']
                    df = df.rename(columns={'Q': 'statistic'})
                    df['Target Name'] = item
                    df['DF'] = 'NA'
                    ph = pairwise_ttests(
                        data=data[data['Target Name'].eq(item)],
                        dv=mean,
                        within='Group',
                        subject='Sample Name',
                        padjust='fdr_bh',
                        parametric=False)
                    ph['Target Name'] = item
                    ph['DF'] = 'NA'
                    ph['Bayes factor'] = 'NA'
                    ph['Test'] = 'Wilcoxon'
                else:
                    # Kruskal-Wallis H test
                    df = pg.kruskal(dv=mean,
                                    between='Group',
                                    data=data[data['Target Name'].eq(item)])
                    pvals.append(df['p-unc'][0])
                    df['test'] = ['Kruskal-Wallis H']
                    df['measures'] = ['independent']
                    df = df.rename(columns={'H': 'statistic'})
                    df['Target Name'] = item
                    df['DF'] = 'NA'
                    ph = pairwise_ttests(
                        data=data[data['Target Name'].eq(item)],
                        dv=mean,
                        between='Group',
                        padjust='fdr_bh',
                        parametric=False)
                    ph['Target Name'] = item
                    ph['DF'] = 'NA'
                    ph['Bayes factor'] = 'NA'
                    ph['Test'] = 'Mann-Whitney U'
                if stats_dfs is None:
                    stats_dfs = df
                else:
                    stats_dfs = stats_dfs.append(df, ignore_index=True)
                if posthoc_dfs is None:
                    posthoc_dfs = ph
                else:
                    posthoc_dfs = posthoc_dfs.append(ph, ignore_index=True)

            reject, pvals_corr = pg.multicomp(pvals, alpha=0.05, method='bonf')
            # reformat output tables
            stats_dfs = stats_dfs.rename(columns={
                'dof': 'DF',
                'p-unc': 'p-value'
            })
            stats_dfs['p-value corrected'] = pvals_corr
            stats_dfs['distribution'] = ['non-parametric'] * len(targets)
            stats_dfs['MS'] = ['NA'] * len(targets)
            stats_dfs['SS'] = ['NA'] * len(targets)
            stats_dfs['effect size'] = ['NA'] * len(targets)
            cols = [
                'Target Name', 'DF', 'MS', 'SS', 'p-value',
                'p-value corrected', 'measures', 'distribution', 'test',
                'statistic', 'effect size'
            ]
            stats_dfs = stats_dfs.reindex(columns=cols)

            posthoc_dfs = posthoc_dfs.drop(['Contrast'], axis=1)
            posthoc_dfs = posthoc_dfs.rename(
                columns={
                    'hedges': 'effect size',
                    'p-corr': 'p-value corrected',
                    'p-unc': 'p-value',
                    'p-adjust': 'correction method',
                    'BF10': 'Bayes factor'
                })
            cols2 = [
                'Target Name', 'A', 'B', 'DF', 'p-value corrected', 'p-value',
                'correction method', 'Paired', 'Parametric', 'Test',
                'effect size', 'Bayes factor'
            ]
            posthoc_dfs = posthoc_dfs.reindex(columns=cols2)

    return stats_dfs, posthoc_dfs