def test_wilcoxon(self):
    """Test function wilcoxon"""
    # R: wilcox.test(df$x, df$y, paired = TRUE, exact = FALSE)
    # The V value is slightly different between SciPy and R
    # The p-value, however, is almost identical
    res_scipy = scipy.stats.wilcoxon(x, y, correction=True)
    res_two = wilcoxon(x, y, tail='two-sided')
    res_one = wilcoxon(x, y, tail='one-sided')
    # SciPy returns (W statistic, p-value); both should match pingouin
    assert res_scipy[0] == res_two.at['Wilcoxon', 'W-val']
    assert res_scipy[1] == res_two.at['Wilcoxon', 'p-val']
    # Compare to R canprot::CLES
    assert res_two.at['Wilcoxon', 'CLES'] == 0.536
    # The one-sided p-value is half the two-sided p-value
    assert (res_two.at['Wilcoxon', 'p-val'] / 2) == \
        res_one.at['Wilcoxon', 'p-val']
def test_wilcoxon(self):
    """Test function wilcoxon"""
    # R: wilcox.test(df$x, df$y, paired = TRUE, exact = FALSE)
    # The V value is slightly different between SciPy and R
    # The p-value, however, is almost identical
    res_scipy = scipy.stats.wilcoxon(x2, y2, correction=True)
    res = wilcoxon(x2, y2, alternative='two-sided')
    assert res_scipy[0] == res.at['Wilcoxon', 'W-val'] == 20.5  # JASP
    assert res_scipy[1] == res.at['Wilcoxon', 'p-val']
    # Same but using the pre-computed difference
    # The W and p-values should be similar
    res_diff = wilcoxon(np.array(x2) - np.array(y2))
    for stat in ('W-val', 'p-val', 'RBC'):
        assert res.at['Wilcoxon', stat] == res_diff.at['Wilcoxon', stat]
    assert np.isnan(res_diff.at['Wilcoxon', 'CLES'])
    res_less = wilcoxon(x2, y2, alternative='less')
    res_greater = wilcoxon(x2, y2, alternative='greater')
    # Note that the RBC value are compared to JASP in test_pairwise.py
    # The RBC values in JASP does not change according to the tail.
    for frame in (res, res_less, res_greater):
        assert round(frame.at['Wilcoxon', 'RBC'], 3) == -0.379
    # CLES is compared to:
    # https://janhove.github.io/reporting/2016/11/16/common-language-effect-sizes
    assert round(res.at['Wilcoxon', 'CLES'], 3) == 0.396
    assert round(res_less.at['Wilcoxon', 'CLES'], 3) == 0.604
    assert round(res_greater.at['Wilcoxon', 'CLES'], 3) == 0.396
    with pytest.raises(ValueError):
        wilcoxon(x2, y2, tail='error')
def test_wilcoxon(self):
    """Test function wilcoxon"""
    # R: wilcox.test(df$x, df$y, paired = TRUE, exact = FALSE)
    # The V value is slightly different between SciPy and R
    # The p-value, however, is almost identical
    res_scipy = scipy.stats.wilcoxon(x2, y2, correction=True)
    res_two = wilcoxon(x2, y2, tail='two-sided')
    assert res_scipy[0] == res_two.at['Wilcoxon', 'W-val'] == 20.5  # JASP
    assert res_scipy[1] == res_two.at['Wilcoxon', 'p-val']
    res_less = wilcoxon(x2, y2, tail='less')
    res_greater = wilcoxon(x2, y2, tail='greater')
    # 'one-sided' resolves to the 'less' tail here
    res_onesided = wilcoxon(x2, y2, tail='one-sided')
    pd.testing.assert_frame_equal(res_onesided, res_less)
    # Note that the RBC value are compared to JASP in test_pairwise.py
    # The RBC values in JASP does not change according to the tail.
    for frame in (res_two, res_less, res_greater):
        assert round(frame.at['Wilcoxon', 'RBC'], 3) == -0.379
    # CLES is compared to:
    # https://janhove.github.io/reporting/2016/11/16/common-language-effect-sizes
    assert round(res_two.at['Wilcoxon', 'CLES'], 3) == 0.396
    assert round(res_less.at['Wilcoxon', 'CLES'], 3) == 0.604
    assert round(res_greater.at['Wilcoxon', 'CLES'], 3) == 0.396
def test_pairwise_ttests(self):
    """Test function pairwise_ttests.

    Tested against the pairwise.t.test R function."""
    df = read_dataset('mixed_anova.csv')
    # Within + Between + Within * Between
    pairwise_ttests(dv='Scores', within='Time', between='Group',
                    subject='Subject', data=df, alpha=.01)
    pairwise_ttests(dv='Scores', within=['Time'], between=['Group'],
                    subject='Subject', data=df, padjust='fdr_bh',
                    return_desc=True)
    # Simple within
    # In R:
    # >>> pairwise.t.test(df$Scores, df$Time, pool.sd = FALSE,
    # ...                 p.adjust.method = 'holm', paired = TRUE)
    pt = pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                         data=df, return_desc=True, padjust='holm')
    np.testing.assert_array_equal(pt.loc[:, 'p-corr'].round(3),
                                  [0.174, 0.024, 0.310])
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.087, 0.008, 0.310])
    pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                    data=df, parametric=False, return_desc=True)
    # Simple between
    # In R:
    # >>> pairwise.t.test(df$Scores, df$Group, pool.sd = FALSE)
    pt = pairwise_ttests(dv='Scores', between='Group', data=df).round(3)
    assert pt.loc[0, 'p-unc'] == 0.023
    pairwise_ttests(dv='Scores', between='Group', data=df, padjust='bonf',
                    tail='one-sided', effsize='cohen', parametric=False,
                    export_filename='test_export.csv')
    # Two between factors
    pairwise_ttests(dv='Scores', between=['Time', 'Group'], data=df,
                    padjust='holm')
    pairwise_ttests(dv='Scores', between=['Time', 'Group'], data=df,
                    padjust='holm', parametric=False)
    # Two within subject factors
    pairwise_ttests(dv='Scores', within=['Group', 'Time'],
                    subject='Subject', data=df, padjust='bonf')
    pairwise_ttests(dv='Scores', within=['Group', 'Time'],
                    subject='Subject', data=df, padjust='bonf',
                    parametric=False)
    # Wrong tail argument
    with pytest.raises(ValueError):
        pairwise_ttests(dv='Scores', between='Group', data=df, tail='wrong')
    # Wrong alpha argument (string instead of float)
    with pytest.raises(ValueError):
        pairwise_ttests(dv='Scores', between='Group', data=df, alpha='.05')
    # Both multiple between and multiple within
    with pytest.raises(ValueError):
        pairwise_ttests(dv='Scores', between=['Time', 'Group'],
                        within=['Time', 'Group'], subject='Subject',
                        data=df)
    # Missing values
    df.iloc[[10, 15], 0] = np.nan
    pairwise_ttests(dv='Scores', within='Time', subject='Subject', data=df)
    # Wrong input argument: 'Group' now has a single unique level
    df['Group'] = 'Control'
    with pytest.raises(ValueError):
        pairwise_ttests(dv='Scores', between='Group', data=df)
    # Two within factors from other datasets and with NaN values
    df2 = read_dataset('rm_anova')
    pairwise_ttests(dv='DesireToKill',
                    within=['Disgustingness', 'Frighteningness'],
                    subject='Subject', padjust='holm', data=df2)
    # Compare with JASP tail / parametric argument
    df = read_dataset('pairwise_ttests')
    # 1. Within
    # 1.1 Parametric
    # 1.1.1 Tail is greater
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         data=df, tail='greater')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.907, 0.941, 0.405])
    assert all(pt.loc[:, 'BF10'].astype(float) < 1)
    # 1.1.2 Tail is less
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         data=df, tail='less')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.093, 0.059, 0.595])
    assert sum(pt.loc[:, 'BF10'].astype(float) > 1) == 2
    # 1.1.3 Tail is one-sided: smallest p-value
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         data=df, tail='one-sided')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.093, 0.059, 0.405])
    # 1.2 Non-parametric
    # 1.2.1 Tail is greater
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         parametric=False, data=df, tail='greater')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.910, 0.951, 0.482])
    # 1.2.2 Tail is less
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         parametric=False, data=df, tail='less')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.108, 0.060, 0.554])
    # 1.2.3 Tail is one-sided: smallest p-value
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         parametric=False, data=df, tail='one-sided')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.108, 0.060, 0.482])
    # Compare the RBC value for wilcoxon
    from pingouin.nonparametric import wilcoxon
    x = df[df['Drug'] == 'A']['Scores'].values
    y = df[df['Drug'] == 'B']['Scores'].values
    assert -0.6 < wilcoxon(x, y).at['Wilcoxon', 'RBC'] < -0.4
    x = df[df['Drug'] == 'B']['Scores'].values
    y = df[df['Drug'] == 'C']['Scores'].values
    assert wilcoxon(x, y).at['Wilcoxon', 'RBC'].round(3) == 0.030
    # 2. Between
    # 2.1 Parametric
    # 2.1.1 Tail is greater
    pt = pairwise_ttests(dv='Scores', between='Gender', data=df,
                         tail='greater')
    assert pt.loc[0, 'p-unc'].round(3) == 0.068
    assert float(pt.loc[0, 'BF10']) > 1
    # 2.1.2 Tail is less
    pt = pairwise_ttests(dv='Scores', between='Gender', data=df,
                         tail='less')
    assert pt.loc[0, 'p-unc'].round(3) == 0.932
    assert float(pt.loc[0, 'BF10']) < 1
    # 2.1.3 Tail is one-sided: smallest p-value
    pt = pairwise_ttests(dv='Scores', between='Gender', data=df,
                         tail='one-sided')
    assert pt.loc[0, 'p-unc'].round(3) == 0.068
    assert float(pt.loc[0, 'BF10']) > 1
    # 2.2 Non-parametric
    # 2.2.1 Tail is greater
    pt = pairwise_ttests(dv='Scores', between='Gender', parametric=False,
                         data=df, tail='greater')
    assert pt.loc[0, 'p-unc'].round(3) == 0.105
    # 2.2.2 Tail is less
    pt = pairwise_ttests(dv='Scores', between='Gender', parametric=False,
                         data=df, tail='less')
    assert pt.loc[0, 'p-unc'].round(3) == 0.901
    # 2.2.3 Tail is one-sided: smallest p-value
    pt = pairwise_ttests(dv='Scores', between='Gender', parametric=False,
                         data=df, tail='one-sided')
    assert pt.loc[0, 'p-unc'].round(3) == 0.105
    # Compare the RBC value for MWU
    from pingouin.nonparametric import mwu
    x = df[df['Gender'] == 'M']['Scores'].values
    y = df[df['Gender'] == 'F']['Scores'].values
    assert abs(mwu(x, y).at['MWU', 'RBC']) == 0.252
def test_pairwise_ttests(self):
    """Test function pairwise_ttests.

    Tested against the pairwise.t.test R function, as well as JASP and
    JAMOVI.

    Notes:
    1) JAMOVI by default pool the error term for the within-subject factor
    in mixed design. Pingouin does not pool the error term, which is the
    same behavior as JASP.

    2) JASP does not return the uncorrected p-values, therefore only the
    corrected p-values are compared.

    3) JASP does not calculate the Bayes Factor for the interaction terms.
    For mixed design and two-way design, in JASP, the Bayes Factor seems
    to be calculated without aggregating over repeated measurements.

    4) For factorial between-subject contrasts, both JASP and JAMOVI pool
    the error term. This option is not yet implemented in Pingouin.
    Therefore, one cannot directly validate the T and p-values.
    """
    df = read_dataset('mixed_anova.csv')  # Simple and mixed design
    df_sort = df.sort_values('Time')
    # Same but random order of subject
    # df_sort = df.sample(frac=1)
    df_unb = read_dataset('mixed_anova_unbalanced')
    df_rm2 = read_dataset('rm_anova2')  # 2-way rm design
    df_aov2 = read_dataset('anova2')  # 2-way factorial design

    # -------------------------------------------------------------------
    # Simple within: EASY!
    # -------------------------------------------------------------------
    # In R:
    # >>> pairwise.t.test(df$Scores, df$Time, pool.sd = FALSE,
    # ...                 p.adjust.method = 'holm', paired = TRUE)
    pt = pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                         data=df, return_desc=True, padjust='holm')
    np.testing.assert_array_equal(pt.loc[:, 'p-corr'].round(3),
                                  [0.174, 0.024, 0.310])
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.087, 0.008, 0.310])
    pairwise_ttests(dv='Scores', within='Time', subject='Subject', data=df,
                    parametric=False, return_desc=True)
    # Same after random ordering of subject (issue 151)
    pt_sort = pairwise_ttests(dv='Scores', within='Time',
                              subject='Subject', data=df_sort,
                              return_desc=True, padjust='holm')
    assert pt_sort.equals(pt)

    # -------------------------------------------------------------------
    # Simple between: EASY!
    # -------------------------------------------------------------------
    # In R: >>> pairwise.t.test(df$Scores, df$Group, pool.sd = FALSE)
    pt = pairwise_ttests(dv='Scores', between='Group', data=df).round(3)
    assert pt.loc[0, 'p-unc'] == 0.023
    pairwise_ttests(dv='Scores', between='Group', data=df, padjust='bonf',
                    tail='one-sided', effsize='cohen', parametric=False)
    # Same after random ordering of subject (issue 151)
    pt_sort = pairwise_ttests(dv='Scores', between='Group',
                              data=df_sort).round(3)
    assert pt_sort.equals(pt)

    # -------------------------------------------------------------------
    # Mixed design: Within + Between + Within * Between
    # -------------------------------------------------------------------
    # .Balanced data
    # ..With marginal means
    pt = pairwise_ttests(dv='Scores', within='Time', between='Group',
                         subject='Subject', data=df, padjust='holm',
                         interaction=False)
    # ...Within main effect: OK with JASP
    assert np.array_equal(pt['Paired'], [True, True, True, False])
    assert np.array_equal(pt.loc[:2, 'p-corr'].round(3),
                          [0.174, 0.024, 0.310])
    assert np.array_equal(pt.loc[:2, 'BF10'].astype(float),
                          [0.582, 4.232, 0.232])
    # ..Between main effect: T and p-values OK with JASP
    # but BF10 is only similar when marginal=False (see note in the
    # 2-way RM test below).
    assert pt.loc[3, 'T'].round(3) == -2.248
    assert pt.loc[3, 'p-unc'].round(3) == 0.028
    # ..Interaction: slightly different because JASP pool the error term
    # across the between-subject groups. JASP does not compute the BF10
    # for the interaction.
    pt = pairwise_ttests(dv='Scores', within='Time', between='Group',
                         subject='Subject', data=df, padjust='holm',
                         interaction=True).round(5)
    # Same after random ordering of subject (issue 151)
    pt_sort = pairwise_ttests(dv='Scores', within='Time', between='Group',
                              subject='Subject', data=df_sort,
                              padjust='holm', interaction=True).round(5)
    assert pt_sort.equals(pt)
    # ..Changing the order of the model with ``within_first=False``.
    # output model is now between + within + between * within.
    # https://github.com/raphaelvallat/pingouin/issues/102
    pt = pairwise_ttests(dv='Scores', within='Time', between='Group',
                         subject='Subject', data=df, padjust='holm',
                         within_first=False)
    # This should be equivalent to manually filtering dataframe to keep
    # only one level at a time of the between factor and then running
    # a within-subject pairwise T-tests.
    pt_con = pairwise_ttests(dv='Scores', within='Time',
                             subject='Subject', padjust='holm',
                             data=df[df['Group'] == 'Control'])
    pt_med = pairwise_ttests(dv='Scores', within='Time',
                             subject='Subject', padjust='holm',
                             data=df[df['Group'] == 'Meditation'])
    pt_merged = pt_con.append(pt_med)
    # T, dof and p-values should be equal
    assert np.array_equal(pt_merged['T'], pt['T'].iloc[4:])
    assert np.array_equal(pt_merged['dof'], pt['dof'].iloc[4:])
    assert np.array_equal(pt_merged['p-unc'], pt['p-unc'].iloc[4:])
    # However adjusted p-values are not equal because they are calculated
    # separately on each dataframe.
    assert not np.array_equal(pt_merged['p-corr'], pt['p-corr'].iloc[4:])
    # I also manually checked the previous lines using parametric=False
    # and one-sided test.
    # Other options
    pairwise_ttests(dv='Scores', within=['Time'], between=['Group'],
                    subject='Subject', data=df, padjust='fdr_bh',
                    alpha=.01, return_desc=True, parametric=False)

    # .Unbalanced data
    # ..With marginal means
    pt1 = pairwise_ttests(dv='Scores', within='Time', between='Group',
                          subject='Subject', data=df_unb, padjust='bonf')
    # ...Within main effect: OK with JASP
    assert np.array_equal(pt1.loc[:5, 'T'].round(3),
                          [-0.777, -1.344, -2.039, -0.814, -1.492, -0.627])
    assert np.array_equal(pt1.loc[:5, 'p-corr'].round(3),
                          [1., 1., 0.313, 1., 0.889, 1.])
    assert np.array_equal(pt1.loc[:5, 'BF10'].astype(float),
                          [0.273, 0.463, 1.221, 0.280, 0.554, 0.248])
    # ...Between main effect: slightly different from JASP (why?)
    # True with or without the Welch correction...
    assert (pt1.loc[6:8, 'p-corr'] > 0.20).all()
    # ...Interaction: slightly different because JASP pool the error term
    # across the between-subject groups.
    # Below the interaction JASP bonferroni-correct p-values, which are
    # more conservative because JASP perform all possible pairwise tests
    # jasp_pbonf = [1., 1., 1., 1., 1., 1., 1., 0.886, 1., 1., 1., 1.]
    assert (pt1.loc[9:, 'p-corr'] > 0.05).all()
    # Check that the Welch corection is applied by default
    assert not pt1['dof'].apply(lambda x: x.is_integer()).all()
    # Same after random ordering of subject (issue 151)
    pt_sort = pairwise_ttests(dv='Scores', within='Time', between='Group',
                              subject='Subject',
                              data=df_unb.sample(frac=1, replace=False),
                              padjust='bonf')
    assert pt_sort.round(5).equals(pt1.round(5))
    # ..No marginal means
    pt2 = pairwise_ttests(dv='Scores', within='Time', between='Group',
                          subject='Subject', data=df_unb, padjust='bonf',
                          marginal=False)
    # This only impacts the between-subject contrast
    np.array_equal(
        (pt1['T'] == pt2['T']).astype(int),
        [1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    assert (pt1.loc[6:8, 'dof'] < pt2.loc[6:8, 'dof']).all()
    # Without the Welch correction, check that all the DF are integer
    pt3 = pairwise_ttests(dv='Scores', within='Time', between='Group',
                          subject='Subject', data=df_unb, correction=False)
    assert pt3['dof'].apply(lambda x: x.is_integer()).all()

    # -------------------------------------------------------------------
    # Two between factors (FACTORIAL)
    # -------------------------------------------------------------------
    pt = df_aov2.pairwise_ttests(dv='Yield', between=['Blend', 'Crop'],
                                 padjust='holm').round(3)
    # The T and p-values are close but not exactly the same as JASP /
    # JAMOVI, because they both pool the error term.
    # The dof are not available in JASP, but in JAMOVI they are 18
    # everywhere, which I'm not sure to understand why...
    assert np.array_equal(pt.loc[:3, 'p-unc'] < 0.05,
                          [False, False, False, True])
    # However, the Bayes Factor of the simple main effects are the same!
    np.array_equal(pt.loc[:3, 'BF10'].astype(float),
                   [0.374, 0.533, 0.711, 2.287])
    # Using the Welch method (all df should be non-integer)
    pt_c = df_aov2.pairwise_ttests(dv='Yield', between=['Blend', 'Crop'],
                                   padjust='holm', correction=True)
    assert not pt_c['dof'].apply(lambda x: x.is_integer()).any()
    # The ``marginal`` option has no impact here.
    assert pt.equals(
        df_aov2.pairwise_ttests(dv='Yield', between=['Blend', 'Crop'],
                                padjust='holm', marginal=True).round(3))

    # -------------------------------------------------------------------
    # Two within subject factors
    # -------------------------------------------------------------------
    # .Marginal = True
    ptw1 = pairwise_ttests(data=df_rm2, dv='Performance',
                           within=['Time', 'Metric'], subject='Subject',
                           padjust='bonf', marginal=True).round(3)
    # Compare the T values of the simple main effect against JASP
    # Note that the T-values of the interaction are slightly different
    # because JASP pool the error term.
    assert np.array_equal(ptw1.loc[0:3, 'T'], [5.818, 1.559, 7.714, 5.110])
    # Random sorting of the dataframe (issue 151)
    pt_sort = pairwise_ttests(data=df_rm2.sample(frac=1), dv='Performance',
                              within=['Time', 'Metric'],
                              subject='Subject', padjust='bonf',
                              marginal=True).round(3)
    assert pt_sort.equals(ptw1)
    # Non-parametric (mostly for code coverage)
    pairwise_ttests(data=df_rm2, dv='Performance',
                    within=['Time', 'Metric'], subject='Subject',
                    parametric=False)

    # -------------------------------------------------------------------
    # ERRORS
    # -------------------------------------------------------------------
    # Both multiple between and multiple within
    with pytest.raises(ValueError):
        pairwise_ttests(dv='Scores', between=['Time', 'Group'],
                        within=['Time', 'Group'], subject='Subject',
                        data=df)
    # Wrong input argument
    df['Group'] = 'Control'
    with pytest.raises(ValueError):
        pairwise_ttests(dv='Scores', between='Group', data=df)

    # -------------------------------------------------------------------
    # Missing values in repeated measurements
    # -------------------------------------------------------------------
    # 1. Parametric
    df = read_dataset('pairwise_ttests_missing')
    st = pairwise_ttests(dv='Value', within='Condition',
                         subject='Subject', data=df, nan_policy='listwise')
    np.testing.assert_array_equal(st['dof'].to_numpy(), [7, 7, 7])
    st2 = pairwise_ttests(dv='Value', within='Condition', data=df,
                          subject='Subject', nan_policy='pairwise')
    np.testing.assert_array_equal(st2['dof'].to_numpy(), [8, 7, 8])
    # 2. Non-parametric
    st = pairwise_ttests(dv='Value', within='Condition',
                         subject='Subject', data=df, parametric=False,
                         nan_policy='listwise')
    np.testing.assert_array_equal(st['W-val'].to_numpy(), [9, 3, 12])
    st2 = pairwise_ttests(dv='Value', within='Condition', data=df,
                          subject='Subject', nan_policy='pairwise',
                          parametric=False)
    # Tested against a simple for loop on combinations
    np.testing.assert_array_equal(st2['W-val'].to_numpy(), [9, 3, 21])
    with pytest.raises(ValueError):
        # Unbalanced design in repeated measurements
        df_unbalanced = df.iloc[1:, :].copy()
        pairwise_ttests(data=df_unbalanced, dv='Value',
                        within='Condition', subject='Subject')
    # Two within factors from other datasets and with NaN values
    df2 = read_dataset('rm_anova')
    pairwise_ttests(dv='DesireToKill',
                    within=['Disgustingness', 'Frighteningness'],
                    subject='Subject', padjust='holm', data=df2)

    # -------------------------------------------------------------------
    # Test tail / parametric argument (compare with JASP)
    # -------------------------------------------------------------------
    df = read_dataset('pairwise_ttests')
    # 1. Within
    # 1.1 Parametric
    # 1.1.1 Tail is greater
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         data=df, tail='greater')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.907, 0.941, 0.405])
    assert all(pt.loc[:, 'BF10'].astype(float) < 1)
    # 1.1.2 Tail is less
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         data=df, tail='less')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.093, 0.059, 0.595])
    assert sum(pt.loc[:, 'BF10'].astype(float) > 1) == 2
    # 1.1.3 Tail is one-sided: smallest p-value
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         data=df, tail='one-sided')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.093, 0.059, 0.405])
    # 1.2 Non-parametric
    # 1.2.1 Tail is greater
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         parametric=False, data=df, tail='greater')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.910, 0.951, 0.483])
    # 1.2.2 Tail is less
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         parametric=False, data=df, tail='less')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.108, 0.060, 0.551])
    # 1.2.3 Tail is one-sided: smallest p-value
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         parametric=False, data=df, tail='one-sided')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.108, 0.060, 0.483])
    # Compare the RBC value for wilcoxon
    from pingouin.nonparametric import wilcoxon
    x = df[df['Drug'] == 'A']['Scores'].to_numpy()
    y = df[df['Drug'] == 'B']['Scores'].to_numpy()
    assert -0.6 < wilcoxon(x, y).at['Wilcoxon', 'RBC'] < -0.4
    x = df[df['Drug'] == 'B']['Scores'].to_numpy()
    y = df[df['Drug'] == 'C']['Scores'].to_numpy()
    assert wilcoxon(x, y).at['Wilcoxon', 'RBC'].round(3) == 0.030
    # 2. Between
    # 2.1 Parametric
    # 2.1.1 Tail is greater
    pt = pairwise_ttests(dv='Scores', between='Gender', data=df,
                         tail='greater')
    assert pt.loc[0, 'p-unc'].round(3) == 0.932
    assert float(pt.loc[0, 'BF10']) < 1
    # 2.1.2 Tail is less
    pt = pairwise_ttests(dv='Scores', between='Gender', data=df,
                         tail='less')
    assert pt.loc[0, 'p-unc'].round(3) == 0.068
    assert float(pt.loc[0, 'BF10']) > 1
    # 2.1.3 Tail is one-sided: smallest p-value
    pt = pairwise_ttests(dv='Scores', between='Gender', data=df,
                         tail='one-sided')
    assert pt.loc[0, 'p-unc'].round(3) == 0.068
    assert float(pt.loc[0, 'BF10']) > 1
    # 2.2 Non-parametric
    # 2.2.1 Tail is greater
    pt = pairwise_ttests(dv='Scores', between='Gender', parametric=False,
                         data=df, tail='greater')
    assert pt.loc[0, 'p-unc'].round(3) == 0.901
    # 2.2.2 Tail is less
    pt = pairwise_ttests(dv='Scores', between='Gender', parametric=False,
                         data=df, tail='less')
    assert pt.loc[0, 'p-unc'].round(3) == 0.105
    # 2.2.3 Tail is one-sided: smallest p-value
    pt = pairwise_ttests(dv='Scores', between='Gender', parametric=False,
                         data=df, tail='one-sided')
    assert pt.loc[0, 'p-unc'].round(3) == 0.105
    # Compare the RBC value for MWU
    from pingouin.nonparametric import mwu
    x = df[df['Gender'] == 'M']['Scores'].to_numpy()
    y = df[df['Gender'] == 'F']['Scores'].to_numpy()
    assert round(abs(mwu(x, y).at['MWU', 'RBC']), 3) == 0.252
def test_wilcoxon(self):
    """Test function wilcoxon"""
    # Smoke test: simply check that both tails run without error
    for tail in ('one-sided', 'two-sided'):
        wilcoxon(x, y, tail=tail)
def pairwise_ttests(dv=None, between=None, within=None, subject=None,
                    data=None, parametric=True, alpha=.05,
                    tail='two-sided', padjust='none', effsize='hedges',
                    return_desc=False, export_filename=None):
    '''Pairwise T-tests.

    Parameters
    ----------
    dv : string
        Name of column containing the dependant variable.
    between : string or list with 2 elements
        Name of column(s) containing the between factor(s).
    within : string or list with 2 elements
        Name of column(s) containing the within factor(s).
    subject : string
        Name of column containing the subject identifier. Compulsory for
        contrast including a within-subject factor.
    data : pandas DataFrame
        DataFrame. Note that this function can also directly be used as a
        Pandas method, in which case this argument is no longer needed.
    parametric : boolean
        If True (default), use the parametric :py:func:`ttest` function.
        If False, use :py:func:`pingouin.wilcoxon` or
        :py:func:`pingouin.mwu` for paired or unpaired samples,
        respectively.
    alpha : float
        Significance level
    tail : string
        Indicates whether to return the 'two-sided' or 'one-sided' p-values
    padjust : string
        Method used for testing and adjustment of pvalues. Available
        methods are ::

        'none' : no correction
        'bonferroni' : one-step Bonferroni correction
        'holm' : step-down method using Bonferroni adjustments
        'fdr_bh' : Benjamini/Hochberg FDR correction
        'fdr_by' : Benjamini/Yekutieli FDR correction
    effsize : string or None
        Effect size type. Available methods are ::

        'none' : no effect size
        'cohen' : Unbiased Cohen d
        'hedges' : Hedges g
        'glass': Glass delta
        'eta-square' : Eta-square
        'odds-ratio' : Odds ratio
        'AUC' : Area Under the Curve
    return_desc : boolean
        If True, append group means and std to the output dataframe
    export_filename : string
        Filename (without extension) for the output file.
        If None, do not export the table.
        By default, the file will be created in the current python console
        directory. To change that, specify the filename with full path.

    Returns
    -------
    stats : DataFrame
        Stats summary ::

        'A' : Name of first measurement
        'B' : Name of second measurement
        'Paired' : indicates whether the two measurements are paired or not
        'Parametric' : indicates if (non)-parametric tests were used
        'Tail' : indicate whether the p-values are one-sided or two-sided
        'T' : T-values (only if parametric=True)
        'U' : Mann-Whitney U value (only if parametric=False and unpaired
              data)
        'W' : Wilcoxon W value (only if parametric=False and paired data)
        'dof' : degrees of freedom (only if parametric=True)
        'p-unc' : Uncorrected p-values
        'p-corr' : Corrected p-values
        'p-adjust' : p-values correction method
        'BF10' : Bayes Factor
        'hedges' : Hedges effect size
        'CLES' : Common language effect size

    Notes
    -----
    Data are expected to be in long-format. If your data is in wide-format,
    you can use the :py:func:`pandas.melt` function to convert from wide to
    long format.

    If ``between`` or ``within`` is a list (e.g. ['col1', 'col2']), the
    function returns 1) the pairwise T-tests between each values of the
    first column, 2) the pairwise T-tests between each values of the second
    column and 3) the interaction between col1 and col2. The interaction is
    dependent of the order of the list, so ['col1', 'col2'] will not yield
    the same results as ['col2', 'col1'].

    In other words, if ``between`` is a list with two elements, the output
    model is between1 + between2 + between1 * between2.

    Similarly, if ``within`` is a list with two elements, the output model
    is within1 + within2 + within1 * within2.

    If both ``between`` and ``within`` are specified, the function return
    within + between + within * between.

    Missing values in repeated measurements are automatically removed using
    the :py:func:`pingouin.remove_rm_na` function. However, you should be
    very careful since it can result in undesired values removal
    (especially for the interaction effect). We strongly recommend that you
    preprocess your data and remove the missing values before using this
    function.

    This function has been tested against the `pairwise.t.test` R function.

    See Also
    --------
    ttest : T-test.
    wilcoxon : Non-parametric test for paired samples.
    mwu : Non-parametric test for independent samples.

    Examples
    --------
    1. One between-factor

    >>> from pingouin import pairwise_ttests, read_dataset
    >>> df = read_dataset('mixed_anova.csv')
    >>> post_hocs = pairwise_ttests(dv='Scores', between='Group', data=df)

    2. One within-factor

    >>> post_hocs = pairwise_ttests(dv='Scores', within='Time',
    ...                             subject='Subject', data=df)
    >>> print(post_hocs)  # doctest: +SKIP

    3. Non-parametric pairwise paired test (wilcoxon)

    >>> pairwise_ttests(dv='Scores', within='Time', subject='Subject',
    ...                 data=df, parametric=False)  # doctest: +SKIP

    4. Within + Between + Within * Between with corrected p-values

    >>> posthocs = pairwise_ttests(dv='Scores', within='Time',
    ...                            subject='Subject', between='Group',
    ...                            padjust='bonf', data=df)

    5. Between1 + Between2 + Between1 * Between2

    >>> posthocs = pairwise_ttests(dv='Scores', between=['Group', 'Time'],
    ...                            data=df)
    '''
    from pingouin.parametric import ttest
    from pingouin.nonparametric import wilcoxon, mwu

    # Safety checks
    _check_dataframe(dv=dv, between=between, within=within,
                     subject=subject, effects='all', data=data)
    if tail not in ['one-sided', 'two-sided']:
        raise ValueError('Tail not recognized')
    if not isinstance(alpha, float):
        raise ValueError('Alpha must be float')

    # Check if we have multiple between or within factors
    multiple_between = False
    multiple_within = False
    contrast = None

    if isinstance(between, list):
        if len(between) > 1:
            multiple_between = True
            contrast = 'multiple_between'
            assert all([b in data.keys() for b in between])
        else:
            # Single-element list: unwrap to plain string
            between = between[0]

    if isinstance(within, list):
        if len(within) > 1:
            multiple_within = True
            contrast = 'multiple_within'
            assert all([w in data.keys() for w in within])
        else:
            # Single-element list: unwrap to plain string
            within = within[0]

    if all([multiple_within, multiple_between]):
        # BUGFIX: the message was previously passed as two comma-separated
        # arguments, i.e. ValueError received a tuple instead of a string.
        raise ValueError("Multiple between and within factors are "
                         "currently not supported. Please select only one.")

    # Check the other cases
    if isinstance(between, str) and within is None:
        contrast = 'simple_between'
        assert between in data.keys()
    if isinstance(within, str) and between is None:
        contrast = 'simple_within'
        assert within in data.keys()
    if isinstance(between, str) and isinstance(within, str):
        contrast = 'within_between'
        assert all([between in data.keys(), within in data.keys()])

    # Initialize empty variables
    stats = pd.DataFrame([])
    ddic = {}

    if contrast in ['simple_within', 'simple_between']:
        # OPTION A: SIMPLE MAIN EFFECTS, WITHIN OR BETWEEN
        paired = True if contrast == 'simple_within' else False
        col = within if contrast == 'simple_within' else between
        # Remove NAN in repeated measurements
        if contrast == 'simple_within' and data[dv].isnull().values.any():
            data = remove_rm_na(dv=dv, within=within, subject=subject,
                                data=data)
        # Extract effects: map each factor level to its DV values
        labels = data[col].unique().tolist()
        for lab in labels:
            ddic[lab] = data.loc[data[col] == lab, dv].values
        # Number and labels of possible comparisons
        if len(labels) >= 2:
            combs = list(combinations(labels, 2))
        else:
            raise ValueError('Columns must have at least two unique '
                             'values.')
        # Run a test for each pair of levels
        for comb in combs:
            col1, col2 = comb
            x = ddic.get(col1)
            y = ddic.get(col2)
            if parametric:
                df_ttest = ttest(x, y, paired=paired, tail=tail)
                # Compute exact CLES
                df_ttest['CLES'] = compute_effsize(x, y, paired=paired,
                                                   eftype='CLES')
            else:
                if paired:
                    df_ttest = wilcoxon(x, y, tail=tail)
                else:
                    df_ttest = mwu(x, y, tail=tail)
            # Compute Hedges / Cohen
            ef = compute_effsize(x=x, y=y, eftype=effsize, paired=paired)
            stats = _append_stats_dataframe(stats, x, y, col1, col2, alpha,
                                            paired, tail, df_ttest, ef,
                                            effsize)
        stats['Contrast'] = col

        # Multiple comparisons (skipped when there is a single p-value)
        padjust = None if stats['p-unc'].size <= 1 else padjust
        if padjust is not None:
            if padjust.lower() != 'none':
                _, stats['p-corr'] = multicomp(stats['p-unc'].values,
                                               alpha=alpha, method=padjust)
                stats['p-adjust'] = padjust
        else:
            stats['p-corr'] = None
            stats['p-adjust'] = None
    else:
        # B1: BETWEEN1 + BETWEEN2 + BETWEEN1 * BETWEEN2
        # B2: WITHIN1 + WITHIN2 + WITHIN1 * WITHIN2
        # B3: WITHIN + BETWEEN + WITHIN * BETWEEN
        if contrast == 'multiple_between':
            # B1
            factors = between
            fbt = factors
            fwt = [None, None]
            # eft = ['between', 'between']
            paired = False
        elif contrast == 'multiple_within':
            # B2
            factors = within
            fbt = [None, None]
            fwt = factors
            # eft = ['within', 'within']
            paired = True
        else:
            # B3
            factors = [within, between]
            fbt = [None, between]
            fwt = [within, None]
            # eft = ['within', 'between']
            paired = False

        # Recursively compute the two main effects
        for i, _ in enumerate(factors):
            stats = stats.append(
                pairwise_ttests(dv=dv, between=fbt[i], within=fwt[i],
                                subject=subject, data=data,
                                parametric=parametric, alpha=alpha,
                                tail=tail, padjust=padjust, effsize=effsize,
                                return_desc=return_desc),
                ignore_index=True, sort=False)

        # Rename effect size to generic name
        stats.rename(columns={effsize: 'efsize'}, inplace=True)

        # Then compute the interaction between the factors
        labels_fac1 = data[factors[0]].unique().tolist()
        labels_fac2 = data[factors[1]].unique().tolist()
        comb_fac1 = list(combinations(labels_fac1, 2))
        comb_fac2 = list(combinations(labels_fac2, 2))
        lc_fac1 = len(comb_fac1)
        lc_fac2 = len(comb_fac2)

        # Map each (level of factor 1, level of factor 2) pair to its DV
        for lw in labels_fac1:
            for lab in labels_fac2:
                tmp = data.loc[data[factors[0]] == lw]
                ddic[lw, lab] = tmp.loc[tmp[factors[1]] == lab, dv].values

        # Pairwise comparisons of factor 2 levels within each factor 1
        # level
        combs = list(product(labels_fac1, comb_fac2))
        for comb in combs:
            fac1, (col1, col2) = comb
            x = ddic.get((fac1, col1))
            y = ddic.get((fac1, col2))
            if parametric:
                df_ttest = ttest(x, y, paired=paired, tail=tail)
                # Compute exact CLES
                df_ttest['CLES'] = compute_effsize(x, y, paired=paired,
                                                   eftype='CLES')
            else:
                if paired:
                    df_ttest = wilcoxon(x, y, tail=tail)
                else:
                    df_ttest = mwu(x, y, tail=tail)
            ef = compute_effsize(x=x, y=y, eftype=effsize, paired=paired)
            stats = _append_stats_dataframe(stats, x, y, col1, col2, alpha,
                                            paired, tail, df_ttest, ef,
                                            effsize, fac1)

        # Update the Contrast columns for the interaction rows
        txt_inter = factors[0] + ' * ' + factors[1]
        idxitr = np.arange(lc_fac1 + lc_fac2, stats.shape[0]).tolist()
        stats.loc[idxitr, 'Contrast'] = txt_inter

        # Multi-comparison columns (interaction rows only; the main
        # effects were already corrected by the recursive calls)
        if padjust is not None and padjust.lower() != 'none':
            _, pcor = multicomp(stats.loc[idxitr, 'p-unc'].values,
                                alpha=alpha, method=padjust)
            stats.loc[idxitr, 'p-corr'] = pcor
            stats.loc[idxitr, 'p-adjust'] = padjust

    # ---------------------------------------------------------------------
    # Append parametric columns
    stats['Paired'] = stats['Paired'].astype(bool)
    stats['Parametric'] = parametric

    # Round effect size and CLES
    stats[['efsize', 'CLES']] = stats[['efsize', 'CLES']].round(3)

    # Reorganize column order
    col_order = ['Contrast', 'Time', 'A', 'B', 'mean(A)', 'std(A)',
                 'mean(B)', 'std(B)', 'Paired', 'Parametric', 'T', 'U',
                 'W', 'dof', 'tail', 'p-unc', 'p-corr', 'p-adjust', 'BF10',
                 'CLES', 'efsize']

    if return_desc is False:
        stats.drop(columns=['mean(A)', 'mean(B)', 'std(A)', 'std(B)'],
                   inplace=True)

    stats = stats.reindex(columns=col_order)
    stats.dropna(how='all', axis=1, inplace=True)

    # Rename effect size column back to its requested name
    stats.rename(columns={'efsize': effsize}, inplace=True)

    # Rename Time columns
    if contrast in ['multiple_within', 'multiple_between',
                    'within_between']:
        stats['Time'].fillna('-', inplace=True)
        stats.rename(columns={'Time': factors[0]}, inplace=True)

    if export_filename is not None:
        _export_table(stats, export_filename)
    return stats
def test_pairwise_ttests(self):
    """Test function pairwise_ttests.

    Reference p-values are tested against the R ``pairwise.t.test``
    function and against JASP. Statement order matters: ``df`` is rebound
    to three different datasets over the course of the test and is
    mutated in place for the invalid-input check.
    """
    df = read_dataset('mixed_anova.csv')
    # Within + Between + Within * Between (smoke tests, no value checks)
    pairwise_ttests(dv='Scores', within='Time', between='Group',
                    subject='Subject', data=df, alpha=.01)
    pairwise_ttests(dv='Scores', within=['Time'], between=['Group'],
                    subject='Subject', data=df, padjust='fdr_bh',
                    return_desc=True)
    # Simple within
    # In R:
    # >>> pairwise.t.test(df$Scores, df$Time, pool.sd = FALSE,
    # ...                 p.adjust.method = 'holm', paired = TRUE)
    pt = pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                         data=df, return_desc=True, padjust='holm')
    np.testing.assert_array_equal(pt.loc[:, 'p-corr'].round(3),
                                  [0.174, 0.024, 0.310])
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.087, 0.008, 0.310])
    pairwise_ttests(dv='Scores', within='Time', subject='Subject', data=df,
                    parametric=False, return_desc=True)
    # Simple between
    # In R:
    # >>> pairwise.t.test(df$Scores, df$Group, pool.sd = FALSE)
    pt = pairwise_ttests(dv='Scores', between='Group', data=df).round(3)
    assert pt.loc[0, 'p-unc'] == 0.023
    # Also exercises CSV export (writes test_export.csv to disk)
    pairwise_ttests(dv='Scores', between='Group', data=df, padjust='bonf',
                    tail='one-sided', effsize='cohen', parametric=False,
                    export_filename='test_export.csv')
    # Two between factors
    pt = pairwise_ttests(dv='Scores', between=['Time', 'Group'], data=df,
                         padjust='holm').round(3)
    pairwise_ttests(dv='Scores', between=['Time', 'Group'], data=df,
                    padjust='holm', parametric=False)
    # .. with no interaction: the first 4 rows (main effects) must match
    # the interaction=True output
    pt_no_inter = df.pairwise_ttests(dv='Scores',
                                     between=['Time', 'Group'],
                                     interaction=False,
                                     padjust='holm').round(3)
    assert pt.drop(columns=['Time']).iloc[0:4, :].equals(pt_no_inter)
    # Two within subject factors
    ptw = pairwise_ttests(data=df, dv='Scores', within=['Group', 'Time'],
                          subject='Subject', padjust='bonf',
                          parametric=False).round(3)
    ptw_no_inter = df.pairwise_ttests(dv='Scores',
                                      within=['Group', 'Time'],
                                      subject='Subject', padjust='bonf',
                                      interaction=False,
                                      parametric=False).round(3)
    assert ptw.drop(columns=['Group']).iloc[0:4, :].equals(ptw_no_inter)
    # Both multiple between and multiple within is not supported
    with pytest.raises(ValueError):
        pairwise_ttests(dv='Scores', between=['Time', 'Group'],
                        within=['Time', 'Group'], subject='Subject',
                        data=df)
    # Wrong input argument: a between factor with a single level
    df['Group'] = 'Control'
    with pytest.raises(ValueError):
        pairwise_ttests(dv='Scores', between='Group', data=df)
    # Missing values in repeated measurements
    # 1. Parametric: listwise vs pairwise NaN handling changes the dof
    df = read_dataset('pairwise_ttests_missing')
    st = pairwise_ttests(dv='Value', within='Condition', subject='Subject',
                         data=df, nan_policy='listwise')
    np.testing.assert_array_equal(st['dof'].values, [7, 7, 7])
    st2 = pairwise_ttests(dv='Value', within='Condition', data=df,
                          subject='Subject', nan_policy='pairwise')
    np.testing.assert_array_equal(st2['dof'].values, [8, 7, 8])
    # 2. Non-parametric
    st = pairwise_ttests(dv='Value', within='Condition', subject='Subject',
                         data=df, parametric=False, nan_policy='listwise')
    np.testing.assert_array_equal(st['W-val'].values, [9, 3, 12])
    st2 = pairwise_ttests(dv='Value', within='Condition', data=df,
                          subject='Subject', nan_policy='pairwise',
                          parametric=False)
    # Tested against a simple for loop on combinations
    np.testing.assert_array_equal(st2['W-val'].values, [9, 3, 21])
    with pytest.raises(ValueError):
        # Unbalanced design in repeated measurements
        df_unbalanced = df.iloc[1:, :].copy()
        pairwise_ttests(data=df_unbalanced, dv='Value', within='Condition',
                        subject='Subject')
    # Two within factors from other datasets and with NaN values
    df2 = read_dataset('rm_anova')
    pairwise_ttests(dv='DesireToKill',
                    within=['Disgustingness', 'Frighteningness'],
                    subject='Subject', padjust='holm', data=df2)
    # Compare with JASP tail / parametric argument
    df = read_dataset('pairwise_ttests')
    # 1. Within
    # 1.1 Parametric
    # 1.1.1 Tail is greater
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         data=df, tail='greater')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.907, 0.941, 0.405])
    assert all(pt.loc[:, 'BF10'].astype(float) < 1)
    # 1.1.2 Tail is less
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         data=df, tail='less')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.093, 0.059, 0.595])
    assert sum(pt.loc[:, 'BF10'].astype(float) > 1) == 2
    # 1.1.3 Tail is one-sided: smallest p-value of the two directions
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         data=df, tail='one-sided')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.093, 0.059, 0.405])
    # 1.2 Non-parametric
    # 1.2.1 Tail is greater
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         parametric=False, data=df, tail='greater')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.910, 0.951, 0.482])
    # 1.2.2 Tail is less
    pt = pairwise_ttests(dv='Scores', within='Drug',
                         subject='Subject', parametric=False, data=df,
                         tail='less')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.108, 0.060, 0.554])
    # 1.2.3 Tail is one-sided: smallest p-value of the two directions
    pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                         parametric=False, data=df, tail='one-sided')
    np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                  [0.108, 0.060, 0.482])
    # Compare the RBC value for wilcoxon
    from pingouin.nonparametric import wilcoxon
    x = df[df['Drug'] == 'A']['Scores'].values
    y = df[df['Drug'] == 'B']['Scores'].values
    assert -0.6 < wilcoxon(x, y).at['Wilcoxon', 'RBC'] < -0.4
    x = df[df['Drug'] == 'B']['Scores'].values
    y = df[df['Drug'] == 'C']['Scores'].values
    assert wilcoxon(x, y).at['Wilcoxon', 'RBC'].round(3) == 0.030
    # 2. Between
    # 2.1 Parametric
    # 2.1.1 Tail is greater
    pt = pairwise_ttests(dv='Scores', between='Gender', data=df,
                         tail='greater')
    assert pt.loc[0, 'p-unc'].round(3) == 0.068
    assert float(pt.loc[0, 'BF10']) > 1
    # 2.1.2 Tail is less
    pt = pairwise_ttests(dv='Scores', between='Gender', data=df,
                         tail='less')
    assert pt.loc[0, 'p-unc'].round(3) == 0.932
    assert float(pt.loc[0, 'BF10']) < 1
    # 2.1.3 Tail is one-sided: smallest p-value of the two directions
    pt = pairwise_ttests(dv='Scores', between='Gender', data=df,
                         tail='one-sided')
    assert pt.loc[0, 'p-unc'].round(3) == 0.068
    assert float(pt.loc[0, 'BF10']) > 1
    # 2.2 Non-parametric
    # 2.2.1 Tail is greater
    pt = pairwise_ttests(dv='Scores', between='Gender', parametric=False,
                         data=df, tail='greater')
    assert pt.loc[0, 'p-unc'].round(3) == 0.105
    # 2.2.2 Tail is less
    pt = pairwise_ttests(dv='Scores', between='Gender', parametric=False,
                         data=df, tail='less')
    assert pt.loc[0, 'p-unc'].round(3) == 0.901
    # 2.2.3 Tail is one-sided: smallest p-value of the two directions
    pt = pairwise_ttests(dv='Scores', between='Gender', parametric=False,
                         data=df, tail='one-sided')
    assert pt.loc[0, 'p-unc'].round(3) == 0.105
    # Compare the RBC value for MWU
    from pingouin.nonparametric import mwu
    x = df[df['Gender'] == 'M']['Scores'].values
    y = df[df['Gender'] == 'F']['Scores'].values
    assert abs(mwu(x, y).at['MWU', 'RBC']) == 0.252
def tost(x, y, paired=False, parametric=True, bound=0.3, correction=False):
    """Two One-Sided Test (TOST) for equivalence.

    The TOST p-value is the largest of the two one-sided p-values obtained
    after shifting each sample by ``bound``.

    Parameters
    ----------
    x : array_like
        First set of observations.
    y : array_like or float
        Second set of observations. If y is a single value, a one-sample
        test is computed.
    paired : boolean
        Specify whether the two observations are related (i.e. repeated
        measures) or independent.
    parametric : boolean
        If True (default), use the parametric :py:func:`ttest` function.
        If False, use :py:func:`pingouin.wilcoxon` or :py:func:`pingouin.mwu`
        for paired or unpaired samples, respectively.
    bound : float
        Magnitude of region of similarity (equivalence margin).
    correction : auto or boolean
        Specify whether or not to correct for unequal variances using Welch
        separate variances T-test. Only used when ``parametric`` is True.

    Returns
    -------
    stats : pandas DataFrame
        TOST summary ::

        'upper' : larger of the two one-sided p-values
        'lower' : smaller of the two one-sided p-values
        'p-val' : TOST p-value (equal to 'upper')
    """
    # .tolist() converts a 0-d array (scalar y) to a plain Python scalar,
    # whereas list() would raise "TypeError: iteration over a 0-d array";
    # for 1-d input both produce the same list.
    y_shifted = (np.asarray(y) + bound).tolist()
    x_shifted = (np.asarray(x) + bound).tolist()

    if parametric:
        df_ttesta = ttest(y_shifted, x, paired=paired, tail='one-sided',
                          correction=correction)
        df_ttestb = ttest(x_shifted, y, paired=paired, tail='one-sided',
                          correction=correction)
        # ttest's one-sided p-value follows the sign of T; flip it when T
        # is negative so both p-values refer to the 'greater' direction.
        if df_ttestb.loc['T-test', 'T'] < 0:
            df_ttestb.loc['T-test', 'p-val'] = \
                1 - df_ttestb.loc['T-test', 'p-val']
        if df_ttesta.loc['T-test', 'T'] < 0:
            df_ttesta.loc['T-test', 'p-val'] = \
                1 - df_ttesta.loc['T-test', 'p-val']
        pa = df_ttesta.loc['T-test', 'p-val']
        pb = df_ttestb.loc['T-test', 'p-val']
    elif paired:
        pa = wilcoxon(y_shifted, x, tail='greater').loc['Wilcoxon', 'p-val']
        pb = wilcoxon(x_shifted, y, tail='greater').loc['Wilcoxon', 'p-val']
    else:
        pa = mwu(y_shifted, x, tail='greater').loc['MWU', 'p-val']
        pb = mwu(x_shifted, y, tail='greater').loc['MWU', 'p-val']

    # TOST decision rule: equivalence requires BOTH one-sided tests to be
    # significant, hence the overall p-value is the larger of the two.
    pval = max(pa, pb)
    lpval = min(pa, pb)

    stats = {'p-val': pval, 'upper': pval, 'lower': lpval}
    # Convert to dataframe
    stats = pd.DataFrame.from_records(stats, index=['TOST'])
    col_order = ['upper', 'lower', 'p-val']
    stats = stats.reindex(columns=col_order)
    stats.dropna(how='all', axis=1, inplace=True)
    return stats