def test_pairwise_tukey(self):
    """Check the Tukey p-values returned by pairwise_tukey.

    Runs ``pairwise_tukey`` on the 'anova' dataset (dependent variable
    'Pain threshold', grouping factor 'Hair color') and compares the
    'p-tukey' column, rounded to 3 decimals, with hard-coded reference
    values. The comparison is deliberately lenient (``atol=0.05``).
    """
    df = read_dataset('anova')
    stats = pairwise_tukey(dv='Pain threshold', between='Hair color',
                           data=df)
    expected = [0.074, 0.435, 0.415, 0.004, 0.789, 0.037]
    # `.to_numpy()` is the recommended pandas accessor (over `.values`).
    observed = stats.loc[:, 'p-tukey'].to_numpy().round(3)
    assert np.allclose(expected, observed, atol=0.05)
def test_pairwise_tukey(self):
    """Validate pairwise_tukey against externally computed references.

    Pingouin's p-values deviate slightly from the references because a
    different algorithm is used to approximate the studentized range
    distribution; the significance decisions should nevertheless agree.
    """
    alpha = 0.05
    groups_a = ["Adelie", "Adelie", "Chinstrap"]
    groups_b = ["Chinstrap", "Gentoo", "Gentoo"]

    # ---- Hair color dataset -------------------------------------------
    # Originally compared with the R package `userfriendlyscience`.
    # Update Feb 2021: that package has been removed from CRAN.
    hair = read_dataset('anova')
    res = pairwise_tukey(dv='Pain threshold', between='Hair color',
                         data=hair)
    # JASP:     [0.0741, 0.4356, 0.4147, 0.0037, 0.7893, 0.0366]
    # Pingouin: [0.0742, 0.4369, 0.4160, 0.0037, 0.7697, 0.0367]
    pvals = res.loc[:, 'p-tukey'].to_numpy().round(3)
    assert np.allclose([0.074, 0.435, 0.415, 0.004, 0.789, 0.037],
                       pvals, atol=0.05)

    # ---- Palmer Penguins dataset, compared with JASP ------------------
    # The between factor (species) is unbalanced here.
    penguins = read_dataset("penguins")
    res = penguins.pairwise_tukey(dv="body_mass_g",
                                  between="species").round(4)
    assert np.array_equal(res['A'], groups_a)
    assert np.array_equal(res['B'], groups_b)
    assert np.array_equal(res['diff'], [-32.426, -1375.354, -1342.928])
    # Tukey-Kramer: each comparison has its own standard error.
    assert np.array_equal(res['se'], [67.5117, 56.1480, 69.8569])
    assert np.array_equal(res['T'], [-0.4803, -24.4952, -19.2240])
    # P-values JASP:     [0.8807, 0.0000, 0.0000]
    # P-values Pingouin: [0.8694, 0.0010, 0.0010]
    # Only the significance decision is compared, not the raw p-values.
    significant = np.where(res['p-tukey'] < alpha, 'Yes', 'No')
    assert np.array_equal(significant, ['No', 'Yes', 'Yes'])

    # ---- Same dataset, balanced groups (first 20 rows per species) ----
    balanced = penguins.groupby('species').head(20).copy()
    # To complicate things, encode the between factor as a categorical.
    balanced['species'] = balanced['species'].astype('category')
    res = balanced.pairwise_tukey(dv="body_mass_g",
                                  between="species").round(4)
    assert np.array_equal(res['A'], groups_a)
    assert np.array_equal(res['B'], groups_b)
    assert np.array_equal(res['diff'], [-142.5, -1457.5, -1315.])
    # Tukey HSD: the standard error is identical for all comparisons.
    assert np.array_equal(res['se'], [142.9475, 142.9475, 142.9475])
    assert np.array_equal(res['T'], [-0.9969, -10.1961, -9.1992])
    # P-values JASP:     [0.5818, 0.0000, 0.0000]
    # P-values Pingouin: [0.5766, 0.0010, 0.0010]
    significant = np.where(res['p-tukey'] < alpha, 'Yes', 'No')
    assert np.array_equal(significant, ['No', 'Yes', 'Yes'])