def pairwise_tukey(dv=None, between=None, data=None, alpha=.05,
                   tail='two-sided', effsize='hedges'):
    r'''Pairwise Tukey-HSD post-hoc test.

    Parameters
    ----------
    dv : string
        Name of column containing the dependent variable.
    between : string
        Name of column containing the between factor.
    data : pandas DataFrame
        DataFrame
    alpha : float
        Significance level. Accepted for API compatibility; it is not used
        in the current computation (no critical value is returned).
    tail : string
        Indicates whether to return the 'two-sided' or 'one-sided' p-values.
        Any value other than 'one-sided' leaves the p-values unchanged.
    effsize : string or None
        Effect size type. Available methods are ::

        'none' : no effect size
        'cohen' : Unbiased Cohen d
        'hedges' : Hedges g
        'glass': Glass delta
        'eta-square' : Eta-square
        'odds-ratio' : Odds ratio
        'AUC' : Area Under the Curve

    Returns
    -------
    stats : DataFrame
        Stats summary ::

        'A' : Name of first measurement
        'B' : Name of second measurement
        'mean(A)' : Mean of first measurement
        'mean(B)' : Mean of second measurement
        'diff' : Mean difference
        'SE' : Standard error
        'tail' : indicate whether the p-values are one-sided or two-sided
        'T' : T-values
        'p-tukey' : Tukey-HSD corrected p-values
        'efsize' : effect sizes
        'eftype' : type of effect size

    Notes
    -----
    Tukey HSD post-hoc is best for balanced one-way ANOVA.
    It has been proven to be conservative for one-way ANOVA with unequal
    sample sizes. However, it is not robust if the groups have unequal
    variances, in which case the Games-Howell test is more adequate.
    Tukey HSD is not valid for repeated measures ANOVA.

    Note that when the sample sizes are unequal, this function actually
    performs the Tukey-Kramer test (which allows for unequal sample sizes).

    The T-values are defined as:

    .. math::

        t = \dfrac{\overline{x}_i - \overline{x}_j}{\sqrt{2 \cdot MS_w / n}}

    where :math:`\overline{x}_i` and :math:`\overline{x}_j` are the means of
    the first and second group, respectively, :math:`MS_w` the mean squares of
    the error (computed using ANOVA) and :math:`n` the sample size.

    If the sample sizes are unequal, the Tukey-Kramer procedure is
    automatically used:

    .. math::

        t = \dfrac{\overline{x}_i - \overline{x}_j}{\sqrt{\dfrac{MS_w}{n_i}
        + \dfrac{MS_w}{n_j}}}

    where :math:`n_i` and :math:`n_j` are the sample sizes of the first and
    second group, respectively.

    The p-values are then approximated using the Studentized range distribution
    :math:`Q(\sqrt2*|t_i|, r, N - r)` where :math:`r` is the total number of
    groups and :math:`N` is the total sample size.

    Note that the p-values might be slightly different than those obtained
    using R or Matlab since the studentized range approximation is done using
    the Gleason (1999) algorithm, which is more efficient and accurate than
    the algorithms used in Matlab or R.

    References
    ----------
    .. [1] Tukey, John W. "Comparing individual means in the analysis of
           variance." Biometrics (1949): 99-114.

    .. [2] Gleason, John R. "An accurate, non-iterative approximation for
           studentized range quantiles." Computational statistics & data
           analysis 31.2 (1999): 147-158.

    Examples
    --------
    Pairwise Tukey post-hocs on the pain threshold dataset.

    >>> from pingouin import pairwise_tukey
    >>> from pingouin.datasets import read_dataset
    >>> df = read_dataset('anova')
    >>> pairwise_tukey(dv='Pain threshold', between='Hair color', data=df)
    '''
    from pingouin.external.qsturng import psturng

    # First compute the ANOVA. Row 0 of the detailed table is read as the
    # between-factor term and row 1 as the error term (its MS is the MS_w of
    # the formulas above, its DF the N - r of the studentized range).
    aov = anova(dv=dv, data=data, between=between, detailed=True)
    df = aov.loc[1, 'DF']
    ng = aov.loc[0, 'DF'] + 1

    # Per-group statistics. Labels are taken from the very same groupby so
    # that names, means and sample sizes are guaranteed to share one ordering.
    grp = data.groupby(between)[dv]
    n = grp.count().values
    group_means = grp.mean()
    gmeans = group_means.values
    labels = np.asarray(group_means.index)
    gvar = aov.loc[1, 'MS'] / n

    # Pairwise combinations (indices of the first / second group of each pair)
    g1, g2 = np.array(list(combinations(np.arange(ng), 2))).T
    mn = gmeans[g1] - gmeans[g2]
    se = np.sqrt(gvar[g1] + gvar[g2])
    tval = mn / se

    # Corrected p-values from the studentized range distribution;
    # halved when one-sided p-values are requested.
    pval = psturng(np.sqrt(2) * np.abs(tval), ng, df)
    pval *= 0.5 if tail == 'one-sided' else 1

    # Effect size: turn the T-values into Cohen d, then convert to `effsize`
    d = tval * np.sqrt(1 / n[g1] + 1 / n[g2])
    ef = convert_effsize(d, 'cohen', effsize, n[g1], n[g2])

    # Create dataframe
    stats = pd.DataFrame({
        'A': labels[g1],
        'B': labels[g2],
        'mean(A)': gmeans[g1],
        'mean(B)': gmeans[g2],
        'diff': mn,
        'SE': np.round(se, 3),
        'tail': tail,
        'T': np.round(tval, 3),
        'p-tukey': pval,
        'efsize': np.round(ef, 3),
        'eftype': effsize,
    })
    return stats
def pairwise_gameshowell(dv=None, between=None, data=None, alpha=.05,
                         tail='two-sided', effsize='hedges'):
    r'''Pairwise Games-Howell post-hoc test.

    Parameters
    ----------
    dv : string
        Name of column containing the dependent variable.
    between : string
        Name of column containing the between factor.
    data : pandas DataFrame
        DataFrame
    alpha : float
        Significance level. Accepted for API compatibility; it is not used
        in the current computation.
    tail : string
        Indicates whether to return the 'two-sided' or 'one-sided' p-values.
        Any value other than 'one-sided' leaves the p-values unchanged.
    effsize : string or None
        Effect size type. Available methods are ::

        'none' : no effect size
        'cohen' : Unbiased Cohen d
        'hedges' : Hedges g
        'glass': Glass delta
        'eta-square' : Eta-square
        'odds-ratio' : Odds ratio
        'AUC' : Area Under the Curve

    Returns
    -------
    stats : DataFrame
        Stats summary ::

        'A' : Name of first measurement
        'B' : Name of second measurement
        'mean(A)' : Mean of first measurement
        'mean(B)' : Mean of second measurement
        'diff' : Mean difference
        'SE' : Standard error of the mean difference (T = diff / SE)
        'tail' : indicate whether the p-values are one-sided or two-sided
        'T' : T-values
        'df' : adjusted degrees of freedom
        'pval' : Games-Howell corrected p-values
        'efsize' : effect sizes
        'eftype' : type of effect size

    Notes
    -----
    Games-Howell is very similar to the Tukey HSD post-hoc test but is much
    more robust to heterogeneity of variances. While the
    Tukey-HSD post-hoc is optimal after a classic one-way ANOVA, the
    Games-Howell is optimal after a Welch ANOVA.
    Games-Howell is not valid for repeated measures ANOVA.

    Compared to the Tukey-HSD test, the Games-Howell test uses different pooled
    variances for each pair of variables instead of the same pooled variance.

    The T-values are defined as:

    .. math::

        t = \dfrac{\overline{x}_i - \overline{x}_j}{\sqrt{(\dfrac{s_i^2}{n_i}
        + \dfrac{s_j^2}{n_j})}}

    and the corrected degrees of freedom are:

    .. math::

        v = \dfrac{(\dfrac{s_i^2}{n_i} + \dfrac{s_j^2}{n_j})^2}
        {\dfrac{(\dfrac{s_i^2}{n_i})^2}{n_i-1} +
        \dfrac{(\dfrac{s_j^2}{n_j})^2}{n_j-1}}

    where :math:`\overline{x}_i`, :math:`s_i^2`, and :math:`n_i`
    are the mean, variance and sample size of the first group and
    :math:`\overline{x}_j`, :math:`s_j^2`, and :math:`n_j` the mean, variance
    and sample size of the second group.

    The p-values are then approximated using the Studentized range distribution
    :math:`Q(\sqrt2*|t_i|, r, v_i)`.

    Note that the p-values might be slightly different than those obtained
    using R or Matlab since the studentized range approximation is done using
    the Gleason (1999) algorithm, which is more efficient and accurate than
    the algorithms used in Matlab or R.

    References
    ----------
    .. [1] Games, Paul A., and John F. Howell. "Pairwise multiple comparison
           procedures with unequal n’s and/or variances: a Monte Carlo study."
           Journal of Educational Statistics 1.2 (1976): 113-125.

    .. [2] Gleason, John R. "An accurate, non-iterative approximation for
           studentized range quantiles." Computational statistics & data
           analysis 31.2 (1999): 147-158.

    Examples
    --------
    Pairwise Games-Howell post-hocs on the pain threshold dataset.

    >>> from pingouin import pairwise_gameshowell
    >>> from pingouin.datasets import read_dataset
    >>> df = read_dataset('anova')
    >>> pairwise_gameshowell(dv='Pain threshold', between='Hair color',
    ...                      data=df)
    '''
    from pingouin.external.qsturng import psturng

    # Check the dataframe
    _check_dataframe(dv=dv, between=between, effects='between', data=data)

    # Reset index (avoid duplicate axis error)
    data = data.reset_index(drop=True)

    # Per-group statistics. The number of groups and their labels are taken
    # from the groupby result itself: Series.unique() counts NaN as a level
    # whereas groupby drops it, which would make `ng` exceed the length of
    # the per-group arrays indexed below.
    grp = data.groupby(between)[dv]
    n = grp.count().values
    group_means = grp.mean()
    gmeans = group_means.values
    gvars = grp.var().values
    labels = np.asarray(group_means.index)
    ng = n.size

    # Pairwise combinations (indices of the first / second group of each pair)
    g1, g2 = np.array(list(combinations(np.arange(ng), 2))).T
    mn = gmeans[g1] - gmeans[g2]

    # Squared standard error of each group mean: s_i^2 / n_i and s_j^2 / n_j
    v1 = gvars[g1] / n[g1]
    v2 = gvars[g2] / n[g2]

    # Standard error of the difference, so that T = diff / SE as documented
    se = np.sqrt(v1 + v2)
    tval = mn / se

    # Adjusted degrees of freedom (formula `v` in the Notes section)
    df = (v1 + v2)**2 / ((v1**2 / (n[g1] - 1)) + (v2**2 / (n[g2] - 1)))

    # Compute corrected p-values from the studentized range distribution;
    # halved when one-sided p-values are requested.
    pval = psturng(np.sqrt(2) * np.abs(tval), ng, df)
    pval *= 0.5 if tail == 'one-sided' else 1

    # Effect size: turn the T-values into Cohen d, then convert to `effsize`
    d = tval * np.sqrt(1 / n[g1] + 1 / n[g2])
    ef = convert_effsize(d, 'cohen', effsize, n[g1], n[g2])

    # Create dataframe
    stats = pd.DataFrame({
        'A': labels[g1],
        'B': labels[g2],
        'mean(A)': gmeans[g1],
        'mean(B)': gmeans[g2],
        'diff': mn,
        'SE': se,
        'tail': tail,
        'T': tval,
        'df': df,
        'pval': pval,
        'efsize': ef,
        'eftype': effsize,
    })
    col_round = ['mean(A)', 'mean(B)', 'diff', 'SE', 'T', 'df', 'efsize']
    stats[col_round] = stats[col_round].round(3)
    return stats
def test_convert_effsize(self):
    """Exercise convert_effsize: valid conversions, rejected inputs and
    reference values."""
    cohen_d = .40
    pearson_r = .65

    # Conversions that only need to run without raising:
    # (value, input type, output type, extra keyword arguments)
    smoke_cases = (
        (cohen_d, 'cohen', 'eta-square', {}),
        (cohen_d, 'cohen', 'hedges', {'nx': 10, 'ny': 10}),
        (cohen_d, 'cohen', 'r', {'nx': 10, 'ny': 10}),
        (pearson_r, 'r', 'cohen', {}),
        (cohen_d, 'cohen', 'r', {}),
        (cohen_d, 'cohen', 'hedges', {}),
        (cohen_d, 'cohen', 'glass', {}),
        (cohen_d, 'cohen', 'none', {}),
    )
    for value, in_type, out_type, extra in smoke_cases:
        convert_effsize(value, in_type, out_type, **extra)

    # Type combinations that must be rejected with a ValueError
    for in_type, out_type in (('coucou', 'hibou'), ('AUC', 'eta-square')):
        with pytest.raises(ValueError):
            convert_effsize(cohen_d, in_type, out_type)

    # Compare with R
    d_to_r = convert_effsize(1.002549, 'cohen', 'r')
    assert np.allclose(d_to_r, 0.4481248)
    r_to_d = convert_effsize(0.4481248, 'r', 'cohen')
    assert np.allclose(r_to_d, 1.002549)