Exemple #1
0
def _pairwise_ttests(self, dv=None, between=None, within=None, subject=None,
                     parametric=True, alpha=.05, tail='two-sided',
                     padjust='none', effsize='hedges', return_desc=False,
                     export_filename=None):
    """Run pairwise post-hoc tests using ``self`` as the input data.

    Thin method-style wrapper: every argument is forwarded unchanged, by
    keyword, to :func:`pairwise_ttests`, with ``self`` passed as ``data``.
    The result of that call is returned as-is.

    NOTE(review): presumably meant to be attached to ``pandas.DataFrame``
    as a method -- confirm against the code that registers it.
    """
    # Collect the forwarded options once so the delegation is a single call.
    options = dict(
        dv=dv,
        between=between,
        within=within,
        subject=subject,
        parametric=parametric,
        alpha=alpha,
        tail=tail,
        padjust=padjust,
        effsize=effsize,
        return_desc=return_desc,
        export_filename=export_filename,
    )
    return pairwise_ttests(data=self, **options)
Exemple #2
0
    def test_pairwise_ttests(self):
        """Test function pairwise_ttests.

        P-values are compared against the pairwise.t.test R function and,
        for the ``tail`` / ``parametric`` options, against JASP.

        Calls whose return value is not inspected are smoke tests: they only
        check that the given combination of arguments runs without raising.
        Statement order matters because ``df`` is mutated in place part-way
        through (NaN injection, then overwriting the 'Group' column) before
        being reloaded from another dataset.
        """
        df = read_dataset('mixed_anova.csv')
        # Within + Between + Within * Between
        pairwise_ttests(dv='Scores',
                        within='Time',
                        between='Group',
                        subject='Subject',
                        data=df,
                        alpha=.01)
        # Same design, with factors given as single-element lists
        pairwise_ttests(dv='Scores',
                        within=['Time'],
                        between=['Group'],
                        subject='Subject',
                        data=df,
                        padjust='fdr_bh',
                        return_desc=True)
        # Simple within
        # In R:
        # >>> pairwise.t.test(df$Scores, df$Time, pool.sd = FALSE,
        # ...                 p.adjust.method = 'holm', paired = TRUE)
        pt = pairwise_ttests(dv='Scores',
                             within='Time',
                             subject='Subject',
                             data=df,
                             return_desc=True,
                             padjust='holm')
        np.testing.assert_array_equal(pt.loc[:, 'p-corr'].round(3),
                                      [0.174, 0.024, 0.310])
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.087, 0.008, 0.310])
        pairwise_ttests(dv='Scores',
                        within='Time',
                        subject='Subject',
                        data=df,
                        parametric=False,
                        return_desc=True)
        # Simple between
        # In R:
        # >>> pairwise.t.test(df$Scores, df$Group, pool.sd = FALSE)
        pt = pairwise_ttests(dv='Scores', between='Group', data=df).round(3)
        assert pt.loc[0, 'p-unc'] == 0.023
        # NOTE(review): ``export_filename`` presumably writes
        # 'test_export.csv' into the current working directory -- confirm
        # and clean up if so.
        pairwise_ttests(dv='Scores',
                        between='Group',
                        data=df,
                        padjust='bonf',
                        tail='one-sided',
                        effsize='cohen',
                        parametric=False,
                        export_filename='test_export.csv')

        # Two between factors
        pairwise_ttests(dv='Scores',
                        between=['Time', 'Group'],
                        data=df,
                        padjust='holm')
        pairwise_ttests(dv='Scores',
                        between=['Time', 'Group'],
                        data=df,
                        padjust='holm',
                        parametric=False)

        # Two within subject factors
        pairwise_ttests(dv='Scores',
                        within=['Group', 'Time'],
                        subject='Subject',
                        data=df,
                        padjust='bonf')
        pairwise_ttests(dv='Scores',
                        within=['Group', 'Time'],
                        subject='Subject',
                        data=df,
                        padjust='bonf',
                        parametric=False)

        # Wrong tail argument
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores',
                            between='Group',
                            data=df,
                            tail='wrong')
        # Wrong alpha argument (a string instead of a number)
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=df, alpha='.05')

        # Both multiple between and multiple within
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores',
                            between=['Time', 'Group'],
                            within=['Time', 'Group'],
                            subject='Subject',
                            data=df)

        # Missing values: blank out the first column of rows 10 and 15
        # (presumably the 'Scores' column -- verify against the dataset).
        df.iloc[[10, 15], 0] = np.nan
        pairwise_ttests(dv='Scores', within='Time', subject='Subject', data=df)
        # Wrong input argument: a between factor with a single level
        df['Group'] = 'Control'
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=df)

        # Two within factors from other datasets and with NaN values
        df2 = read_dataset('rm_anova')
        pairwise_ttests(dv='DesireToKill',
                        within=['Disgustingness', 'Frighteningness'],
                        subject='Subject',
                        padjust='holm',
                        data=df2)

        # Compare with JASP tail / parametric argument
        df = read_dataset('pairwise_ttests')
        # 1. Within
        # 1.1 Parametric
        # 1.1.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             data=df,
                             tail='greater')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.907, 0.941, 0.405])
        assert all(pt.loc[:, 'BF10'].astype(float) < 1)
        # 1.1.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             data=df,
                             tail='less')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.093, 0.059, 0.595])
        assert sum(pt.loc[:, 'BF10'].astype(float) > 1) == 2
        # 1.1.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             data=df,
                             tail='one-sided')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.093, 0.059, 0.405])

        # 1.2 Non-parametric
        # 1.2.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             parametric=False,
                             data=df,
                             tail='greater')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.910, 0.951, 0.482])
        # 1.2.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             parametric=False,
                             data=df,
                             tail='less')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.108, 0.060, 0.554])
        # 1.2.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             parametric=False,
                             data=df,
                             tail='one-sided')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.108, 0.060, 0.482])

        # Compare the RBC value for wilcoxon
        from pingouin.nonparametric import wilcoxon
        x = df[df['Drug'] == 'A']['Scores'].values
        y = df[df['Drug'] == 'B']['Scores'].values
        assert -0.6 < wilcoxon(x, y).at['Wilcoxon', 'RBC'] < -0.4
        x = df[df['Drug'] == 'B']['Scores'].values
        y = df[df['Drug'] == 'C']['Scores'].values
        assert wilcoxon(x, y).at['Wilcoxon', 'RBC'].round(3) == 0.030

        # 2. Between
        # 2.1 Parametric
        # 2.1.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             data=df,
                             tail='greater')
        assert pt.loc[0, 'p-unc'].round(3) == 0.068
        assert float(pt.loc[0, 'BF10']) > 1
        # 2.1.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             data=df,
                             tail='less')
        assert pt.loc[0, 'p-unc'].round(3) == 0.932
        assert float(pt.loc[0, 'BF10']) < 1
        # 2.1.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             data=df,
                             tail='one-sided')
        assert pt.loc[0, 'p-unc'].round(3) == 0.068
        assert float(pt.loc[0, 'BF10']) > 1

        # 2.2 Non-parametric
        # 2.2.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             parametric=False,
                             data=df,
                             tail='greater')
        assert pt.loc[0, 'p-unc'].round(3) == 0.105
        # 2.2.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             parametric=False,
                             data=df,
                             tail='less')
        assert pt.loc[0, 'p-unc'].round(3) == 0.901
        # 2.2.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             parametric=False,
                             data=df,
                             tail='one-sided')
        assert pt.loc[0, 'p-unc'].round(3) == 0.105

        # Compare the RBC value for MWU
        from pingouin.nonparametric import mwu
        x = df[df['Gender'] == 'M']['Scores'].values
        y = df[df['Gender'] == 'F']['Scores'].values
        # Round before comparing: an exact ``==`` on an unrounded float only
        # passes if the library happens to return a pre-rounded value.
        assert round(abs(mwu(x, y).at['MWU', 'RBC']), 3) == 0.252
    def test_pairwise_ttests(self):
        """Test function pairwise_ttests.
        Tested against the pairwise.t.test R function, as well as JASP and
        JAMOVI.

        Calls whose return value is not inspected are smoke tests: they only
        check that the given combination of arguments runs without raising.

        Notes:
        1) JAMOVI by default pool the error term for the within-subject
        factor in mixed design. Pingouin does not pool the error term,
        which is the same behavior as JASP.

        2) JASP does not return the uncorrected p-values, therefore only the
        corrected p-values are compared.

        3) JASP does not calculate the Bayes Factor for the interaction terms.
        For mixed design and two-way design, in JASP, the Bayes Factor
        seems to be calculated without aggregating over repeated measurements.

        4) For factorial between-subject contrasts, both JASP and JAMOVI pool
        the error term. This option is not yet implemented in Pingouin.
        Therefore, one cannot directly validate the T and p-values.
        """
        df = read_dataset('mixed_anova.csv')  # Simple and mixed design
        # Same data with the rows reordered by 'Time', used to check that
        # the results do not depend on row order (issue 151).
        df_sort = df.sort_values('Time')
        df_unb = read_dataset('mixed_anova_unbalanced')
        df_rm2 = read_dataset('rm_anova2')  # 2-way rm design
        df_aov2 = read_dataset('anova2')  # 2-way factorial design

        # -------------------------------------------------------------------
        # Simple within: EASY!
        # -------------------------------------------------------------------
        # In R:
        # >>> pairwise.t.test(df$Scores, df$Time, pool.sd = FALSE,
        # ...                 p.adjust.method = 'holm', paired = TRUE)
        pt = pairwise_ttests(dv='Scores',
                             within='Time',
                             subject='Subject',
                             data=df,
                             return_desc=True,
                             padjust='holm')
        np.testing.assert_array_equal(pt.loc[:, 'p-corr'].round(3),
                                      [0.174, 0.024, 0.310])
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.087, 0.008, 0.310])
        pairwise_ttests(dv='Scores',
                        within='Time',
                        subject='Subject',
                        data=df,
                        parametric=False,
                        return_desc=True)

        # Same after reordering the rows (issue 151)
        pt_sort = pairwise_ttests(dv='Scores',
                                  within='Time',
                                  subject='Subject',
                                  data=df_sort,
                                  return_desc=True,
                                  padjust='holm')
        assert pt_sort.equals(pt)

        # -------------------------------------------------------------------
        # Simple between: EASY!
        # -------------------------------------------------------------------
        # In R: >>> pairwise.t.test(df$Scores, df$Group, pool.sd = FALSE)
        pt = pairwise_ttests(dv='Scores', between='Group', data=df).round(3)
        assert pt.loc[0, 'p-unc'] == 0.023
        pairwise_ttests(dv='Scores',
                        between='Group',
                        data=df,
                        padjust='bonf',
                        tail='one-sided',
                        effsize='cohen',
                        parametric=False)

        # Same after reordering the rows (issue 151)
        pt_sort = pairwise_ttests(dv='Scores', between='Group',
                                  data=df_sort).round(3)
        assert pt_sort.equals(pt)

        # -------------------------------------------------------------------
        # Mixed design: Within + Between + Within * Between
        # -------------------------------------------------------------------
        # .Balanced data
        # ..With marginal means
        pt = pairwise_ttests(dv='Scores',
                             within='Time',
                             between='Group',
                             subject='Subject',
                             data=df,
                             padjust='holm',
                             interaction=False)
        # ...Within main effect: OK with JASP
        assert np.array_equal(pt['Paired'], [True, True, True, False])
        assert np.array_equal(pt.loc[:2, 'p-corr'].round(3),
                              [0.174, 0.024, 0.310])
        assert np.array_equal(pt.loc[:2, 'BF10'].astype(float),
                              [0.582, 4.232, 0.232])
        # ..Between main effect: T and p-values OK with JASP
        #   but BF10 is only similar when marginal=False (see note in the
        #   2-way RM test below).
        assert pt.loc[3, 'T'].round(3) == -2.248
        assert pt.loc[3, 'p-unc'].round(3) == 0.028
        # ..Interaction: slightly different because JASP pool the error term
        #    across the between-subject groups. JASP does not compute the BF10
        #    for the interaction.
        pt = pairwise_ttests(dv='Scores',
                             within='Time',
                             between='Group',
                             subject='Subject',
                             data=df,
                             padjust='holm',
                             interaction=True).round(5)
        # Same after reordering the rows (issue 151)
        pt_sort = pairwise_ttests(dv='Scores',
                                  within='Time',
                                  between='Group',
                                  subject='Subject',
                                  data=df_sort,
                                  padjust='holm',
                                  interaction=True).round(5)
        assert pt_sort.equals(pt)

        # ..Changing the order of the model with ``within_first=False``.
        #   output model is now between + within + between * within.
        # https://github.com/raphaelvallat/pingouin/issues/102
        pt = pairwise_ttests(dv='Scores',
                             within='Time',
                             between='Group',
                             subject='Subject',
                             data=df,
                             padjust='holm',
                             within_first=False)
        # This should be equivalent to manually filtering dataframe to keep
        # only one level at a time of the between factor and then running
        # a within-subject pairwise T-tests.
        pt_con = pairwise_ttests(dv='Scores',
                                 within='Time',
                                 subject='Subject',
                                 padjust='holm',
                                 data=df[df['Group'] == 'Control'])
        pt_med = pairwise_ttests(dv='Scores',
                                 within='Time',
                                 subject='Subject',
                                 padjust='holm',
                                 data=df[df['Group'] == 'Meditation'])
        # ``DataFrame.append`` was deprecated in pandas 1.4 and removed in
        # 2.0; ``pd.concat`` stacks the two frames the same way. Imported
        # locally so this method does not rely on a module-level ``pd``.
        import pandas as pd
        pt_merged = pd.concat([pt_con, pt_med])
        # T, dof and p-values should be equal
        assert np.array_equal(pt_merged['T'], pt['T'].iloc[4:])
        assert np.array_equal(pt_merged['dof'], pt['dof'].iloc[4:])
        assert np.array_equal(pt_merged['p-unc'], pt['p-unc'].iloc[4:])
        # However adjusted p-values are not equal because they are calculated
        # separately on each dataframe.
        assert not np.array_equal(pt_merged['p-corr'], pt['p-corr'].iloc[4:])
        # I also manually checked the previous lines using parametric=False and
        # one-sided test.

        # Other options
        pairwise_ttests(dv='Scores',
                        within=['Time'],
                        between=['Group'],
                        subject='Subject',
                        data=df,
                        padjust='fdr_bh',
                        alpha=.01,
                        return_desc=True,
                        parametric=False)

        # .Unbalanced data
        # ..With marginal means
        pt1 = pairwise_ttests(dv='Scores',
                              within='Time',
                              between='Group',
                              subject='Subject',
                              data=df_unb,
                              padjust='bonf')
        # ...Within main effect: OK with JASP
        assert np.array_equal(pt1.loc[:5, 'T'].round(3),
                              [-0.777, -1.344, -2.039, -0.814, -1.492, -0.627])
        assert np.array_equal(pt1.loc[:5, 'p-corr'].round(3),
                              [1., 1., 0.313, 1., 0.889, 1.])
        assert np.array_equal(pt1.loc[:5, 'BF10'].astype(float),
                              [0.273, 0.463, 1.221, 0.280, 0.554, 0.248])
        # ...Between main effect: slightly different from JASP (why?)
        #      True with or without the Welch correction...
        assert (pt1.loc[6:8, 'p-corr'] > 0.20).all()
        # ...Interaction: slightly different because JASP pool the error term
        #    across the between-subject groups.
        # Below the interaction JASP bonferroni-correct p-values, which are
        # more conservative because JASP perform all possible pairwise tests
        # jasp_pbonf = [1., 1., 1., 1., 1., 1., 1., 0.886, 1., 1., 1., 1.]
        assert (pt1.loc[9:, 'p-corr'] > 0.05).all()
        # Check that the Welch correction is applied by default
        assert not pt1['dof'].apply(lambda x: x.is_integer()).all()

        # Same after random ordering of subject (issue 151).
        # The shuffle is seeded so that a failure is reproducible.
        pt_sort = pairwise_ttests(dv='Scores',
                                  within='Time',
                                  between='Group',
                                  subject='Subject',
                                  data=df_unb.sample(frac=1, replace=False,
                                                     random_state=42),
                                  padjust='bonf')
        assert pt_sort.round(5).equals(pt1.round(5))

        # ..No marginal means
        pt2 = pairwise_ttests(dv='Scores',
                              within='Time',
                              between='Group',
                              subject='Subject',
                              data=df_unb,
                              padjust='bonf',
                              marginal=False)

        # This only impacts the between-subject contrast
        # NOTE(review): the result of this comparison is discarded (there is
        # no ``assert``), so it currently checks nothing. Confirm the
        # expected pattern before turning it into an assertion.
        np.array_equal(
            (pt1['T'] == pt2['T']).astype(int),
            [1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
        assert (pt1.loc[6:8, 'dof'] < pt2.loc[6:8, 'dof']).all()

        # Without the Welch correction, check that all the DF are integer
        pt3 = pairwise_ttests(dv='Scores',
                              within='Time',
                              between='Group',
                              subject='Subject',
                              data=df_unb,
                              correction=False)
        assert pt3['dof'].apply(lambda x: x.is_integer()).all()

        # -------------------------------------------------------------------
        # Two between factors (FACTORIAL)
        # -------------------------------------------------------------------
        pt = df_aov2.pairwise_ttests(dv='Yield',
                                     between=['Blend', 'Crop'],
                                     padjust='holm').round(3)

        # The T and p-values are close but not exactly the same as JASP /
        # JAMOVI, because they both pool the error term.
        # The dof are not available in JASP, but in JAMOVI they are 18
        # everywhere, which I'm not sure to understand why...
        assert np.array_equal(pt.loc[:3, 'p-unc'] < 0.05,
                              [False, False, False, True])

        # However, the Bayes Factor of the simple main effects are the same...!
        # NOTE(review): the result of this comparison is discarded (there is
        # no ``assert``), so it currently checks nothing. Confirm the
        # expected values before turning it into an assertion.
        np.array_equal(pt.loc[:3, 'BF10'].astype(float),
                       [0.374, 0.533, 0.711, 2.287])

        # Using the Welch method (all df should be non-integer)
        pt_c = df_aov2.pairwise_ttests(dv='Yield',
                                       between=['Blend', 'Crop'],
                                       padjust='holm',
                                       correction=True)
        assert not pt_c['dof'].apply(lambda x: x.is_integer()).any()

        # The ``marginal`` option has no impact here.
        assert pt.equals(
            df_aov2.pairwise_ttests(dv='Yield',
                                    between=['Blend', 'Crop'],
                                    padjust='holm',
                                    marginal=True).round(3))
        # -------------------------------------------------------------------
        # Two within subject factors
        # -------------------------------------------------------------------
        # .Marginal = True
        ptw1 = pairwise_ttests(data=df_rm2,
                               dv='Performance',
                               within=['Time', 'Metric'],
                               subject='Subject',
                               padjust='bonf',
                               marginal=True).round(3)
        # Compare the T values of the simple main effect against JASP
        # Note that the T-values of the interaction are slightly different
        # because JASP pool the error term.
        assert np.array_equal(ptw1.loc[0:3, 'T'], [5.818, 1.559, 7.714, 5.110])

        # Random sorting of the dataframe (issue 151).
        # The shuffle is seeded so that a failure is reproducible.
        pt_sort = pairwise_ttests(data=df_rm2.sample(frac=1, random_state=42),
                                  dv='Performance',
                                  within=['Time', 'Metric'],
                                  subject='Subject',
                                  padjust='bonf',
                                  marginal=True).round(3)
        assert pt_sort.equals(ptw1)

        # Non-parametric (mostly for code coverage)
        pairwise_ttests(data=df_rm2,
                        dv='Performance',
                        within=['Time', 'Metric'],
                        subject='Subject',
                        parametric=False)

        # -------------------------------------------------------------------
        # ERRORS
        # -------------------------------------------------------------------
        # Both multiple between and multiple within
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores',
                            between=['Time', 'Group'],
                            within=['Time', 'Group'],
                            subject='Subject',
                            data=df)

        # Wrong input argument: a between factor with a single level.
        # This overwrites 'Group' in ``df``; ``df`` is reloaded below.
        df['Group'] = 'Control'
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=df)

        # -------------------------------------------------------------------
        # Missing values in repeated measurements
        # -------------------------------------------------------------------
        # 1. Parametric
        df = read_dataset('pairwise_ttests_missing')
        st = pairwise_ttests(dv='Value',
                             within='Condition',
                             subject='Subject',
                             data=df,
                             nan_policy='listwise')
        np.testing.assert_array_equal(st['dof'].to_numpy(), [7, 7, 7])
        st2 = pairwise_ttests(dv='Value',
                              within='Condition',
                              data=df,
                              subject='Subject',
                              nan_policy='pairwise')
        np.testing.assert_array_equal(st2['dof'].to_numpy(), [8, 7, 8])
        # 2. Non-parametric
        st = pairwise_ttests(dv='Value',
                             within='Condition',
                             subject='Subject',
                             data=df,
                             parametric=False,
                             nan_policy='listwise')
        np.testing.assert_array_equal(st['W-val'].to_numpy(), [9, 3, 12])
        st2 = pairwise_ttests(dv='Value',
                              within='Condition',
                              data=df,
                              subject='Subject',
                              nan_policy='pairwise',
                              parametric=False)
        # Tested against a simple for loop on combinations
        np.testing.assert_array_equal(st2['W-val'].to_numpy(), [9, 3, 21])

        # Unbalanced design in repeated measurements.
        # The fixture is built outside the ``raises`` block so that only the
        # call under test can satisfy the expected ValueError.
        df_unbalanced = df.iloc[1:, :].copy()
        with pytest.raises(ValueError):
            pairwise_ttests(data=df_unbalanced,
                            dv='Value',
                            within='Condition',
                            subject='Subject')

        # Two within factors from other datasets and with NaN values
        df2 = read_dataset('rm_anova')
        pairwise_ttests(dv='DesireToKill',
                        within=['Disgustingness', 'Frighteningness'],
                        subject='Subject',
                        padjust='holm',
                        data=df2)

        # -------------------------------------------------------------------
        # Test tail / parametric argument (compare with JASP)
        # -------------------------------------------------------------------
        df = read_dataset('pairwise_ttests')
        # 1. Within
        # 1.1 Parametric
        # 1.1.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             data=df,
                             tail='greater')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.907, 0.941, 0.405])
        assert all(pt.loc[:, 'BF10'].astype(float) < 1)
        # 1.1.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             data=df,
                             tail='less')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.093, 0.059, 0.595])
        assert sum(pt.loc[:, 'BF10'].astype(float) > 1) == 2
        # 1.1.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             data=df,
                             tail='one-sided')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.093, 0.059, 0.405])

        # 1.2 Non-parametric
        # 1.2.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             parametric=False,
                             data=df,
                             tail='greater')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.910, 0.951, 0.483])
        # 1.2.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             parametric=False,
                             data=df,
                             tail='less')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.108, 0.060, 0.551])
        # 1.2.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             parametric=False,
                             data=df,
                             tail='one-sided')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.108, 0.060, 0.483])

        # Compare the RBC value for wilcoxon
        from pingouin.nonparametric import wilcoxon
        x = df[df['Drug'] == 'A']['Scores'].to_numpy()
        y = df[df['Drug'] == 'B']['Scores'].to_numpy()
        assert -0.6 < wilcoxon(x, y).at['Wilcoxon', 'RBC'] < -0.4
        x = df[df['Drug'] == 'B']['Scores'].to_numpy()
        y = df[df['Drug'] == 'C']['Scores'].to_numpy()
        assert wilcoxon(x, y).at['Wilcoxon', 'RBC'].round(3) == 0.030

        # 2. Between
        # 2.1 Parametric
        # 2.1.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             data=df,
                             tail='greater')
        assert pt.loc[0, 'p-unc'].round(3) == 0.932
        assert float(pt.loc[0, 'BF10']) < 1
        # 2.1.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             data=df,
                             tail='less')
        assert pt.loc[0, 'p-unc'].round(3) == 0.068
        assert float(pt.loc[0, 'BF10']) > 1
        # 2.1.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             data=df,
                             tail='one-sided')
        assert pt.loc[0, 'p-unc'].round(3) == 0.068
        assert float(pt.loc[0, 'BF10']) > 1

        # 2.2 Non-parametric
        # 2.2.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             parametric=False,
                             data=df,
                             tail='greater')
        assert pt.loc[0, 'p-unc'].round(3) == 0.901
        # 2.2.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             parametric=False,
                             data=df,
                             tail='less')
        assert pt.loc[0, 'p-unc'].round(3) == 0.105
        # 2.2.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             parametric=False,
                             data=df,
                             tail='one-sided')
        assert pt.loc[0, 'p-unc'].round(3) == 0.105

        # Compare the RBC value for MWU
        from pingouin.nonparametric import mwu
        x = df[df['Gender'] == 'M']['Scores'].to_numpy()
        y = df[df['Gender'] == 'F']['Scores'].to_numpy()
        assert round(abs(mwu(x, y).at['MWU', 'RBC']), 3) == 0.252
    def test_pairwise_ttests(self):
        """Smoke-test ``pairwise_ttests`` over several designs.

        The function is called with within, between, mixed and two-factor
        designs, and invalid arguments are checked to raise ``ValueError``.
        No numerical output is asserted here.

        The test works on a private copy of the ``df`` DataFrame defined
        outside this method, so that the NaN injection and the ``Group``
        overwrite performed below do not leak into other tests that read
        the same object.
        """
        # Private copy: everything below may mutate ``data`` freely.
        data = df.copy()

        # Within + Between + Within * Between
        pairwise_ttests(dv='Scores', within='Time', between='Group',
                        subject='Subject', data=data, alpha=.01)
        # Same design, factors given as single-element lists
        pairwise_ttests(dv='Scores', within=['Time'], between=['Group'],
                        subject='Subject', data=data, padjust='fdr_bh',
                        return_desc=True)
        # Simple within
        pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                        data=data, return_desc=True)
        # Simple between
        # NOTE(review): export_filename presumably writes a CSV file that is
        # not cleaned up afterwards -- confirm and remove it if needed.
        pairwise_ttests(dv='Scores', between='Group',
                        data=data, padjust='bonf', tail='one-sided',
                        effsize='cohen', export_filename='test_export.csv')

        # Two between factors
        pairwise_ttests(dv='Scores', between=['Time', 'Group'], data=data,
                        padjust='holm')

        # Two within subject factors
        pairwise_ttests(dv='Scores', within=['Group', 'Time'],
                        subject='Subject', data=data, padjust='bonf')

        # Wrong tail argument
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=data,
                            tail='wrong')
        # Wrong alpha argument (a string instead of a number)
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=data,
                            alpha='.05')

        # Both multiple between and multiple within
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between=['Time', 'Group'],
                            within=['Time', 'Group'], subject='Subject',
                            data=data)

        # Missing values: blank out two entries of the first column
        data.iloc[[10, 15], 0] = np.nan
        pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                        data=data)
        # Wrong input argument: the between factor has a single level
        data['Group'] = 'Control'
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=data)

        # Two within factors from other datasets and with NaN values
        df2 = read_dataset('rm_anova')
        pairwise_ttests(dv='DesireToKill',
                        within=['Disgustingness', 'Frighteningness'],
                        subject='Subject', padjust='holm', data=df2)
    def test_pairwise_ttests(self):
        """Test function pairwise_ttests.

        Tested against the pairwise.t.test R function and, for the
        ``tail`` / ``parametric`` arguments, against JASP (see the inline
        comments for the reference each expected value comes from).

        Every dataset is loaded locally with ``read_dataset``, so the
        in-place modifications made below stay inside this test.
        """
        df = read_dataset('mixed_anova.csv')
        # Within + Between + Within * Between
        pairwise_ttests(dv='Scores', within='Time', between='Group',
                        subject='Subject', data=df, alpha=.01)
        pairwise_ttests(dv='Scores', within=['Time'], between=['Group'],
                        subject='Subject', data=df, padjust='fdr_bh',
                        return_desc=True)
        # Simple within
        # In R:
        # >>> pairwise.t.test(df$Scores, df$Time, pool.sd = FALSE,
        # ...                 p.adjust.method = 'holm', paired = TRUE)
        pt = pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                             data=df, return_desc=True, padjust='holm')
        np.testing.assert_array_equal(pt.loc[:, 'p-corr'].round(3),
                                      [0.174, 0.024, 0.310])
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.087, 0.008, 0.310])
        pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                        data=df, parametric=False, return_desc=True)
        # Simple between
        # In R:
        # >>> pairwise.t.test(df$Scores, df$Group, pool.sd = FALSE)
        pt = pairwise_ttests(dv='Scores', between='Group', data=df).round(3)
        assert pt.loc[0, 'p-unc'] == 0.023
        pairwise_ttests(dv='Scores', between='Group',
                        data=df, padjust='bonf', tail='one-sided',
                        effsize='cohen', parametric=False,
                        export_filename='test_export.csv')

        # Two between factors
        pt = pairwise_ttests(dv='Scores', between=['Time', 'Group'], data=df,
                             padjust='holm').round(3)
        pairwise_ttests(dv='Scores', between=['Time', 'Group'], data=df,
                        padjust='holm', parametric=False)
        # .. with no interaction
        # The first four rows of the full table, minus the 'Time' column,
        # must match the table computed with interaction=False.
        pt_no_inter = df.pairwise_ttests(dv='Scores',
                                         between=['Time', 'Group'],
                                         interaction=False,
                                         padjust='holm').round(3)
        assert pt.drop(columns=['Time']).iloc[0:4, :].equals(pt_no_inter)

        # Two within subject factors
        ptw = pairwise_ttests(data=df, dv='Scores', within=['Group', 'Time'],
                              subject='Subject', padjust='bonf',
                              parametric=False).round(3)
        ptw_no_inter = df.pairwise_ttests(dv='Scores',
                                          within=['Group', 'Time'],
                                          subject='Subject', padjust='bonf',
                                          interaction=False,
                                          parametric=False).round(3)
        assert ptw.drop(columns=['Group']).iloc[0:4, :].equals(ptw_no_inter)

        # Both multiple between and multiple within
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between=['Time', 'Group'],
                            within=['Time', 'Group'], subject='Subject',
                            data=df)

        # Wrong input argument: the between factor has a single level
        df['Group'] = 'Control'
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=df)

        # Missing values in repeated measurements
        # 1. Parametric
        df = read_dataset('pairwise_ttests_missing')
        st = pairwise_ttests(dv='Value', within='Condition', subject='Subject',
                             data=df, nan_policy='listwise')
        # Listwise: the same degrees of freedom for every pair
        np.testing.assert_array_equal(st['dof'].values, [7, 7, 7])
        st2 = pairwise_ttests(dv='Value', within='Condition', data=df,
                              subject='Subject', nan_policy='pairwise')
        # Pairwise: degrees of freedom differ from one pair to the next
        np.testing.assert_array_equal(st2['dof'].values, [8, 7, 8])
        # 2. Non-parametric
        st = pairwise_ttests(dv='Value', within='Condition', subject='Subject',
                             data=df, parametric=False, nan_policy='listwise')
        np.testing.assert_array_equal(st['W-val'].values, [9, 3, 12])
        st2 = pairwise_ttests(dv='Value', within='Condition', data=df,
                              subject='Subject', nan_policy='pairwise',
                              parametric=False)
        # Tested against a simple for loop on combinations
        np.testing.assert_array_equal(st2['W-val'].values, [9, 3, 21])

        # Unbalanced design in repeated measurements (first row dropped).
        # The fixture is built outside the ``raises`` block so that only the
        # call under test can satisfy the expected ValueError.
        df_unbalanced = df.iloc[1:, :].copy()
        with pytest.raises(ValueError):
            pairwise_ttests(data=df_unbalanced, dv='Value', within='Condition',
                            subject='Subject')

        # Two within factors from other datasets and with NaN values
        df2 = read_dataset('rm_anova')
        pairwise_ttests(dv='DesireToKill',
                        within=['Disgustingness', 'Frighteningness'],
                        subject='Subject', padjust='holm', data=df2)

        # Compare with JASP tail / parametric argument
        df = read_dataset('pairwise_ttests')
        # 1. Within
        # 1.1 Parametric
        # 1.1.1 Tail is greater
        pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                             data=df, tail='greater')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.907, 0.941, 0.405])
        assert all(pt.loc[:, 'BF10'].astype(float) < 1)
        # 1.1.2 Tail is less
        pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                             data=df, tail='less')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.093, 0.059, 0.595])
        assert sum(pt.loc[:, 'BF10'].astype(float) > 1) == 2
        # 1.1.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                             data=df, tail='one-sided')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.093, 0.059, 0.405])

        # 1.2 Non-parametric
        # 1.2.1 Tail is greater
        pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                             parametric=False, data=df, tail='greater')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.910, 0.951, 0.482])
        # 1.2.2 Tail is less
        pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                             parametric=False, data=df, tail='less')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.108, 0.060, 0.554])
        # 1.2.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                             parametric=False, data=df, tail='one-sided')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.108, 0.060, 0.482])

        # Compare the RBC value for wilcoxon
        from pingouin.nonparametric import wilcoxon
        x = df[df['Drug'] == 'A']['Scores'].values
        y = df[df['Drug'] == 'B']['Scores'].values
        assert -0.6 < wilcoxon(x, y).at['Wilcoxon', 'RBC'] < -0.4
        x = df[df['Drug'] == 'B']['Scores'].values
        y = df[df['Drug'] == 'C']['Scores'].values
        assert wilcoxon(x, y).at['Wilcoxon', 'RBC'].round(3) == 0.030

        # 2. Between
        # 2.1 Parametric
        # 2.1.1 Tail is greater
        pt = pairwise_ttests(dv='Scores', between='Gender',
                             data=df, tail='greater')
        assert pt.loc[0, 'p-unc'].round(3) == 0.068
        assert float(pt.loc[0, 'BF10']) > 1
        # 2.1.2 Tail is less
        pt = pairwise_ttests(dv='Scores', between='Gender',
                             data=df, tail='less')
        assert pt.loc[0, 'p-unc'].round(3) == 0.932
        assert float(pt.loc[0, 'BF10']) < 1
        # 2.1.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores', between='Gender',
                             data=df, tail='one-sided')
        assert pt.loc[0, 'p-unc'].round(3) == 0.068
        assert float(pt.loc[0, 'BF10']) > 1

        # 2.2 Non-parametric
        # 2.2.1 Tail is greater
        pt = pairwise_ttests(dv='Scores', between='Gender',
                             parametric=False, data=df, tail='greater')
        assert pt.loc[0, 'p-unc'].round(3) == 0.105
        # 2.2.2 Tail is less
        pt = pairwise_ttests(dv='Scores', between='Gender',
                             parametric=False, data=df, tail='less')
        assert pt.loc[0, 'p-unc'].round(3) == 0.901
        # 2.2.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores', between='Gender',
                             parametric=False, data=df, tail='one-sided')
        assert pt.loc[0, 'p-unc'].round(3) == 0.105

        # Compare the RBC value for MWU
        from pingouin.nonparametric import mwu
        x = df[df['Gender'] == 'M']['Scores'].values
        y = df[df['Gender'] == 'F']['Scores'].values
        # Round before comparing: exact equality on an unrounded float
        # statistic is brittle, and every other check here rounds to 3 dp.
        assert round(abs(mwu(x, y).at['MWU', 'RBC']), 3) == 0.252