Example #1
 def test_wilcoxon(self):
     """Test function wilcoxon"""
     # R: wilcox.test(df$x, df$y, paired = TRUE, exact = FALSE)
     # The V value is slightly different between SciPy and R
     # The p-value, however, is almost identical
     wc_scp = scipy.stats.wilcoxon(x, y, correction=True)
     wc_pg = wilcoxon(x, y, tail='two-sided')
     wc_pg_1 = wilcoxon(x, y, tail='one-sided')
     assert wc_scp[0] == wc_pg.at['Wilcoxon', 'W-val']
     assert wc_scp[1] == wc_pg.at['Wilcoxon', 'p-val']
     # Compare to R canprot::CLES
     assert wc_pg.at['Wilcoxon', 'CLES'] == 0.536
     assert (wc_pg.at['Wilcoxon', 'p-val'] / 2) == wc_pg_1.at['Wilcoxon',
                                                              'p-val']
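
The final assertion above relies on the one-sided p-value being exactly half the two-sided one. That relation follows from the symmetry of the Wilcoxon null distribution; below is a minimal self-contained sketch using only SciPy (the data and seed are hypothetical, not the test fixtures):

import numpy as np
import scipy.stats

rng = np.random.default_rng(0)
x = rng.normal(size=25)
y = x + rng.normal(scale=0.5, size=25)  # continuous data, so no ties/zeros

# Because the null distribution of the signed-rank statistic is symmetric,
# the two-sided p-value is twice the smaller one-sided p-value, capped at 1.
p_two = scipy.stats.wilcoxon(x, y, alternative='two-sided').pvalue
p_less = scipy.stats.wilcoxon(x, y, alternative='less').pvalue
p_greater = scipy.stats.wilcoxon(x, y, alternative='greater').pvalue
assert np.isclose(p_two, min(1.0, 2 * min(p_less, p_greater)))
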
Example #2
 def test_wilcoxon(self):
     """Test function wilcoxon"""
     # R: wilcox.test(df$x, df$y, paired = TRUE, exact = FALSE)
     # The V value is slightly different between SciPy and R
     # The p-value, however, is almost identical
     wc_scp = scipy.stats.wilcoxon(x2, y2, correction=True)
     wc_pg = wilcoxon(x2, y2, alternative='two-sided')
     assert wc_scp[0] == wc_pg.at['Wilcoxon', 'W-val'] == 20.5  # JASP
     assert wc_scp[1] == wc_pg.at['Wilcoxon', 'p-val']
     # Same but using the pre-computed difference
     # The W and p-values should be similar
     wc_pg2 = wilcoxon(np.array(x2) - np.array(y2))
     assert wc_pg.at['Wilcoxon', 'W-val'] == wc_pg2.at['Wilcoxon', 'W-val']
     assert wc_pg.at['Wilcoxon', 'p-val'] == wc_pg2.at['Wilcoxon', 'p-val']
     assert wc_pg.at['Wilcoxon', 'RBC'] == wc_pg2.at['Wilcoxon', 'RBC']
     assert np.isnan(wc_pg2.at['Wilcoxon', 'CLES'])
     wc_pg_less = wilcoxon(x2, y2, alternative='less')
     wc_pg_greater = wilcoxon(x2, y2, alternative='greater')
     # Note that the RBC values are compared to JASP in test_pairwise.py
     # The RBC values in JASP do not change with the tail.
     assert round(wc_pg.at['Wilcoxon', 'RBC'], 3) == -0.379
     assert round(wc_pg_less.at['Wilcoxon', 'RBC'], 3) == -0.379
     assert round(wc_pg_greater.at['Wilcoxon', 'RBC'], 3) == -0.379
     # CLES is compared to:
     # https://janhove.github.io/reporting/2016/11/16/common-language-effect-sizes
     assert round(wc_pg.at['Wilcoxon', 'CLES'], 3) == 0.396
     assert round(wc_pg_less.at['Wilcoxon', 'CLES'], 3) == 0.604
     assert round(wc_pg_greater.at['Wilcoxon', 'CLES'], 3) == 0.396
     with pytest.raises(ValueError):
         wilcoxon(x2, y2, tail='error')
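
The CLES values asserted against the blog post above can be reproduced by brute force: under the common definition, the common language effect size is the proportion of all (x, y) cross-pairs in which x is larger, with ties counted as one half. A hedged sketch of that definition (the helper name is ours, and Pingouin's internal tie handling may differ):

import numpy as np

def cles_brute_force(x, y):
    """Proportion of pairs (xi, yj) with xi > yj; ties count as 0.5."""
    diff = np.asarray(x)[:, None] - np.asarray(y)  # all cross-pair differences
    return ((diff > 0).sum() + 0.5 * (diff == 0).sum()) / diff.size
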
Example #3
 def test_wilcoxon(self):
     """Test function wilcoxon"""
     # R: wilcox.test(df$x, df$y, paired = TRUE, exact = FALSE)
     # The V value is slightly different between SciPy and R
     # The p-value, however, is almost identical
     wc_scp = scipy.stats.wilcoxon(x2, y2, correction=True)
     wc_pg = wilcoxon(x2, y2, tail='two-sided')
     assert wc_scp[0] == wc_pg.at['Wilcoxon', 'W-val'] == 20.5  # JASP
     assert wc_scp[1] == wc_pg.at['Wilcoxon', 'p-val']
     wc_pg_less = wilcoxon(x2, y2, tail='less')
     wc_pg_greater = wilcoxon(x2, y2, tail='greater')
     wc_pg_ones = wilcoxon(x2, y2, tail='one-sided')
     pd.testing.assert_frame_equal(wc_pg_ones, wc_pg_less)
     # Note that the RBC values are compared to JASP in test_pairwise.py
     # The RBC values in JASP do not change with the tail.
     assert round(wc_pg.at['Wilcoxon', 'RBC'], 3) == -0.379
     assert round(wc_pg_less.at['Wilcoxon', 'RBC'], 3) == -0.379
     assert round(wc_pg_greater.at['Wilcoxon', 'RBC'], 3) == -0.379
     # CLES is compared to:
     # https://janhove.github.io/reporting/2016/11/16/common-language-effect-sizes
     assert round(wc_pg.at['Wilcoxon', 'CLES'], 3) == 0.396
     assert round(wc_pg_less.at['Wilcoxon', 'CLES'], 3) == 0.604
     assert round(wc_pg_greater.at['Wilcoxon', 'CLES'], 3) == 0.396
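
The RBC values checked against JASP above are matched-pairs rank-biserial correlations, which have a compact closed form: the difference between the sums of positive and negative signed ranks, divided by the total rank sum (Kerby, 2014). A sketch of that textbook definition, assuming zero differences are dropped (Pingouin's exact handling of zeros may differ):

import numpy as np
from scipy.stats import rankdata

def rank_biserial_paired(x, y):
    """Matched-pairs rank-biserial correlation."""
    d = np.asarray(x) - np.asarray(y)
    d = d[d != 0]                  # drop zero differences
    ranks = rankdata(np.abs(d))    # rank the absolute differences
    r_plus = ranks[d > 0].sum()    # sum of positive signed ranks
    r_minus = ranks[d < 0].sum()   # sum of negative signed ranks
    return (r_plus - r_minus) / ranks.sum()
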
Example #4
    def test_pairwise_ttests(self):
        """Test function pairwise_ttests.
        Tested against the pairwise.t.test R function."""
        df = read_dataset('mixed_anova.csv')
        # Within + Between + Within * Between
        pairwise_ttests(dv='Scores',
                        within='Time',
                        between='Group',
                        subject='Subject',
                        data=df,
                        alpha=.01)
        pairwise_ttests(dv='Scores',
                        within=['Time'],
                        between=['Group'],
                        subject='Subject',
                        data=df,
                        padjust='fdr_bh',
                        return_desc=True)
        # Simple within
        # In R:
        # >>> pairwise.t.test(df$Scores, df$Time, pool.sd = FALSE,
        # ...                 p.adjust.method = 'holm', paired = TRUE)
        pt = pairwise_ttests(dv='Scores',
                             within='Time',
                             subject='Subject',
                             data=df,
                             return_desc=True,
                             padjust='holm')
        np.testing.assert_array_equal(pt.loc[:, 'p-corr'].round(3),
                                      [0.174, 0.024, 0.310])
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.087, 0.008, 0.310])
        pairwise_ttests(dv='Scores',
                        within='Time',
                        subject='Subject',
                        data=df,
                        parametric=False,
                        return_desc=True)
        # Simple between
        # In R:
        # >>> pairwise.t.test(df$Scores, df$Group, pool.sd = FALSE)
        pt = pairwise_ttests(dv='Scores', between='Group', data=df).round(3)
        assert pt.loc[0, 'p-unc'] == 0.023
        pairwise_ttests(dv='Scores',
                        between='Group',
                        data=df,
                        padjust='bonf',
                        tail='one-sided',
                        effsize='cohen',
                        parametric=False,
                        export_filename='test_export.csv')

        # Two between factors
        pairwise_ttests(dv='Scores',
                        between=['Time', 'Group'],
                        data=df,
                        padjust='holm')
        pairwise_ttests(dv='Scores',
                        between=['Time', 'Group'],
                        data=df,
                        padjust='holm',
                        parametric=False)

        # Two within subject factors
        pairwise_ttests(dv='Scores',
                        within=['Group', 'Time'],
                        subject='Subject',
                        data=df,
                        padjust='bonf')
        pairwise_ttests(dv='Scores',
                        within=['Group', 'Time'],
                        subject='Subject',
                        data=df,
                        padjust='bonf',
                        parametric=False)

        # Wrong tail argument
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores',
                            between='Group',
                            data=df,
                            tail='wrong')
        # Wrong alpha argument
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=df, alpha='.05')

        # Both multiple between and multiple within
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores',
                            between=['Time', 'Group'],
                            within=['Time', 'Group'],
                            subject='Subject',
                            data=df)

        # Missing values
        df.iloc[[10, 15], 0] = np.nan
        pairwise_ttests(dv='Scores', within='Time', subject='Subject', data=df)
        # Wrong input argument
        df['Group'] = 'Control'
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=df)

        # Two within factors from other datasets and with NaN values
        df2 = read_dataset('rm_anova')
        pairwise_ttests(dv='DesireToKill',
                        within=['Disgustingness', 'Frighteningness'],
                        subject='Subject',
                        padjust='holm',
                        data=df2)

        # Compare with JASP tail / parametric argument
        df = read_dataset('pairwise_ttests')
        # 1. Within
        # 1.1 Parametric
        # 1.1.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             data=df,
                             tail='greater')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.907, 0.941, 0.405])
        assert all(pt.loc[:, 'BF10'].astype(float) < 1)
        # 1.1.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             data=df,
                             tail='less')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.093, 0.059, 0.595])
        assert sum(pt.loc[:, 'BF10'].astype(float) > 1) == 2
        # 1.1.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             data=df,
                             tail='one-sided')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.093, 0.059, 0.405])

        # 1.2 Non-parametric
        # 1.2.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             parametric=False,
                             data=df,
                             tail='greater')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.910, 0.951, 0.482])
        # 1.2.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             parametric=False,
                             data=df,
                             tail='less')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.108, 0.060, 0.554])
        # 1.2.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             parametric=False,
                             data=df,
                             tail='one-sided')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.108, 0.060, 0.482])

        # Compare the RBC value for wilcoxon
        from pingouin.nonparametric import wilcoxon
        x = df[df['Drug'] == 'A']['Scores'].values
        y = df[df['Drug'] == 'B']['Scores'].values
        assert -0.6 < wilcoxon(x, y).at['Wilcoxon', 'RBC'] < -0.4
        x = df[df['Drug'] == 'B']['Scores'].values
        y = df[df['Drug'] == 'C']['Scores'].values
        assert wilcoxon(x, y).at['Wilcoxon', 'RBC'].round(3) == 0.030

        # 2. Between
        # 2.1 Parametric
        # 2.1.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             data=df,
                             tail='greater')
        assert pt.loc[0, 'p-unc'].round(3) == 0.068
        assert float(pt.loc[0, 'BF10']) > 1
        # 2.1.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             data=df,
                             tail='less')
        assert pt.loc[0, 'p-unc'].round(3) == 0.932
        assert float(pt.loc[0, 'BF10']) < 1
        # 2.1.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             data=df,
                             tail='one-sided')
        assert pt.loc[0, 'p-unc'].round(3) == 0.068
        assert float(pt.loc[0, 'BF10']) > 1

        # 2.2 Non-parametric
        # 2.2.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             parametric=False,
                             data=df,
                             tail='greater')
        assert pt.loc[0, 'p-unc'].round(3) == 0.105
        # 2.2.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             parametric=False,
                             data=df,
                             tail='less')
        assert pt.loc[0, 'p-unc'].round(3) == 0.901
        # 2.2.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             parametric=False,
                             data=df,
                             tail='one-sided')
        assert pt.loc[0, 'p-unc'].round(3) == 0.105

        # Compare the RBC value for MWU
        from pingouin.nonparametric import mwu
        x = df[df['Gender'] == 'M']['Scores'].values
        y = df[df['Gender'] == 'F']['Scores'].values
        assert round(abs(mwu(x, y).at['MWU', 'RBC']), 3) == 0.252
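
The Holm-corrected values validated against R at the top of this test ([0.087, 0.008, 0.310] becoming [0.174, 0.024, 0.310]) follow from the standard Holm step-down rule: sort the p-values, multiply the i-th smallest by (m - i), and enforce monotonicity. A standalone sketch of that rule (not Pingouin's internal multicomp implementation):

import numpy as np

def holm(pvals):
    """Holm step-down adjustment of a vector of p-values."""
    p = np.asarray(pvals, dtype=float)
    m = p.size
    order = np.argsort(p)
    adj = np.empty(m)
    running_max = 0.0
    for rank, idx in enumerate(order):
        # Step-down factor shrinks as we move to larger p-values;
        # running_max enforces monotonicity of the adjusted values.
        running_max = max(running_max, (m - rank) * p[idx])
        adj[idx] = min(1.0, running_max)
    return adj

# holm([0.087, 0.008, 0.310]) -> array([0.174, 0.024, 0.310])
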
Example #5
    def test_pairwise_ttests(self):
        """Test function pairwise_ttests.
        Tested against the pairwise.t.test R function, as well as JASP and
        JAMOVI.

        Notes:
        1) JAMOVI by default pools the error term for the within-subject
        factor in mixed designs. Pingouin does not pool the error term,
        which is the same behavior as JASP.

        2) JASP does not return the uncorrected p-values, therefore only the
        corrected p-values are compared.

        3) JASP does not calculate the Bayes Factor for the interaction
        terms. For mixed and two-way designs, JASP seems to calculate the
        Bayes Factor without aggregating over repeated measurements.

        4) For factorial between-subject contrasts, both JASP and JAMOVI pool
        the error term. This option is not yet implemented in Pingouin.
        Therefore, one cannot directly validate the T and p-values.
        """
        df = read_dataset('mixed_anova.csv')  # Simple and mixed design
        df_sort = df.sort_values('Time')  # Same data, subjects reordered
        # df_sort = df.sample(frac=1)
        df_unb = read_dataset('mixed_anova_unbalanced')
        df_rm2 = read_dataset('rm_anova2')  # 2-way rm design
        df_aov2 = read_dataset('anova2')  # 2-way factorial design

        # -------------------------------------------------------------------
        # Simple within: EASY!
        # -------------------------------------------------------------------
        # In R:
        # >>> pairwise.t.test(df$Scores, df$Time, pool.sd = FALSE,
        # ...                 p.adjust.method = 'holm', paired = TRUE)
        pt = pairwise_ttests(dv='Scores',
                             within='Time',
                             subject='Subject',
                             data=df,
                             return_desc=True,
                             padjust='holm')
        np.testing.assert_array_equal(pt.loc[:, 'p-corr'].round(3),
                                      [0.174, 0.024, 0.310])
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.087, 0.008, 0.310])
        pairwise_ttests(dv='Scores',
                        within='Time',
                        subject='Subject',
                        data=df,
                        parametric=False,
                        return_desc=True)

        # Same after random ordering of subjects (issue 151)
        pt_sort = pairwise_ttests(dv='Scores',
                                  within='Time',
                                  subject='Subject',
                                  data=df_sort,
                                  return_desc=True,
                                  padjust='holm')
        assert pt_sort.equals(pt)

        # -------------------------------------------------------------------
        # Simple between: EASY!
        # -------------------------------------------------------------------
        # In R: >>> pairwise.t.test(df$Scores, df$Group, pool.sd = FALSE)
        pt = pairwise_ttests(dv='Scores', between='Group', data=df).round(3)
        assert pt.loc[0, 'p-unc'] == 0.023
        pairwise_ttests(dv='Scores',
                        between='Group',
                        data=df,
                        padjust='bonf',
                        tail='one-sided',
                        effsize='cohen',
                        parametric=False)

        # Same after random ordering of subjects (issue 151)
        pt_sort = pairwise_ttests(dv='Scores', between='Group',
                                  data=df_sort).round(3)
        assert pt_sort.equals(pt)

        # -------------------------------------------------------------------
        # Mixed design: Within + Between + Within * Between
        # -------------------------------------------------------------------
        # .Balanced data
        # ..With marginal means
        pt = pairwise_ttests(dv='Scores',
                             within='Time',
                             between='Group',
                             subject='Subject',
                             data=df,
                             padjust='holm',
                             interaction=False)
        # ...Within main effect: OK with JASP
        assert np.array_equal(pt['Paired'], [True, True, True, False])
        assert np.array_equal(pt.loc[:2, 'p-corr'].round(3),
                              [0.174, 0.024, 0.310])
        assert np.array_equal(pt.loc[:2, 'BF10'].astype(float),
                              [0.582, 4.232, 0.232])
        # ..Between main effect: T and p-values OK with JASP
        #   but BF10 is only similar when marginal=False (see note in the
        #   2-way RM test below).
        assert pt.loc[3, 'T'].round(3) == -2.248
        assert pt.loc[3, 'p-unc'].round(3) == 0.028
        # ..Interaction: slightly different because JASP pools the error
        #    term across the between-subject groups. JASP does not compute
        #    the BF10 for the interaction.
        pt = pairwise_ttests(dv='Scores',
                             within='Time',
                             between='Group',
                             subject='Subject',
                             data=df,
                             padjust='holm',
                             interaction=True).round(5)
        # Same after random ordering of subjects (issue 151)
        pt_sort = pairwise_ttests(dv='Scores',
                                  within='Time',
                                  between='Group',
                                  subject='Subject',
                                  data=df_sort,
                                  padjust='holm',
                                  interaction=True).round(5)
        assert pt_sort.equals(pt)

        # ..Changing the order of the model with ``within_first=False``:
        #   the output model is now between + within + between * within.
        # https://github.com/raphaelvallat/pingouin/issues/102
        pt = pairwise_ttests(dv='Scores',
                             within='Time',
                             between='Group',
                             subject='Subject',
                             data=df,
                             padjust='holm',
                             within_first=False)
        # This should be equivalent to manually filtering the dataframe to
        # keep only one level of the between factor at a time and then
        # running within-subject pairwise T-tests.
        pt_con = pairwise_ttests(dv='Scores',
                                 within='Time',
                                 subject='Subject',
                                 padjust='holm',
                                 data=df[df['Group'] == 'Control'])
        pt_med = pairwise_ttests(dv='Scores',
                                 within='Time',
                                 subject='Subject',
                                 padjust='holm',
                                 data=df[df['Group'] == 'Meditation'])
        pt_merged = pd.concat([pt_con, pt_med])  # .append removed in pandas 2.0
        # T, dof and p-values should be equal
        assert np.array_equal(pt_merged['T'], pt['T'].iloc[4:])
        assert np.array_equal(pt_merged['dof'], pt['dof'].iloc[4:])
        assert np.array_equal(pt_merged['p-unc'], pt['p-unc'].iloc[4:])
        # However adjusted p-values are not equal because they are calculated
        # separately on each dataframe.
        assert not np.array_equal(pt_merged['p-corr'], pt['p-corr'].iloc[4:])
        # I also manually checked the previous lines using parametric=False
        # and a one-sided test.

        # Other options
        pairwise_ttests(dv='Scores',
                        within=['Time'],
                        between=['Group'],
                        subject='Subject',
                        data=df,
                        padjust='fdr_bh',
                        alpha=.01,
                        return_desc=True,
                        parametric=False)

        # .Unbalanced data
        # ..With marginal means
        pt1 = pairwise_ttests(dv='Scores',
                              within='Time',
                              between='Group',
                              subject='Subject',
                              data=df_unb,
                              padjust='bonf')
        # ...Within main effect: OK with JASP
        assert np.array_equal(pt1.loc[:5, 'T'].round(3),
                              [-0.777, -1.344, -2.039, -0.814, -1.492, -0.627])
        assert np.array_equal(pt1.loc[:5, 'p-corr'].round(3),
                              [1., 1., 0.313, 1., 0.889, 1.])
        assert np.array_equal(pt1.loc[:5, 'BF10'].astype(float),
                              [0.273, 0.463, 1.221, 0.280, 0.554, 0.248])
        # ...Between main effect: slightly different from JASP (why?).
        #    This holds with or without the Welch correction...
        assert (pt1.loc[6:8, 'p-corr'] > 0.20).all()
        # ...Interaction: slightly different because JASP pools the error
        #    term across the between-subject groups.
        # Below are JASP's Bonferroni-corrected p-values for the
        # interaction; they are more conservative because JASP performs
        # all possible pairwise tests.
        # jasp_pbonf = [1., 1., 1., 1., 1., 1., 1., 0.886, 1., 1., 1., 1.]
        assert (pt1.loc[9:, 'p-corr'] > 0.05).all()
        # Check that the Welch correction is applied by default
        assert not pt1['dof'].apply(lambda x: x.is_integer()).all()

        # Same after random ordering of subjects (issue 151)
        pt_sort = pairwise_ttests(dv='Scores',
                                  within='Time',
                                  between='Group',
                                  subject='Subject',
                                  data=df_unb.sample(frac=1, replace=False),
                                  padjust='bonf')
        assert pt_sort.round(5).equals(pt1.round(5))

        # ..No marginal means
        pt2 = pairwise_ttests(dv='Scores',
                              within='Time',
                              between='Group',
                              subject='Subject',
                              data=df_unb,
                              padjust='bonf',
                              marginal=False)

        # This only impacts the between-subject contrast
        assert np.array_equal(
            (pt1['T'] == pt2['T']).astype(int),
            [1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
        assert (pt1.loc[6:8, 'dof'] < pt2.loc[6:8, 'dof']).all()

        # Without the Welch correction, check that all the DF are integer
        pt3 = pairwise_ttests(dv='Scores',
                              within='Time',
                              between='Group',
                              subject='Subject',
                              data=df_unb,
                              correction=False)
        assert pt3['dof'].apply(lambda x: x.is_integer()).all()

        # -------------------------------------------------------------------
        # Two between factors (FACTORIAL)
        # -------------------------------------------------------------------
        pt = df_aov2.pairwise_ttests(dv='Yield',
                                     between=['Blend', 'Crop'],
                                     padjust='holm').round(3)

        # The T and p-values are close but not exactly the same as JASP /
        # JAMOVI, because they both pool the error term.
        # The dof are not available in JASP, but in JAMOVI they are 18
        # everywhere, which I'm not sure I understand...
        assert np.array_equal(pt.loc[:3, 'p-unc'] < 0.05,
                              [False, False, False, True])

        # However, the Bayes Factors of the simple main effects are the same!
        assert np.array_equal(pt.loc[:3, 'BF10'].astype(float),
                              [0.374, 0.533, 0.711, 2.287])

        # Using the Welch method (all df should be non-integer)
        pt_c = df_aov2.pairwise_ttests(dv='Yield',
                                       between=['Blend', 'Crop'],
                                       padjust='holm',
                                       correction=True)
        assert not pt_c['dof'].apply(lambda x: x.is_integer()).any()

        # The ``marginal`` option has no impact here.
        assert pt.equals(
            df_aov2.pairwise_ttests(dv='Yield',
                                    between=['Blend', 'Crop'],
                                    padjust='holm',
                                    marginal=True).round(3))
        # -------------------------------------------------------------------
        # Two within subject factors
        # -------------------------------------------------------------------
        # .Marginal = True
        ptw1 = pairwise_ttests(data=df_rm2,
                               dv='Performance',
                               within=['Time', 'Metric'],
                               subject='Subject',
                               padjust='bonf',
                               marginal=True).round(3)
        # Compare the T-values of the simple main effects against JASP.
        # Note that the T-values of the interaction are slightly different
        # because JASP pools the error term.
        assert np.array_equal(ptw1.loc[0:3, 'T'], [5.818, 1.559, 7.714, 5.110])

        # Random sorting of the dataframe (issue 151)
        pt_sort = pairwise_ttests(data=df_rm2.sample(frac=1),
                                  dv='Performance',
                                  within=['Time', 'Metric'],
                                  subject='Subject',
                                  padjust='bonf',
                                  marginal=True).round(3)
        assert pt_sort.equals(ptw1)

        # Non-parametric (mostly for code coverage)
        pairwise_ttests(data=df_rm2,
                        dv='Performance',
                        within=['Time', 'Metric'],
                        subject='Subject',
                        parametric=False)

        # -------------------------------------------------------------------
        # ERRORS
        # -------------------------------------------------------------------
        # Both multiple between and multiple within
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores',
                            between=['Time', 'Group'],
                            within=['Time', 'Group'],
                            subject='Subject',
                            data=df)

        # Wrong input argument
        df['Group'] = 'Control'
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=df)

        # -------------------------------------------------------------------
        # Missing values in repeated measurements
        # -------------------------------------------------------------------
        # 1. Parametric
        df = read_dataset('pairwise_ttests_missing')
        st = pairwise_ttests(dv='Value',
                             within='Condition',
                             subject='Subject',
                             data=df,
                             nan_policy='listwise')
        np.testing.assert_array_equal(st['dof'].to_numpy(), [7, 7, 7])
        st2 = pairwise_ttests(dv='Value',
                              within='Condition',
                              data=df,
                              subject='Subject',
                              nan_policy='pairwise')
        np.testing.assert_array_equal(st2['dof'].to_numpy(), [8, 7, 8])
        # 2. Non-parametric
        st = pairwise_ttests(dv='Value',
                             within='Condition',
                             subject='Subject',
                             data=df,
                             parametric=False,
                             nan_policy='listwise')
        np.testing.assert_array_equal(st['W-val'].to_numpy(), [9, 3, 12])
        st2 = pairwise_ttests(dv='Value',
                              within='Condition',
                              data=df,
                              subject='Subject',
                              nan_policy='pairwise',
                              parametric=False)
        # Tested against a simple for loop on combinations
        np.testing.assert_array_equal(st2['W-val'].to_numpy(), [9, 3, 21])

        with pytest.raises(ValueError):
            # Unbalanced design in repeated measurements
            df_unbalanced = df.iloc[1:, :].copy()
            pairwise_ttests(data=df_unbalanced,
                            dv='Value',
                            within='Condition',
                            subject='Subject')

        # Two within factors from other datasets and with NaN values
        df2 = read_dataset('rm_anova')
        pairwise_ttests(dv='DesireToKill',
                        within=['Disgustingness', 'Frighteningness'],
                        subject='Subject',
                        padjust='holm',
                        data=df2)

        # -------------------------------------------------------------------
        # Test tail / parametric argument (compare with JASP)
        # -------------------------------------------------------------------
        df = read_dataset('pairwise_ttests')
        # 1. Within
        # 1.1 Parametric
        # 1.1.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             data=df,
                             tail='greater')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.907, 0.941, 0.405])
        assert all(pt.loc[:, 'BF10'].astype(float) < 1)
        # 1.1.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             data=df,
                             tail='less')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.093, 0.059, 0.595])
        assert sum(pt.loc[:, 'BF10'].astype(float) > 1) == 2
        # 1.1.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             data=df,
                             tail='one-sided')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.093, 0.059, 0.405])

        # 1.2 Non-parametric
        # 1.2.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             parametric=False,
                             data=df,
                             tail='greater')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.910, 0.951, 0.483])
        # 1.2.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             parametric=False,
                             data=df,
                             tail='less')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.108, 0.060, 0.551])
        # 1.2.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             within='Drug',
                             subject='Subject',
                             parametric=False,
                             data=df,
                             tail='one-sided')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.108, 0.060, 0.483])

        # Compare the RBC value for wilcoxon
        from pingouin.nonparametric import wilcoxon
        x = df[df['Drug'] == 'A']['Scores'].to_numpy()
        y = df[df['Drug'] == 'B']['Scores'].to_numpy()
        assert -0.6 < wilcoxon(x, y).at['Wilcoxon', 'RBC'] < -0.4
        x = df[df['Drug'] == 'B']['Scores'].to_numpy()
        y = df[df['Drug'] == 'C']['Scores'].to_numpy()
        assert wilcoxon(x, y).at['Wilcoxon', 'RBC'].round(3) == 0.030

        # 2. Between
        # 2.1 Parametric
        # 2.1.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             data=df,
                             tail='greater')
        assert pt.loc[0, 'p-unc'].round(3) == 0.932
        assert float(pt.loc[0, 'BF10']) < 1
        # 2.1.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             data=df,
                             tail='less')
        assert pt.loc[0, 'p-unc'].round(3) == 0.068
        assert float(pt.loc[0, 'BF10']) > 1
        # 2.1.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             data=df,
                             tail='one-sided')
        assert pt.loc[0, 'p-unc'].round(3) == 0.068
        assert float(pt.loc[0, 'BF10']) > 1

        # 2.2 Non-parametric
        # 2.2.1 Tail is greater
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             parametric=False,
                             data=df,
                             tail='greater')
        assert pt.loc[0, 'p-unc'].round(3) == 0.901
        # 2.2.2 Tail is less
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             parametric=False,
                             data=df,
                             tail='less')
        assert pt.loc[0, 'p-unc'].round(3) == 0.105
        # 2.2.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores',
                             between='Gender',
                             parametric=False,
                             data=df,
                             tail='one-sided')
        assert pt.loc[0, 'p-unc'].round(3) == 0.105

        # Compare the RBC value for MWU
        from pingouin.nonparametric import mwu
        x = df[df['Gender'] == 'M']['Scores'].to_numpy()
        y = df[df['Gender'] == 'F']['Scores'].to_numpy()
        assert round(abs(mwu(x, y).at['MWU', 'RBC']), 3) == 0.252
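
Several assertions above hinge on Welch's correction producing non-integer degrees of freedom (and on correction=False producing integer ones). The non-integer dof come from the Welch-Satterthwaite approximation; a minimal sketch of the standard formula, independent of Pingouin's internals:

import numpy as np

def welch_dof(x, y):
    """Welch-Satterthwaite degrees of freedom for two independent samples."""
    vx, vy = np.var(x, ddof=1), np.var(y, ddof=1)
    nx, ny = len(x), len(y)
    num = (vx / nx + vy / ny) ** 2
    den = (vx / nx) ** 2 / (nx - 1) + (vy / ny) ** 2 / (ny - 1)
    return num / den  # generally non-integer, unlike the pooled nx + ny - 2
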
Example #6
 def test_wilcoxon(self):
     """Test function wilcoxon"""
     wilcoxon(x, y, tail='one-sided')
     wilcoxon(x, y, tail='two-sided')
Example #7
def pairwise_ttests(dv=None,
                    between=None,
                    within=None,
                    subject=None,
                    data=None,
                    parametric=True,
                    alpha=.05,
                    tail='two-sided',
                    padjust='none',
                    effsize='hedges',
                    return_desc=False,
                    export_filename=None):
    '''Pairwise T-tests.

    Parameters
    ----------
    dv : string
        Name of column containing the dependent variable.
    between : string or list with 2 elements
        Name of column(s) containing the between factor(s).
    within : string or list with 2 elements
        Name of column(s) containing the within factor(s).
    subject : string
        Name of column containing the subject identifier. Compulsory for
        contrasts including a within-subject factor.
    data : pandas DataFrame
        DataFrame. Note that this function can also directly be used as a
        Pandas method, in which case this argument is no longer needed.
    parametric : boolean
        If True (default), use the parametric :py:func:`ttest` function.
        If False, use :py:func:`pingouin.wilcoxon` or :py:func:`pingouin.mwu`
        for paired or unpaired samples, respectively.
    alpha : float
        Significance level
    tail : string
        Indicates whether to return the 'two-sided' or 'one-sided' p-values
    padjust : string
        Method used for testing and adjustment of p-values.
        Available methods are ::

        'none' : no correction
        'bonferroni' : one-step Bonferroni correction
        'holm' : step-down method using Bonferroni adjustments
        'fdr_bh' : Benjamini/Hochberg FDR correction
        'fdr_by' : Benjamini/Yekutieli FDR correction
    effsize : string or None
        Effect size type. Available methods are ::

        'none' : no effect size
        'cohen' : Unbiased Cohen d
        'hedges' : Hedges g
        'glass' : Glass delta
        'eta-square' : Eta-square
        'odds-ratio' : Odds ratio
        'AUC' : Area Under the Curve
    return_desc : boolean
        If True, append group means and std to the output dataframe
    export_filename : string
        Filename (without extension) for the output file.
        If None, do not export the table.
        By default, the file will be created in the current working
        directory. To change that, specify the filename with its full path.

    Returns
    -------
    stats : DataFrame
        Stats summary ::

        'A' : Name of first measurement
        'B' : Name of second measurement
        'Paired' : indicates whether the two measurements are paired or not
        'Parametric' : indicates if (non)-parametric tests were used
        'Tail' : indicates whether the p-values are one-sided or two-sided
        'T' : T-values (only if parametric=True)
        'U' : Mann-Whitney U value (only if parametric=False and unpaired data)
        'W' : Wilcoxon W value (only if parametric=False and paired data)
        'dof' : degrees of freedom (only if parametric=True)
        'p-unc' : Uncorrected p-values
        'p-corr' : Corrected p-values
        'p-adjust' : p-values correction method
        'BF10' : Bayes Factor
        'hedges' : Hedges effect size
        'CLES' : Common language effect size

    Notes
    -----
    Data are expected to be in long-format. If your data is in wide-format,
    you can use the :py:func:`pandas.melt` function to convert from wide to
    long format.
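
    For example, a minimal wide-to-long conversion (the ``Pre`` and ``Post``
    columns below are hypothetical):

    >>> import pandas as pd
    >>> wide = pd.DataFrame({'Subject': [1, 2], 'Pre': [4., 5.],
    ...                      'Post': [6., 7.]})
    >>> df_long = pd.melt(wide, id_vars='Subject', var_name='Time',
    ...                   value_name='Scores')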

    If ``between`` or ``within`` is a list (e.g. ['col1', 'col2']),
    the function returns 1) the pairwise T-tests between each value of the
    first column, 2) the pairwise T-tests between each value of the second
    column and 3) the interaction between col1 and col2. The interaction
    depends on the order of the list, so ['col1', 'col2'] will not yield
    the same results as ['col2', 'col1'].

    In other words, if ``between`` is a list with two elements, the output
    model is between1 + between2 + between1 * between2.

    Similarly, if ``within`` is a list with two elements, the output model
    is within1 + within2 + within1 * within2.

    If both ``between`` and ``within`` are specified, the function returns
    within + between + within * between.

    Missing values in repeated measurements are automatically removed using
    the :py:func:`pingouin.remove_rm_na` function. However, you should be
    very careful since it can result in the undesired removal of values
    (especially for the interaction effect). We strongly recommend that you
    preprocess your data and remove the missing values before using this
    function.
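
    For example, one conservative way to do this manually is to keep only
    subjects with complete data (``Subject`` and ``Scores`` are hypothetical
    column names):

    >>> complete = data.groupby('Subject').filter(
    ...     lambda g: g['Scores'].notna().all())  # doctest: +SKIP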

    This function has been tested against the `pairwise.t.test` R function.

    See Also
    --------
    ttest : T-test.
    wilcoxon : Non-parametric test for paired samples.
    mwu : Non-parametric test for independent samples.

    Examples
    --------
    1. One between-factor

    >>> from pingouin import pairwise_ttests, read_dataset
    >>> df = read_dataset('mixed_anova.csv')
    >>> post_hocs = pairwise_ttests(dv='Scores', between='Group', data=df)

    2. One within-factor

    >>> post_hocs = pairwise_ttests(dv='Scores', within='Time',
    ...                             subject='Subject', data=df)
    >>> print(post_hocs)  # doctest: +SKIP

    3. Non-parametric pairwise paired test (wilcoxon)

    >>> pairwise_ttests(dv='Scores', within='Time', subject='Subject',
    ...                 data=df, parametric=False)  # doctest: +SKIP

    4. Within + Between + Within * Between with corrected p-values

    >>> posthocs = pairwise_ttests(dv='Scores', within='Time',
    ...                            subject='Subject', between='Group',
    ...                            padjust='bonf', data=df)

    5. Between1 + Between2 + Between1 * Between2

    >>> posthocs = pairwise_ttests(dv='Scores', between=['Group', 'Time'],
    ...                            data=df)
    '''
    from pingouin.parametric import ttest
    from pingouin.nonparametric import wilcoxon, mwu

    # Safety checks
    _check_dataframe(dv=dv,
                     between=between,
                     within=within,
                     subject=subject,
                     effects='all',
                     data=data)

    if tail not in ['one-sided', 'two-sided']:
        raise ValueError('Tail not recognized')

    if not isinstance(alpha, float):
        raise ValueError('Alpha must be float')

    # Check if we have multiple between or within factors
    multiple_between = False
    multiple_within = False
    contrast = None

    if isinstance(between, list):
        if len(between) > 1:
            multiple_between = True
            contrast = 'multiple_between'
            assert all([b in data.keys() for b in between])
        else:
            between = between[0]

    if isinstance(within, list):
        if len(within) > 1:
            multiple_within = True
            contrast = 'multiple_within'
            assert all([w in data.keys() for w in within])
        else:
            within = within[0]

    if all([multiple_within, multiple_between]):
        raise ValueError("Multiple between and within factors are",
                         "currently not supported. Please select only one.")

    # Check the other cases
    if isinstance(between, str) and within is None:
        contrast = 'simple_between'
        assert between in data.keys()
    if isinstance(within, str) and between is None:
        contrast = 'simple_within'
        assert within in data.keys()
    if isinstance(between, str) and isinstance(within, str):
        contrast = 'within_between'
        assert all([between in data.keys(), within in data.keys()])

    # Initialize empty variables
    stats = pd.DataFrame([])
    ddic = {}

    if contrast in ['simple_within', 'simple_between']:
        # OPTION A: SIMPLE MAIN EFFECTS, WITHIN OR BETWEEN
        paired = True if contrast == 'simple_within' else False
        col = within if contrast == 'simple_within' else between
        # Remove NAN in repeated measurements
        if contrast == 'simple_within' and data[dv].isnull().values.any():
            data = remove_rm_na(dv=dv,
                                within=within,
                                subject=subject,
                                data=data)
        # Extract effects
        labels = data[col].unique().tolist()
        for l in labels:
            ddic[l] = data.loc[data[col] == l, dv].values
        # Number and labels of possible comparisons
        if len(labels) >= 2:
            combs = list(combinations(labels, 2))
        else:
            raise ValueError('Columns must have at least two unique values.')
        # Initialize vectors
        for comb in combs:
            col1, col2 = comb
            x = ddic.get(col1)
            y = ddic.get(col2)
            if parametric:
                df_ttest = ttest(x, y, paired=paired, tail=tail)
                # Compute exact CLES
                df_ttest['CLES'] = compute_effsize(x,
                                                   y,
                                                   paired=paired,
                                                   eftype='CLES')
            else:
                if paired:
                    df_ttest = wilcoxon(x, y, tail=tail)
                else:
                    df_ttest = mwu(x, y, tail=tail)
            # Compute Hedges / Cohen
            ef = compute_effsize(x=x, y=y, eftype=effsize, paired=paired)
            stats = _append_stats_dataframe(stats, x, y, col1, col2, alpha,
                                            paired, tail, df_ttest, ef,
                                            effsize)
            stats['Contrast'] = col

        # Multiple comparisons
        padjust = None if stats['p-unc'].size <= 1 else padjust
        if padjust is not None:
            if padjust.lower() != 'none':
                _, stats['p-corr'] = multicomp(stats['p-unc'].values,
                                               alpha=alpha,
                                               method=padjust)
                stats['p-adjust'] = padjust
        else:
            stats['p-corr'] = None
            stats['p-adjust'] = None
    else:
        # B1: BETWEEN1 + BETWEEN2 + BETWEEN1 * BETWEEN2
        # B2: WITHIN1 + WITHIN2 + WITHIN1 * WITHIN2
        # B3: WITHIN + BETWEEN + WITHIN * BETWEEN
        if contrast == 'multiple_between':
            # B1
            factors = between
            fbt = factors
            fwt = [None, None]
            # eft = ['between', 'between']
            paired = False
        elif contrast == 'multiple_within':
            # B2
            factors = within
            fbt = [None, None]
            fwt = factors
            # eft = ['within', 'within']
            paired = True
        else:
            # B3
            factors = [within, between]
            fbt = [None, between]
            fwt = [within, None]
            # eft = ['within', 'between']
            paired = False

        for i, f in enumerate(factors):
            new_stats = pairwise_ttests(dv=dv,
                                        between=fbt[i],
                                        within=fwt[i],
                                        subject=subject,
                                        data=data,
                                        parametric=parametric,
                                        alpha=alpha,
                                        tail=tail,
                                        padjust=padjust,
                                        effsize=effsize,
                                        return_desc=return_desc)
            # DataFrame.append was removed in pandas 2.0: use pd.concat.
            stats = pd.concat([stats, new_stats], ignore_index=True,
                              sort=False)

        # Rename effect size to generic name
        stats.rename(columns={effsize: 'efsize'}, inplace=True)

        # Then compute the interaction between the factors
        labels_fac1 = data[factors[0]].unique().tolist()
        labels_fac2 = data[factors[1]].unique().tolist()
        comb_fac1 = list(combinations(labels_fac1, 2))
        comb_fac2 = list(combinations(labels_fac2, 2))
        lc_fac1 = len(comb_fac1)
        lc_fac2 = len(comb_fac2)

        for lw in labels_fac1:
            for l in labels_fac2:
                tmp = data.loc[data[factors[0]] == lw]
                ddic[lw, l] = tmp.loc[tmp[factors[1]] == l, dv].values

        # Pairwise comparisons
        combs = list(product(labels_fac1, comb_fac2))
        for comb in combs:
            fac1, (col1, col2) = comb
            x = ddic.get((fac1, col1))
            y = ddic.get((fac1, col2))
            if parametric:
                df_ttest = ttest(x, y, paired=paired, tail=tail)
                # Compute exact CLES
                df_ttest['CLES'] = compute_effsize(x,
                                                   y,
                                                   paired=paired,
                                                   eftype='CLES')
            else:
                if paired:
                    df_ttest = wilcoxon(x, y, tail=tail)
                else:
                    df_ttest = mwu(x, y, tail=tail)
            ef = compute_effsize(x=x, y=y, eftype=effsize, paired=paired)
            stats = _append_stats_dataframe(stats, x, y, col1, col2, alpha,
                                            paired, tail, df_ttest, ef,
                                            effsize, fac1)

        # Update the Contrast columns
        txt_inter = factors[0] + ' * ' + factors[1]
        idxitr = np.arange(lc_fac1 + lc_fac2, stats.shape[0]).tolist()
        stats.loc[idxitr, 'Contrast'] = txt_inter

        # Multi-comparison columns
        if padjust is not None and padjust.lower() != 'none':
            _, pcor = multicomp(stats.loc[idxitr, 'p-unc'].values,
                                alpha=alpha,
                                method=padjust)
            stats.loc[idxitr, 'p-corr'] = pcor
            stats.loc[idxitr, 'p-adjust'] = padjust

    # ---------------------------------------------------------------------
    stats['Paired'] = stats['Paired'].astype(bool)
    stats['Parametric'] = parametric

    # Round effect size and CLES
    stats[['efsize', 'CLES']] = stats[['efsize', 'CLES']].round(3)

    # Reorganize column order
    col_order = [
        'Contrast', 'Time', 'A', 'B', 'mean(A)', 'std(A)', 'mean(B)', 'std(B)',
        'Paired', 'Parametric', 'T', 'U', 'W', 'dof', 'tail', 'p-unc',
        'p-corr', 'p-adjust', 'BF10', 'CLES', 'efsize'
    ]

    if return_desc is False:
        stats.drop(columns=['mean(A)', 'mean(B)', 'std(A)', 'std(B)'],
                   inplace=True)

    stats = stats.reindex(columns=col_order)
    stats.dropna(how='all', axis=1, inplace=True)

    # Rename effect size column
    stats.rename(columns={'efsize': effsize}, inplace=True)

    # Rename Time columns
    if contrast in ['multiple_within', 'multiple_between', 'within_between']:
        stats['Time'].fillna('-', inplace=True)
        stats.rename(columns={'Time': factors[0]}, inplace=True)

    if export_filename is not None:
        _export_table(stats, export_filename)
    return stats
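
The order-dependence of the interaction contrasts documented in the docstring comes directly from the enumeration in the function body (combs = list(product(labels_fac1, comb_fac2))): for every level of the first factor, all pairwise combinations of the second factor's levels are tested. A small illustration with hypothetical level names:

from itertools import combinations, product

labels_fac1 = ['Pre', 'Post']    # hypothetical levels of the first factor
labels_fac2 = ['A', 'B', 'C']    # hypothetical levels of the second factor
combs = list(product(labels_fac1, combinations(labels_fac2, 2)))
# [('Pre', ('A', 'B')), ('Pre', ('A', 'C')), ('Pre', ('B', 'C')),
#  ('Post', ('A', 'B')), ('Post', ('A', 'C')), ('Post', ('B', 'C'))]
# Swapping the factors changes which pairs are compared, which is why
# ['col1', 'col2'] and ['col2', 'col1'] do not yield the same results.
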
Example #8
    def test_pairwise_ttests(self):
        """Test function pairwise_ttests.
        Tested against the pairwise.t.test R function."""
        df = read_dataset('mixed_anova.csv')
        # Within + Between + Within * Between
        pairwise_ttests(dv='Scores', within='Time', between='Group',
                        subject='Subject', data=df, alpha=.01)
        pairwise_ttests(dv='Scores', within=['Time'], between=['Group'],
                        subject='Subject', data=df, padjust='fdr_bh',
                        return_desc=True)
        # Simple within
        # In R:
        # >>> pairwise.t.test(df$Scores, df$Time, pool.sd = FALSE,
        # ...                 p.adjust.method = 'holm', paired = TRUE)
        pt = pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                             data=df, return_desc=True, padjust='holm')
        np.testing.assert_array_equal(pt.loc[:, 'p-corr'].round(3),
                                      [0.174, 0.024, 0.310])
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.087, 0.008, 0.310])
        pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                        data=df, parametric=False, return_desc=True)
        # Simple between
        # In R:
        # >>> pairwise.t.test(df$Scores, df$Group, pool.sd = FALSE)
        pt = pairwise_ttests(dv='Scores', between='Group', data=df).round(3)
        assert pt.loc[0, 'p-unc'] == 0.023
        pairwise_ttests(dv='Scores', between='Group',
                        data=df, padjust='bonf', tail='one-sided',
                        effsize='cohen', parametric=False,
                        export_filename='test_export.csv')

        # Two between factors
        pt = pairwise_ttests(dv='Scores', between=['Time', 'Group'], data=df,
                             padjust='holm').round(3)
        pairwise_ttests(dv='Scores', between=['Time', 'Group'], data=df,
                        padjust='holm', parametric=False)
        # .. with no interaction
        pt_no_inter = df.pairwise_ttests(dv='Scores',
                                         between=['Time', 'Group'],
                                         interaction=False,
                                         padjust='holm').round(3)
        assert pt.drop(columns=['Time']).iloc[0:4, :].equals(pt_no_inter)

        # Two within subject factors
        ptw = pairwise_ttests(data=df, dv='Scores', within=['Group', 'Time'],
                              subject='Subject', padjust='bonf',
                              parametric=False).round(3)
        ptw_no_inter = df.pairwise_ttests(dv='Scores',
                                          within=['Group', 'Time'],
                                          subject='Subject', padjust='bonf',
                                          interaction=False,
                                          parametric=False).round(3)
        assert ptw.drop(columns=['Group']).iloc[0:4, :].equals(ptw_no_inter)

        # Both multiple between and multiple within
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between=['Time', 'Group'],
                            within=['Time', 'Group'], subject='Subject',
                            data=df)

        # Wrong input argument
        df['Group'] = 'Control'
        with pytest.raises(ValueError):
            pairwise_ttests(dv='Scores', between='Group', data=df)

        # Missing values in repeated measurements
        # 1. Parametric
        df = read_dataset('pairwise_ttests_missing')
        st = pairwise_ttests(dv='Value', within='Condition', subject='Subject',
                             data=df, nan_policy='listwise')
        np.testing.assert_array_equal(st['dof'].values, [7, 7, 7])
        st2 = pairwise_ttests(dv='Value', within='Condition', data=df,
                              subject='Subject', nan_policy='pairwise')
        np.testing.assert_array_equal(st2['dof'].values, [8, 7, 8])
        # 2. Non-parametric
        st = pairwise_ttests(dv='Value', within='Condition', subject='Subject',
                             data=df, parametric=False, nan_policy='listwise')
        np.testing.assert_array_equal(st['W-val'].values, [9, 3, 12])
        st2 = pairwise_ttests(dv='Value', within='Condition', data=df,
                              subject='Subject', nan_policy='pairwise',
                              parametric=False)
        # Tested against a simple for loop on combinations
        np.testing.assert_array_equal(st2['W-val'].values, [9, 3, 21])

        with pytest.raises(ValueError):
            # Unbalanced design in repeated measurements
            df_unbalanced = df.iloc[1:, :].copy()
            pairwise_ttests(data=df_unbalanced, dv='Value', within='Condition',
                            subject='Subject')

        # Two within factors from other datasets and with NaN values
        df2 = read_dataset('rm_anova')
        pairwise_ttests(dv='DesireToKill',
                        within=['Disgustingness', 'Frighteningness'],
                        subject='Subject', padjust='holm', data=df2)

        # Compare with JASP tail / parametric argument
        df = read_dataset('pairwise_ttests')
        # 1. Within
        # 1.1 Parametric
        # 1.1.1 Tail is greater
        pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                             data=df, tail='greater')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.907, 0.941, 0.405])
        assert all(pt.loc[:, 'BF10'].astype(float) < 1)
        # 1.1.2 Tail is less
        pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                             data=df, tail='less')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.093, 0.059, 0.595])
        assert sum(pt.loc[:, 'BF10'].astype(float) > 1) == 2
        # 1.1.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                             data=df, tail='one-sided')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.093, 0.059, 0.405])

        # 1.2 Non-parametric
        # 1.2.1 Tail is greater
        pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                             parametric=False, data=df, tail='greater')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.910, 0.951, 0.482])
        # 1.2.2 Tail is less
        pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                             parametric=False, data=df, tail='less')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.108, 0.060, 0.554])
        # 1.2.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores', within='Drug', subject='Subject',
                             parametric=False, data=df, tail='one-sided')
        np.testing.assert_array_equal(pt.loc[:, 'p-unc'].round(3),
                                      [0.108, 0.060, 0.482])
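
        # Sanity check of the tail convention asserted above (a sketch,
        # assuming this older pingouin tail API): for each comparison,
        # tail='one-sided' returns the smaller of the 'greater' and 'less'
        # one-sided p-values.
        p_greater = pairwise_ttests(dv='Scores', within='Drug',
                                    subject='Subject', data=df,
                                    tail='greater')['p-unc']
        p_less = pairwise_ttests(dv='Scores', within='Drug',
                                 subject='Subject', data=df,
                                 tail='less')['p-unc']
        p_one = pairwise_ttests(dv='Scores', within='Drug',
                                subject='Subject', data=df,
                                tail='one-sided')['p-unc']
        np.testing.assert_allclose(p_one, np.minimum(p_greater, p_less))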

        # Compare the RBC value for wilcoxon
        from pingouin.nonparametric import wilcoxon
        x = df[df['Drug'] == 'A']['Scores'].values
        y = df[df['Drug'] == 'B']['Scores'].values
        assert -0.6 < wilcoxon(x, y).at['Wilcoxon', 'RBC'] < -0.4
        x = df[df['Drug'] == 'B']['Scores'].values
        y = df[df['Drug'] == 'C']['Scores'].values
        assert wilcoxon(x, y).at['Wilcoxon', 'RBC'].round(3) == 0.030

        # 2. Between
        # 2.1 Parametric
        # 2.1.1 Tail is greater
        pt = pairwise_ttests(dv='Scores', between='Gender',
                             data=df, tail='greater')
        assert pt.loc[0, 'p-unc'].round(3) == 0.068
        assert float(pt.loc[0, 'BF10']) > 1
        # 2.1.2 Tail is less
        pt = pairwise_ttests(dv='Scores', between='Gender',
                             data=df, tail='less')
        assert pt.loc[0, 'p-unc'].round(3) == 0.932
        assert float(pt.loc[0, 'BF10']) < 1
        # 2.1.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores', between='Gender',
                             data=df, tail='one-sided')
        assert pt.loc[0, 'p-unc'].round(3) == 0.068
        assert float(pt.loc[0, 'BF10']) > 1

        # 2.2 Non-parametric
        # 2.2.1 Tail is greater
        pt = pairwise_ttests(dv='Scores', between='Gender',
                             parametric=False, data=df, tail='greater')
        assert pt.loc[0, 'p-unc'].round(3) == 0.105
        # 2.2.2 Tail is less
        pt = pairwise_ttests(dv='Scores', between='Gender',
                             parametric=False, data=df, tail='less')
        assert pt.loc[0, 'p-unc'].round(3) == 0.901
        # 2.2.3 Tail is one-sided: smallest p-value
        pt = pairwise_ttests(dv='Scores', between='Gender',
                             parametric=False, data=df, tail='one-sided')
        assert pt.loc[0, 'p-unc'].round(3) == 0.105

        # Compare the RBC value for MWU
        from pingouin.nonparametric import mwu
        x = df[df['Gender'] == 'M']['Scores'].values
        y = df[df['Gender'] == 'F']['Scores'].values
        assert abs(mwu(x, y).at['MWU', 'RBC']) == 0.252
Example #9
0
import numpy as np
import pandas as pd
from pingouin import ttest, wilcoxon, mwu


def tost(x, y, paired=False, parametric=True, bound=0.3, correction=False):
    """Two one-sided tests (TOST) for equivalence.

    Parameters
    ----------
    x : array_like
        First set of observations.
    y : array_like or float
        Second set of observations. If y is a single value, a one-sample T-test
        is computed.
    paired : boolean
        Specify whether the two observations are related (i.e. repeated
        measures) or independent.
    parametric : boolean
        If True (default), use the parametric :py:func:`ttest` function.
        If False, use :py:func:`pingouin.wilcoxon` or :py:func:`pingouin.mwu`
        for paired or unpaired samples, respectively.
    bound : float
        Magnitude of the region of similarity (i.e. the equivalence bound).
    correction : auto or boolean
        Specify whether or not to correct for unequal variances using the
        Welch separate-variances T-test.

    Returns
    -------
    stats : pandas DataFrame
        TOST summary::

            'upper' : larger of the two one-sided p-values
            'lower' : smaller of the two one-sided p-values
            'p-val' : TOST p-value (equal to the larger one-sided p-value)

    """
    if parametric:
        df_ttesta = ttest(list(np.asarray(y) + bound),
                          x,
                          paired=paired,
                          tail='one-sided',
                          correction=correction)
        df_ttestb = ttest(list(np.asarray(x) + bound),
                          y,
                          paired=paired,
                          tail='one-sided',
                          correction=correction)
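        # tail='one-sided' reports the tail matching the sign of T, so when
        # T < 0 the p-value is flipped (p -> 1 - p) to obtain the 'greater'
        # tail needed by TOST.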
        if df_ttestb.loc['T-test', 'T'] < 0:
            df_ttestb.loc['T-test',
                          'p-val'] = 1 - df_ttestb.loc['T-test', 'p-val']
        if df_ttesta.loc['T-test', 'T'] < 0:
            df_ttesta.loc['T-test',
                          'p-val'] = 1 - df_ttesta.loc['T-test', 'p-val']
        if df_ttestb.loc['T-test', 'p-val'] >= df_ttesta.loc['T-test',
                                                             'p-val']:
            pval = df_ttestb.loc['T-test', 'p-val']
            lpval = df_ttesta.loc['T-test', 'p-val']
        else:
            pval = df_ttesta.loc['T-test', 'p-val']
            lpval = df_ttestb.loc['T-test', 'p-val']
    else:
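        # Non-parametric variant: Wilcoxon signed-rank for paired samples,
        # Mann-Whitney U otherwise, both directly with tail='greater'.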
        if paired:
            df_ttesta = wilcoxon(list(np.asarray(y) + bound),
                                 x,
                                 tail='greater')
            df_ttestb = wilcoxon(list(np.asarray(x) + bound),
                                 y,
                                 tail='greater')
            if df_ttestb.loc['Wilcoxon', 'p-val'] >= df_ttesta.loc['Wilcoxon',
                                                                   'p-val']:
                pval = df_ttestb.loc['Wilcoxon', 'p-val']
                lpval = df_ttesta.loc['Wilcoxon', 'p-val']
            else:
                pval = df_ttesta.loc['Wilcoxon', 'p-val']
                lpval = df_ttestb.loc['Wilcoxon', 'p-val']
        else:
            df_ttesta = mwu(list(np.asarray(y) + bound), x, tail='greater')
            df_ttestb = mwu(list(np.asarray(x) + bound), y, tail='greater')
            if df_ttestb.loc['MWU', 'p-val'] >= df_ttesta.loc['MWU', 'p-val']:
                pval = df_ttestb.loc['MWU', 'p-val']
                lpval = df_ttesta.loc['MWU', 'p-val']
            else:
                pval = df_ttesta.loc['MWU', 'p-val']
                lpval = df_ttestb.loc['MWU', 'p-val']
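    # 'upper' duplicates the TOST p-value (the larger one-sided p-value);
    # 'lower' holds the smaller one-sided p-value.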
    stats = {'p-val': pval, 'upper': pval, 'lower': lpval}

    # Convert to dataframe
    stats = pd.DataFrame.from_records(stats, index=['TOST'])

    col_order = ['upper', 'lower', 'p-val']
    stats = stats.reindex(columns=col_order)
    stats.dropna(how='all', axis=1, inplace=True)
    return stats
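
A minimal usage sketch (hypothetical data; it assumes numpy is installed and a
pingouin version exposing the older ``tail`` API used above):

import numpy as np

np.random.seed(42)
a = np.random.normal(5.0, 1.0, size=30)        # hypothetical sample 1
b = a + np.random.normal(0.0, 0.1, size=30)    # nearly identical sample 2

# Paired parametric TOST with an equivalence bound of 0.3: a small 'p-val'
# supports equivalence of the two samples within +/- 0.3.
print(tost(a, b, paired=True, bound=0.3))

# Non-parametric, independent-samples variant (Mann-Whitney U based).
print(tost(a, b, parametric=False, bound=0.3))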