Python read_dataset Examples, pingouin.read_dataset Python Examples

Example #1

0

Show file

File: test_plotting.py Project: vishalbelsare/pingouin

 def test_plot_paired(self):
     """Smoke-test plot_paired() with two and with three within levels."""
     base = dict(dv='Scores', within='Time', subject='Subject')

     # Two within levels only (June and August)
     pair = read_dataset('mixed_anova').query(
         "Group == 'Meditation' and Subject > 40 and "
         "(Time == 'August' or Time == 'June')").copy()
     pair.loc[[101, 161], 'Scores'] = 6
     returned = plot_paired(data=pair, **base)
     assert isinstance(returned, matplotlib.axes.Axes)

     # Drawing on user-supplied axes, with and without an explicit order
     _, (left, right) = plt.subplots(1, 2, figsize=(9, 4))
     plot_paired(data=pair, boxplot=False, ax=left, **base)
     plot_paired(data=pair, order=['June', 'August'], ax=right, **base)
     plot_paired(data=pair, order=['June', 'August'],
                 boxplot_in_front=True, ax=right, **base)

     # Test with more than two within levels
     triple = read_dataset('mixed_anova').query(
         "Group == 'Meditation' and Subject > 40").copy()
     triple.loc[[101, 161], 'Scores'] = 6
     plot_paired(data=triple, order=['January', 'June', 'August'], **base)
     plot_paired(data=triple, order=['January', 'June', 'August'],
                 orient='h', **base)
     plot_paired(data=triple, orient='h', boxplot=False, **base)
     plt.close('all')

Example #2

0

Show file

    def test_rm_anova2(self):
        """Check the two-way repeated measures ANOVA against JASP."""
        data = read_dataset('rm_anova2')

        def run(**extra):
            # Same design every time; only the optional arguments vary.
            return rm_anova(data=data, subject='Subject',
                            within=['Time', 'Metric'], dv='Performance',
                            **extra).round(3)

        aov = run()
        jasp = {'MS': [828.817, 682.617, 112.217],
                'F': [33.852, 26.959, 12.632],
                'np2': [0.790, 0.750, 0.584],
                'eps': [1., 0.969, 0.727]}
        for column, reference in jasp.items():
            array_equal(aov.loc[:, column], reference)

        # With different effect sizes
        aov = run(effsize="n2")
        array_equal(aov.loc[:, 'n2'], [0.255, 0.419, 0.069])

        aov = run(effsize="ng2")
        array_equal(aov.loc[:, 'ng2'], [0.254, 0.359, 0.084])

        # Two factors with missing values: no direct JASP comparison is
        # possible, because Pingouin automatically removes missing values
        # (on the last factor) whereas JASP uses a regression-based approach
        # which can handle them.
        missing = read_dataset('rm_missing')
        missing.rm_anova(dv='BOLD', within=['Session', 'Time'],
                         subject='Subj')

        # Error: more than two factors
        with pytest.raises(ValueError):
            missing.rm_anova(dv='BOLD', within=['Session', 'Time', 'Wrong'],
                             subject='Subj')

Example #3

0

Show file

File: test_parametric.py Project: palline1/pingouin

    def test_rm_anova2(self):
        """Check the two-way repeated measures ANOVA against JASP."""
        table = rm_anova(data=read_dataset('rm_anova2'),
                         subject='Subject',
                         within=['Time', 'Metric'],
                         dv='Performance',
                         export_filename='test_export.csv')
        table = table.round(3)
        jasp = (('MS', [828.817, 682.617, 112.217]),
                ('F', [33.852, 26.959, 12.632]),
                ('np2', [0.790, 0.750, 0.584]))
        for column, reference in jasp:
            array_equal(table.loc[:, column].values, reference)
        assert table.loc[0, "eps"] == 1.000
        assert table.loc[1, "eps"] == 0.969
        assert table.loc[2, "eps"] >= 0.500  # 0.5 is the lower bound

        # With missing values
        missing = read_dataset('rm_missing')
        missing.rm_anova(dv='BOLD', within=['Session', 'Time'],
                         subject='Subj')

        # Error: more than two factors
        with pytest.raises(ValueError):
            missing.rm_anova(dv='BOLD',
                             within=['Session', 'Time', 'Wrong'],
                             subject='Subj')

Example #4

0

Show file

File: test_reliability.py Project: zzy17667036/pingouin

 def test_cronbach_alpha(self):
     """Check cronbach_alpha against the R package psych.

     Results differ slightly from R when missing values are present
     in the data.
     """
     long_format = dict(items='Items', scores='Scores', subject='Subj')
     df = read_dataset('cronbach_alpha')
     coef, bounds = cronbach_alpha(data=df, **long_format)
     assert round(coef, 3) == 0.592
     assert bounds[0] == .195
     assert bounds[1] == .840

     # With missing values
     df.loc[2, 'Scores'] = np.nan
     cronbach_alpha(data=df, **long_format)
     # In R = psych:alpha(data, use="complete.obs")
     cronbach_alpha(data=df, nan_policy='listwise', **long_format)

     # Wide format
     wide = read_dataset('cronbach_wide_missing')
     coef, _ = cronbach_alpha(data=wide)
     assert round(coef, 2) == .73
     coef, _ = cronbach_alpha(data=wide, nan_policy='listwise')
     assert round(coef, 2) == .80

Example #5

0

Show file

File: test_parametric.py Project: agamemnonc/pingouin

    def test_rm_anova2(self):
        """Check the two-way repeated measures ANOVA against JASP."""
        table = rm_anova(data=read_dataset('rm_anova2'),
                         subject='Subject',
                         within=['Time', 'Metric'],
                         dv='Performance',
                         export_filename='test_export.csv').round(3)
        # One tuple of JASP reference values per column, in row order.
        jasp = (("MS", (828.817, 682.617, 112.217)),
                ("F", (33.852, 26.959, 12.632)),
                ("np2", (0.790, 0.750, 0.584)))
        for column, references in jasp:
            for row, reference in enumerate(references):
                assert table.loc[row, column] == reference
        assert table.loc[0, "eps"] == 1.000
        assert table.loc[1, "eps"] == 0.969
        assert table.loc[2, "eps"] >= 0.500  # 0.5 is the lower bound

        # With missing values
        missing = read_dataset('rm_missing')
        missing.rm_anova(dv='BOLD', within=['Session', 'Time'],
                         subject='Subj')

Example #6

0

Show file

File: test_pairwise.py Project: zzy17667036/pingouin

    def test_pairwise_gameshowell(self):
        """Test function pairwise_gameshowell.

        The p-values are slightly different because of a different algorithm
        used to calculate the studentized range approximation, but
        significance should be the same.

        Three scenarios are checked: the hair color dataset, the Palmer
        Penguins dataset, and a subset of the penguins dataset with 20 rows
        per species and a categorical ``between`` column.
        """
        # Compare with R package `userfriendlyscience` - Hair color dataset
        # Update Feb 2021: The userfriendlyscience package has been removed
        # from CRAN.
        df = read_dataset('anova')
        stats = pairwise_gameshowell(dv='Pain threshold', between='Hair color',
                                     data=df)
        # Only the magnitude of T is compared here; its sign is ignored.
        assert np.array_equal(np.abs(stats['T'].round(2)),
                              [2.47, 1.42, 1.75, 4.09, 1.11, 3.56])
        assert np.array_equal(stats['df'].round(2),
                              [7.91, 7.94, 6.56, 8.0, 6.82, 6.77])
        # JASP: [0.1401, 0.5228, 0.3715, 0.0148, 0.6980, 0.0378]
        # Pingouin: [0.1401, 0.5220, 0.3722, 0.0148, 0.6848, 0.0378]
        # Loose tolerance (atol=0.05) because the p-values are approximations.
        assert np.allclose([0.1401, 0.5228, 0.3715, 0.0148, 0.6980, 0.0378],
                           stats.loc[:, 'pval'].to_numpy().round(3),
                           atol=0.05)
        # Flag each comparison as significant or not at the 0.05 level.
        sig = stats['pval'].apply(lambda x: 'Yes' if x < 0.05 else
                                  'No').to_numpy()
        assert np.array_equal(sig, ['No', 'No', 'No', 'Yes', 'No', 'Yes'])
        # Compare with JASP in the Palmer Penguins dataset
        df = read_dataset("penguins")
        stats = pairwise_gameshowell(data=df, dv="body_mass_g",
                                     between="species").round(4)
        assert np.array_equal(stats['A'], ["Adelie", "Adelie", "Chinstrap"])
        assert np.array_equal(stats['B'], ["Chinstrap", "Gentoo", "Gentoo"])
        assert np.array_equal(stats['diff'], [-32.426, -1375.354, -1342.928])
        assert np.array_equal(stats['se'], [59.7064, 58.8109, 65.1028])
        assert np.array_equal(stats['df'], [152.4548, 249.6426, 170.4044])
        assert np.array_equal(stats['T'], [-0.5431, -23.3860, -20.6278])
        # P-values JASP: [0.8502, 0.0000, 0.0000]
        # P-values Pingouin: [0.8339, 0.0010, 0.0010]
        # The p-values differ, so only the significance decision is asserted.
        sig = stats['pval'].apply(lambda x: 'Yes' if x < 0.05 else
                                  'No').to_numpy()
        assert np.array_equal(sig, ['No', 'Yes', 'Yes'])

        # Same but with balanced group
        # (keep the first 20 rows of each species)
        df_balanced = df.groupby('species').head(20).copy()
        # To complicate things, let's encode between as a categorical
        df_balanced['species'] = df_balanced['species'].astype('category')
        stats = pairwise_gameshowell(data=df_balanced, dv="body_mass_g",
                                     between="species").round(4)
        assert np.array_equal(stats['A'], ["Adelie", "Adelie", "Chinstrap"])
        assert np.array_equal(stats['B'], ["Chinstrap", "Gentoo", "Gentoo"])
        assert np.array_equal(stats['diff'], [-142.5, -1457.5, -1315.])
        assert np.array_equal(stats['se'], [104.5589, 163.1546, 154.1104])
        assert np.array_equal(stats['df'], [35.5510, 30.8479, 26.4576])
        assert np.array_equal(stats['T'], [-1.3629, -8.9332, -8.5328])
        # P-values JASP: [0.3709, 0.0000, 0.0000]
        # P-values Pingouin: [0.3719, 0.0010, 0.0010]
        sig = stats['pval'].apply(lambda x: 'Yes' if x < 0.05 else
                                  'No').to_numpy()
        assert np.array_equal(sig, ['No', 'Yes', 'Yes'])

Example #7

0

Show file

File: test_pairwise.py Project: vishalbelsare/pingouin

    def test_pairwise_tukey(self):
        """Test function pairwise_tukey.

        The p-values are slightly different because of a different algorithm
        used to calculate the studentized range approximation, but
        significance should be the same.

        Three scenarios are checked: the hair color dataset, the Palmer
        Penguins dataset, and a subset of the penguins dataset with 20 rows
        per species and a categorical ``between`` column.
        """
        # Compare with R package `userfriendlyscience` - Hair color dataset
        # Update Feb 2021: The userfriendlyscience package has been removed
        # from CRAN.
        df = read_dataset('anova')
        stats = pairwise_tukey(dv='Pain threshold',
                               between='Hair color',
                               data=df)
        # JASP: [0.0741, 0.4356, 0.4147, 0.0037, 0.7893, 0.0366]
        # Pingouin: [0.0742, 0.4369, 0.4160, 0.0037, 0.7697, 0.0367]
        # Loose tolerance (atol=0.05) because the p-values are approximations.
        assert np.allclose([0.074, 0.435, 0.415, 0.004, 0.789, 0.037],
                           stats.loc[:, 'p-tukey'].to_numpy().round(3),
                           atol=0.05)
        # Compare with JASP in the Palmer Penguins dataset
        # The between factor (Species) is unbalanced.
        df = read_dataset("penguins")
        # Here the function is called through the DataFrame method.
        stats = df.pairwise_tukey(dv="body_mass_g", between="species").round(4)
        assert np.array_equal(stats['A'], ["Adelie", "Adelie", "Chinstrap"])
        assert np.array_equal(stats['B'], ["Chinstrap", "Gentoo", "Gentoo"])
        assert np.array_equal(stats['diff'], [-32.426, -1375.354, -1342.928])
        # SE is different for each group (Tukey-Kramer)
        assert np.array_equal(stats['se'], [67.5117, 56.1480, 69.8569])
        assert np.array_equal(stats['T'], [-0.4803, -24.4952, -19.2240])
        # P-values JASP: [0.8807, 0.0000, 0.0000]
        # P-values Pingouin: [0.8694, 0.0010, 0.0010]
        # The p-values differ, so only the significance decision is asserted.
        sig = stats['p-tukey'].apply(lambda x: 'Yes'
                                     if x < 0.05 else 'No').to_numpy()
        assert np.array_equal(sig, ['No', 'Yes', 'Yes'])

        # Same but with balanced group
        # (keep the first 20 rows of each species)
        df_balanced = df.groupby('species').head(20).copy()
        # To complicate things, let's encode between as a categorical
        df_balanced['species'] = df_balanced['species'].astype('category')
        stats = df_balanced.pairwise_tukey(dv="body_mass_g",
                                           between="species").round(4)
        assert np.array_equal(stats['A'], ["Adelie", "Adelie", "Chinstrap"])
        assert np.array_equal(stats['B'], ["Chinstrap", "Gentoo", "Gentoo"])
        assert np.array_equal(stats['diff'], [-142.5, -1457.5, -1315.])
        # SE is the same for all groups (Tukey HSD)
        assert np.array_equal(stats['se'], [142.9475, 142.9475, 142.9475])
        assert np.array_equal(stats['T'], [-0.9969, -10.1961, -9.1992])
        # P-values JASP: [0.5818, 0.0000, 0.0000]
        # P-values Pingouin: [0.5766, 0.0010, 0.0010]
        sig = stats['p-tukey'].apply(lambda x: 'Yes'
                                     if x < 0.05 else 'No').to_numpy()
        assert np.array_equal(sig, ['No', 'Yes', 'Yes'])

Example #8

0

Show file

File: test_correlation.py Project: snijesh/pingouin

 def test_partial_corr(self):
     """Check partial_corr against the R package ppcor and JASP."""
     df = read_dataset('partial_corr')

     # Single covariate
     res = partial_corr(data=df, x='x', y='y', covar='cv1')
     assert round(res.at['pearson', 'r'], 3) == 0.568
     # Warning: Spearman is slightly different from the ppcor package (is
     # this caused by a difference between Python and R when computing
     # ranks?), so the reference value r == 0.578 is not asserted.
     df.partial_corr(x='x', y='y', covar='cv1', method='spearman')

     # Partial correlation of x and y controlling for multiple covariates
     partial_corr(data=df, x='x', y='y', covar=['cv1'])
     res = partial_corr(data=df, x='x', y='y',
                        covar=['cv1', 'cv2', 'cv3'])
     assert round(res.at['pearson', 'r'], 3) == 0.493
     partial_corr(data=df, x='x', y='y', covar=['cv1', 'cv2', 'cv3'],
                  method='percbend')

     # Semi-partial correlation
     df.partial_corr(x='x', y='y', y_covar='cv1')
     res = df.partial_corr(x='x', y='y', x_covar=['cv1', 'cv2', 'cv3'])
     assert round(res.at['pearson', 'r'], 3) == 0.463
     res = df.partial_corr(x='x', y='y', y_covar=['cv1', 'cv2', 'cv3'])
     assert round(res.at['pearson', 'r'], 3) == 0.421
     partial_corr(data=df, x='x', y='y', x_covar='cv1',
                  y_covar=['cv2', 'cv3'], method='spearman')

     # Passing both covar and x_covar is expected to be rejected
     with pytest.raises(ValueError):
         partial_corr(data=df, x='x', y='y', covar='cv2', x_covar='cv1')

Example #9

0

Show file

File: test_utils.py Project: agamemnonc/pingouin

 def test_remove_rm_na(self):
     """Test function remove_rm_na.

     Covers one within factor, two within factors (with and without an
     explicit ``dv``), the ``aggregate`` option, and the ValueError that is
     expected once a subject identifier is missing.
     """
     # With one within factor
     df = pd.DataFrame({'Time': ['A', 'A', 'B', 'B'],
                        'Values': [1.52, np.nan, 8.2, 3.4],
                        'Ss': [0, 1, 0, 1]})
     df = remove_rm_na(dv='Values', within='Time', subject='Ss', data=df)
     # Subject 1 has a missing value, so a single subject must remain.
     assert df['Ss'].nunique() == 1
     # With multiple factors and an explicit dependent variable
     df = read_dataset('rm_missing')
     stats = remove_rm_na(data=df, dv='BOLD', within=['Session', 'Time'],
                          subject='Subj')
     assert stats['BOLD'].isnull().sum() == 0
     assert stats['Memory'].isnull().sum() == 5
     # Multiple factors, no dv specified
     stats = remove_rm_na(data=df, within=['Time', 'Session'],
                          subject='Subj')
     assert stats['BOLD'].isnull().sum() == 0
     assert stats['Memory'].isnull().sum() == 0
     # Aggregation
     remove_rm_na(data=df, dv='BOLD', within='Session', subject='Subj')
     remove_rm_na(data=df, within='Session', subject='Subj',
                  aggregate='sum')
     remove_rm_na(data=df, within='Session', subject='Subj',
                  aggregate='first')
     # Blank out the subject identifier of row 1. ``.loc`` takes
     # (row label, column label); the reversed order would silently append
     # a new 'Subj' row and a new column named 1 instead.
     df.loc[1, 'Subj'] = np.nan
     with pytest.raises(ValueError):
         remove_rm_na(data=df, within='Session', subject='Subj')

Example #10

0

Show file

File: test_plotting.py Project: papr/pingouin

 def test_plot_rm_corr(self):
     """Test plot_rm_corr()."""
     common = dict(data=read_dataset('rm_corr'), x='pH', y='PacO2',
                   subject='Subject')
     # Default call: only checked for running without error.
     plot_rm_corr(**common)
     grid = plot_rm_corr(legend=False, **common)
     assert isinstance(grid, sns.FacetGrid)

Example #11

0

Show file

File: test_reliability.py Project: zzy17667036/pingouin

    def test_intraclass_corr(self):
        """Test function intraclass_corr

        Compare to the ICC function of the R package psych, then to a second
        example (real-statistics), and finally check the ``nan_policy``
        argument after one rating has been replaced by NaN.
        """
        # Example from the R package psych (Shrout and Fleiss 1979)
        df_psych = pd.DataFrame({'S': [1, 2, 3, 4, 5, 6],
                                 'J1': [9, 6, 8, 7, 10, 6],
                                 'J2': [2, 1, 4, 1, 5, 2],
                                 'J3': [5, 3, 6, 2, 6, 4],
                                 'J4': [8, 2, 8, 6, 9, 7]})
        # Reshape from wide (one column per judge) to long format.
        df_psych = df_psych.melt(id_vars='S', var_name='J', value_name='Y')
        icc = intraclass_corr(data=df_psych, targets='S', raters='J',
                              ratings='Y')
        np.testing.assert_almost_equal(np.round(icc['ICC'].to_numpy(), 2),
                                       [.17, .29, .71, .44, .62, .91])
        np.testing.assert_almost_equal(np.round(icc['F'], 1),
                                       [1.8, 11., 11., 1.8, 11., 11.])
        np.testing.assert_almost_equal((icc['df1']), [5] * 6)
        np.testing.assert_almost_equal((icc['df2']), [18, 15, 15, 18, 15, 15])
        np.testing.assert_almost_equal((icc['pval']),
                                       [0.16472, 0.00013, 0.00013, 0.16472,
                                        0.00013, 0.00013], decimal=4)
        # explode() flattens the per-row CI entries into one long sequence:
        # even positions are taken as lower bounds, odd positions as upper.
        lower = icc['CI95%'].explode().to_numpy()[::2].astype(float)
        upper = icc['CI95%'].explode().to_numpy()[1::2].astype(float)
        np.testing.assert_almost_equal(lower, [-.13, .02, .34, -.88, .07, .68])
        np.testing.assert_almost_equal(upper, [.72, .76, .95, .91, .93, .99])

        # Second example (real-statistics)
        df = read_dataset('icc')
        icc = intraclass_corr(data=df, targets='Wine', raters='Judge',
                              ratings='Scores')
        np.testing.assert_almost_equal(np.round(icc['ICC'].to_numpy(), 2),
                                       [0.73, 0.73, 0.73, 0.91, 0.91, 0.92])
        np.testing.assert_almost_equal(np.round(icc['F']), [12] * 6)
        np.testing.assert_almost_equal((icc['df1']), [7] * 6)
        np.testing.assert_almost_equal((icc['df2']), [24, 21, 21, 24, 21, 21])
        np.testing.assert_almost_equal((icc['pval']),
                                       [2.2e-06, 5.0e-06, 5.0e-06, 2.2e-06,
                                        5.0e-06, 5.0e-06])
        lower = icc['CI95%'].explode().to_numpy()[::2].astype(float)
        upper = icc['CI95%'].explode().to_numpy()[1::2].astype(float)
        np.testing.assert_almost_equal(lower, [.43, .43, .43, .75, .75, .75])
        np.testing.assert_almost_equal(upper, [.93, .93, .93, .98, .98, .98])
        # Test with missing values
        # Cast to float first so that the column can hold NaN.
        df['Scores'] = df['Scores'].astype(float)
        df.at[3, 'Scores'] = np.nan

        # nan_policy = 'omit'
        icc = intraclass_corr(data=df, targets='Wine', raters='Judge',
                              ratings='Scores', nan_policy='omit')
        np.testing.assert_almost_equal(np.round(icc['ICC'].to_numpy(), 2),
                                       [0.75, 0.75, 0.75, 0.92, 0.92, 0.92])
        np.testing.assert_almost_equal(np.round(icc['F']), [13] * 6)
        np.testing.assert_almost_equal((icc['df1']), [6] * 6)
        np.testing.assert_almost_equal((icc['df2']), [21, 18, 18, 21, 18, 18])

        # nan_policy = 'raise' (default)
        with pytest.raises(ValueError):
            intraclass_corr(data=df, targets='Wine', raters='Judge',
                            ratings='Scores', nan_policy='raise')

Example #12

0

Show file

    def test_rm_anova(self):
        """Check the one-way repeated measures ANOVA against JASP."""
        long_design = dict(dv='Scores', within='Time', subject='Subject',
                           data=df)
        rm_anova(correction=False, detailed=False, **long_design)
        rm_anova(correction=True, detailed=False, **long_design)
        table = rm_anova(correction='auto', detailed=True, **long_design)
        # Compare with JASP
        assert np.allclose(table.loc[0, 'F'], 3.913)
        assert np.allclose(np.round(table.loc[0, 'p-unc'], 3), .023)
        assert np.allclose(table.loc[0, 'np2'], .062)

        rm_anova(correction=True, detailed=True, **long_design)
        rm_anova(dv='Scores', within=['Time'], subject='Subject',
                 data=df_nan, export_filename='test_export.csv')

        # Using a wide dataframe with NaN and compare with JASP
        wide = read_dataset('rm_anova_wide')
        table = wide.rm_anova(detailed=True, correction=True)
        assert table.loc[0, 'F'] == 5.201
        assert round(table.loc[0, 'p-unc'], 3) == .007
        assert table.loc[0, 'np2'] == .394
        assert table.loc[0, 'eps'] == .694
        assert table.loc[0, 'W-spher'] == .307
        assert round(table.loc[0, 'p-GG-corr'], 3) == .017

Example #13

0

Show file

File: test_pairwise.py Project: jjwelton187/pingouin

 def test_pairwise_tukey(self):
     """Check pairwise_tukey p-values on the hair color pain dataset."""
     reference = [0.074, 0.435, 0.415, 0.004, 0.789, 0.037]
     result = pairwise_tukey(data=read_dataset('anova'),
                             dv='Pain threshold', between='Hair color')
     pvals = result.loc[:, 'p-tukey'].values.round(3)
     # Loose tolerance: only approximate agreement is required.
     assert np.allclose(reference, pvals, atol=0.05)

Example #14

0

Show file

File: test_parametric.py Project: palline1/pingouin

 def test_ancova(self):
     """Check ancova against JASP."""
     df = read_dataset('ancova')

     # With one covariate, balanced design, no missing values
     table = ancova(data=df, dv='Scores', covar='Income',
                    between='Method')
     table = table.round(3)
     assert table.loc[0, 'F'] == 3.336
     assert table.loc[1, 'F'] == 29.419

     # With one covariate, missing values and unbalanced design
     df.loc[[1, 2], 'Scores'] = np.nan
     table = ancova(data=df, dv='Scores', covar=['Income'],
                    between='Method', export_filename='test_export.csv')
     table = table.round(3)
     assert table.loc[0, 'F'] == 3.147
     assert table.loc[1, 'F'] == 19.781
     assert table.loc[2, 'DF'] == 29

     # With two covariates, missing values and unbalanced design
     table = ancova(data=df, dv='Scores', covar=['Income', 'BMI'],
                    between='Method')
     assert table.loc[0, 'F'] == 3.019
     assert table.loc[1, 'F'] == 19.605
     assert table.loc[2, 'F'] == 1.228
     assert table.loc[3, 'DF'] == 28

     # Other parameters
     ancova(data=df, dv='Scores', covar=['Income', 'BMI'],
            between='Method', export_filename='test_export.csv')
     ancova(data=df, dv='Scores', covar=['Income'], between='Method')

Example #15

0

Show file

File: test_circular.py Project: zzy17667036/pingouin

 def test_circ_axial(self):
     """Test function circ_axial."""
     degrees = read_dataset('circular')['Orientation'].to_numpy()
     transformed = circ_axial(np.deg2rad(degrees), 2)
     expected = [0, 0.7854, 1.5708, 2.3562, 3.1416, 3.9270, 4.7124, 5.4978]
     assert np.allclose(np.round(transformed, 4), expected)

Example #16

0

Show file

File: test_correlation.py Project: seralouk/pingouin

 def test_corr(self):
     """Test function corr

     Runs corr() with several methods on seeded random data, checks a few
     reference values, then covers NaN input, an invalid method name, the
     Bayes factor on the 'pairwise_corr' dataset and constant input.
     """
     # Fixed seed: the reference values below depend on these exact draws.
     np.random.seed(123)
     mean, cov = [4, 6], [(1, .6), (.6, 1)]
     x, y = np.random.multivariate_normal(mean, cov, 30).T
     # Overwrite two points with extreme values.
     x[3], y[5] = 12, -8
     corr(x, y, method='pearson', tail='one-sided')
     corr(x, y, method='spearman', tail='two-sided')
     corr(x, y, method='kendall')
     corr(x, y, method='shepherd', tail='two-sided')
     # Compare with robust corr toolbox
     stats = corr(x, y, method='skipped')
     assert np.round(stats['r'].to_numpy(), 3) == 0.512
     assert stats['outliers'].to_numpy() == 2
     # Changing the method using kwargs
     sk_sp = corr(x, y, method='skipped', corr_type='spearman')
     sk_pe = corr(x, y, method='skipped', corr_type='pearson')
     assert not sk_sp.equals(sk_pe)
     stats = corr(x, y, method='shepherd')
     assert stats['outliers'].to_numpy() == 2
     # The third value returned by skipped() has one entry per observation.
     _, _, outliers = skipped(x, y, corr_type='pearson')
     assert outliers.size == x.size
     assert stats['n'].to_numpy() == 30
     stats = corr(x, y, method='percbend')
     assert np.round(stats['r'].to_numpy(), 3) == 0.484
     # Compare biweight correlation to astropy
     stats = corr(x, y, method='bicor')
     assert np.isclose(stats['r'].to_numpy(), 0.4951417784979)
     # Changing the value of C using kwargs
     stats = corr(x, y, method='bicor', c=5)
     assert np.isclose(stats['r'].to_numpy(), 0.4940706950017)
     # Not normally distributed
     z = np.random.uniform(size=30)
     corr(x, z, method='pearson')
     # With NaN values
     x[3] = np.nan
     corr(x, y)
     # With the same array
     # Disabled because of AppVeyor failure
     # assert corr(x, x).loc['pearson', 'BF10'] == str(np.inf)
     # Wrong argument
     with pytest.raises(ValueError):
         corr(x, y, method='error')
     # Compare BF10 with JASP
     df = read_dataset('pairwise_corr')
     stats = corr(df['Neuroticism'], df['Extraversion'])
     # The reference value is compared with the inverse of BF10.
     assert np.isclose(1 / float(stats['BF10'].to_numpy()), 1.478e-13)
     # When one column is a constant, the correlation is not defined
     # and Pingouin returns a DataFrame full of NaN, except for ``n``
     x, y = [1, 1, 1], [1, 2, 3]
     stats = corr(x, y)
     # ``n`` must still be reported (truthy, i.e. neither zero nor NaN-free
     # check only: this assertion passes for any non-zero value).
     assert stats.at['pearson', 'n']
     assert np.isnan(stats.at['pearson', 'r'])
     # Biweight midcorrelation returns NaN when MAD is not defined
     assert np.isnan(bicor(np.array([1, 1, 1, 1, 0, 1]), np.arange(6))[0])

Example #17

0

Show file

 def test_welch_anova(self):
     """Check welch_anova against R's oneway.test on the pain dataset."""
     pain = read_dataset('anova')
     table = welch_anova(data=pain, dv='Pain threshold',
                         between='Hair color',
                         export_filename='test_export.csv')
     # Compare with R oneway.test function
     assert table.loc[0, 'ddof1'] == 3
     for column, reference in (('ddof2', 8.330), ('F', 5.890)):
         assert np.allclose(table.loc[0, column], reference)
     p_unc = np.round(table.loc[0, 'p-unc'], 4)
     assert np.allclose(p_unc, .0188)

Example #18

0

Show file

 def test_welch_anova(self):
     """Check welch_anova against JASP on the pain dataset."""
     pain = read_dataset('anova')
     table = welch_anova(data=pain, dv='Pain threshold',
                         between='Hair color')
     table = table.round(4)
     # Compare with JASP
     jasp = (('ddof1', 3),
             ('ddof2', 8.3298),
             ('F', 5.8901),
             ('p-unc', .0188),
             ('np2', 0.5760))
     for column, reference in jasp:
         assert table.at[0, column] == reference

Example #19

0

Show file

File: test_pairwise.py Project: jjwelton187/pingouin

 def test_pairwise_gameshowell(self):
     """Check pairwise_gameshowell on the hair color pain dataset."""
     result = pairwise_gameshowell(data=read_dataset('anova'),
                                   dv='Pain threshold',
                                   between='Hair color')
     # Compare with R package `userfriendlyscience`
     abs_t = np.abs(result['T'].round(2))
     np.testing.assert_array_equal(abs_t,
                                   [2.48, 1.42, 1.75, 4.09, 1.11, 3.56])
     dof = result['df'].round(2)
     np.testing.assert_array_equal(dof,
                                   [7.91, 7.94, 6.56, 8.0, 6.82, 6.77])
     # Significance decision at the 0.05 level for each comparison
     significant = ['Yes' if p < 0.05 else 'No' for p in result['pval']]
     np.testing.assert_array_equal(significant,
                                   ['No', 'No', 'No', 'Yes', 'No', 'Yes'])

Example #20

0

Show file

 def test_cochran(self):
     """Check cochran against the real-statistics worked example.

     http://www.real-statistics.com/anova-repeated-measures/cochrans-q-test/
     """
     from pingouin import read_dataset
     df = read_dataset('cochran')
     call = dict(dv='Energetic', within='Time', subject='Subject', data=df)
     result = cochran(**call)
     assert result.loc['cochran', 'Q'] == 6.706
     assert np.allclose(result.loc['cochran', 'p-unc'], 0.034981)
     cochran(**call)
     # With a NaN value (df is modified in place, so `call` sees the change)
     df.loc[2, 'Energetic'] = np.nan
     cochran(**call)

Example #21

0

Show file

 def test_cochran(self):
     """Check cochran's Q statistic, CSV export and NaN handling."""
     from pingouin import read_dataset
     df = read_dataset('cochran')
     call = dict(dv='Energetic', within='Time', subject='Subject', data=df)
     result = cochran(**call)
     assert result.loc['cochran', 'Q'] == 6.706
     cochran(export_filename='test_export.csv', **call)
     # With a NaN value (df is modified in place, so `call` sees the change)
     df.loc[2, 'Energetic'] = np.nan
     cochran(**call)

Example #22

0

Show file

 def test_rmcorr(self):
     """Check rm_corr against the rmcorr R package."""
     df = read_dataset('rm_corr')
     result = rm_corr(data=df, x='pH', y='PacO2', subject='Subject')
     result = result.round(3)
     assert result.at["rm_corr", "r"] == -0.507
     assert result.at["rm_corr", "dof"] == 38
     interval = np.round(result.at["rm_corr", "CI95%"], 2)
     assert np.allclose(interval, [-0.71, -0.23])
     assert result.at["rm_corr", "pval"] == 0.001
     # Test with less than 3 subjects (same behavior as R package)
     with pytest.raises(ValueError):
         rm_corr(data=df[df['Subject'].isin([1, 2])],
                 x='pH', y='PacO2', subject='Subject')

Example #23

0

Show file

File: test_reliability.py Project: jjwelton187/pingouin

 def test_intraclass_corr(self):
     """Check intraclass_corr values and its input validation."""
     df = read_dataset('icc')
     # Non-default confidence level: only checked for running cleanly.
     intraclass_corr(df, 'Wine', 'Judge', 'Scores', ci=.68)
     coef, bounds = intraclass_corr(df, 'Wine', 'Judge', 'Scores')
     assert np.round(coef, 3) == 0.728
     assert bounds[0] == .434
     assert bounds[1] == .927
     # Invalid inputs
     with pytest.raises(ValueError):
         intraclass_corr(df, None, 'Judge', 'Scores')
     with pytest.raises(AssertionError):
         intraclass_corr(df, 'Wine', 'Judge', 'Judge')
     with pytest.raises(ValueError):
         intraclass_corr(df.drop(index=0), 'Wine', 'Judge', 'Scores')

Example #24

0

Show file

File: test_plotting.py Project: cyzhangAThit/pingouin

 def test_plot_paired(self):
     """Test plot_paired().

     Checks the returned object type for a default call, then draws on
     user-supplied axes with ``boxplot=False`` and with an explicit
     ``order``. All figures are closed afterwards so that they do not
     accumulate across the test session.
     """
     df = read_dataset('mixed_anova')
     df = df.query("Group == 'Meditation' and Subject > 40 and "
                   "(Time == 'August' or Time == 'June')").copy()
     df.loc[[101, 161], 'Scores'] = 6
     try:
         ax = plot_paired(data=df, dv='Scores', within='Time',
                          subject='Subject')
         assert isinstance(ax, matplotlib.axes.Axes)
         _, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4))
         plot_paired(data=df, dv='Scores', within='Time',
                     subject='Subject', boxplot=False, ax=ax1)
         plot_paired(data=df, dv='Scores', within='Time',
                     subject='Subject', order=['June', 'August'],
                     ax=ax2)
     finally:
         # Release the figures even when an assertion above fails.
         plt.close('all')

Example #25

0

Show file

File: test_parametric.py Project: yadevi/pingouin

 def test_ancova(self):
     """Test function ancova.

     Compare with JASP. The scenarios are: one covariate on complete data,
     one covariate after two scores have been set to NaN, and two
     covariates on the same incomplete data, each with the default effect
     size and with ``effsize="n2"`` where noted.
     """
     df = read_dataset('ancova')
     # With one covariate, balanced design, no missing values
     aov = ancova(data=df, dv='Scores', covar='Income',
                  between='Method').round(4)
     # The last row carries NaN for F, p-unc and the effect size.
     array_equal(aov['DF'], [3, 1, 31])
     array_equal(aov['F'], [3.3365, 29.4194, np.nan])
     array_equal(aov['p-unc'], [0.0319, 0.000, np.nan])
     array_equal(aov['np2'], [0.2441, 0.4869, np.nan])
     # Same model, requesting a different effect size
     aov = ancova(data=df,
                  dv='Scores',
                  covar='Income',
                  between='Method',
                  effsize="n2").round(4)
     array_equal(aov['n2'], [0.1421, 0.4177, np.nan])
     # With one covariate, missing values and unbalanced design
     df.loc[[1, 2], 'Scores'] = np.nan
     aov = ancova(data=df, dv='Scores', covar=['Income'],
                  between='Method').round(4)
     array_equal(aov['DF'], [3, 1, 29])
     array_equal(aov['F'], [3.1471, 19.7811, np.nan])
     array_equal(aov['p-unc'], [0.0400, 0.0001, np.nan])
     array_equal(aov['np2'], [0.2456, 0.4055, np.nan])
     # With two covariates, missing values and unbalanced design
     aov = ancova(data=df,
                  dv='Scores',
                  covar=['Income', 'BMI'],
                  between='Method').round(4)
     array_equal(aov['DF'], [3, 1, 1, 28])
     array_equal(aov['F'], [3.0186, 19.6045, 1.2279, np.nan])
     array_equal(aov['p-unc'], [0.0464, 0.0001, 0.2772, np.nan])
     array_equal(aov['np2'], [0.2444, 0.4118, 0.0420, np.nan])
     # Same but using standard eta-squared
     aov = ancova(data=df,
                  dv='Scores',
                  covar=['Income', 'BMI'],
                  between='Method',
                  effsize="n2").round(4)
     array_equal(aov['n2'], [0.1564, 0.3387, 0.0212, np.nan])
     # Other parameters
     # (results are not checked; these calls only have to run)
     ancova(data=df, dv='Scores', covar=['Income', 'BMI'], between='Method')
     ancova(data=df, dv='Scores', covar=['Income'], between='Method')

Example #26

0

Show file

 def test_cochran(self):
     """Test function cochran.

     Reference values:
     http://www.real-statistics.com/anova-repeated-measures/cochrans-q-test/
     """
     from pingouin import read_dataset

     def _check_reference(res):
         # Q statistic and uncorrected p-value of the reference example.
         assert round(res.at['cochran', 'Q'], 3) == 6.706
         assert np.isclose(res.at['cochran', 'p-unc'], 0.034981)

     energy = read_dataset('cochran')
     design = dict(dv='Energetic', within='Time', subject='Subject')
     _check_reference(cochran(data=energy, **design))
     # Categorical columns, with an extra unused category on 'Time':
     # the result must be unchanged.
     energy['Subject'] = energy['Subject'].astype('category')
     time_cat = energy['Time'].astype('category')
     energy['Time'] = time_cat.cat.add_categories("Unused")
     _check_reference(cochran(data=energy, **design))
     # A NaN in the dependent variable must not make the call fail
     energy.loc[2, 'Energetic'] = np.nan
     cochran(data=energy, **design)

Example #27

0

Show file

    def test_rm_anova(self):
        """Test function rm_anova.

        Reference values are taken from JASP.
        """
        design = dict(dv='Scores', within='Time', subject='Subject')
        # Smoke tests: correction disabled, then enabled, non-detailed output
        for corrected in (False, True):
            rm_anova(data=df, correction=corrected, detailed=False, **design)

        # Compare with JASP, first on `df`, then on the version of the
        # data with categorical columns
        for frame in (df, df_cat):
            res = rm_anova(data=frame, correction='auto', detailed=True,
                           **design).round(3)
            assert res.at[0, 'F'] == 3.913
            assert res.at[0, 'p-unc'] == .023
            assert res.at[0, 'np2'] == .062

        # Alternative effect sizes
        res = rm_anova(data=df, correction='auto', effsize="n2",
                       **design).round(3)
        assert res.at[0, 'n2'] == .062
        res = rm_anova(data=df, correction='auto', detailed=True,
                       effsize="ng2", **design).round(3)
        assert res.at[0, 'ng2'] == .040

        rm_anova(data=df, correction=True, detailed=True, **design)
        rm_anova(dv='Scores', within=['Time'], subject='Subject', data=df_nan)

        # Wide-format dataframe containing NaN, compared with JASP
        wide = read_dataset('rm_anova_wide')
        res = wide.rm_anova(detailed=True, correction=True).round(3)
        reference = (('F', 5.201), ('p-unc', .007), ('np2', .394),
                     ('eps', .694), ('W-spher', .307), ('p-GG-corr', .017))
        for column, value in reference:
            assert res.at[0, column] == value

Example #28

0

Show file

 def test_corr(self):
     """Test function corr on simulated data and on a Pingouin dataset."""
     # Fixed seed: the reference values below depend on these exact draws.
     np.random.seed(123)
     mu = [4, 6]
     sigma = [(1, .6), (.6, 1)]
     a, b = np.random.multivariate_normal(mu, sigma, 30).T
     # Overwrite one value in each variable with an extreme one
     a[3] = 12
     b[5] = -8
     # Smoke tests over several methods / tails
     for opts in (dict(method='pearson', tail='one-sided'),
                  dict(method='spearman', tail='two-sided'),
                  dict(method='kendall'),
                  dict(method='shepherd', tail='two-sided')):
         corr(a, b, **opts)
     # Compare with robust corr toolbox
     skip = corr(a, b, method='skipped')
     assert skip['r'].values == 0.512
     assert skip['outliers'].values == 2
     shep = corr(a, b, method='shepherd')
     assert shep['outliers'].values == 2
     _, _, flagged = skipped(a, b, method='pearson')
     assert flagged.size == a.size
     assert shep['n'].values == 30
     bend = corr(a, b, method='percbend')
     assert bend['r'].values == 0.484
     # Second variable drawn from a uniform distribution
     uniform = np.random.uniform(size=30)
     corr(a, uniform, method='pearson')
     # With a NaN value
     a[3] = np.nan
     corr(a, b)
     # Correlating an array with itself must give an infinite BF10
     assert float(corr(a, a).loc['pearson', 'BF10']) == np.inf
     # Invalid method name, then arrays of different lengths
     with pytest.raises(ValueError):
         corr(a, b, method='error')
     with pytest.raises(ValueError):
         corr(a, b[:-10])
     # Compare with JASP
     traits = read_dataset('pairwise_corr')
     jasp = corr(traits['Neuroticism'], traits['Extraversion'])
     assert np.isclose(1 / float(jasp['BF10'].values), 1.478e-13)
     # With 1500 samples the output must not contain a 'BF10' entry
     big_a, big_b = np.random.multivariate_normal(mu, sigma, 1500).T
     large = corr(big_a, big_b)
     assert 'BF10' not in large.keys()

Example #29

0

Show file

File: test_plotting.py Project: vishalbelsare/pingouin

 def test_plot_blandaltman(self):
     """Test plot_blandaltman() on simulated data and on a dataset."""
     # Simulated, seeded bivariate normal sample
     np.random.seed(123)
     mu = [10, 11]
     sigma = [[1, 0.8], [0.8, 1]]
     a, b = np.random.multivariate_normal(mu, sigma, 30).T
     axes = plot_blandaltman(a, b)
     assert isinstance(axes, matplotlib.axes.Axes)
     # Draw into two pre-existing axes with non-default options
     _, (left, right) = plt.subplots(1, 2, figsize=(9, 4))
     plot_blandaltman(a, b, agreement=2, confidence=None, ax=left)
     plot_blandaltman(a, b, agreement=2, confidence=.68, dpi=200, ax=right)
     plt.close('all')
     # Columns 'A' and 'B' of Pingouin's "blandaltman" dataset
     ba = read_dataset("blandaltman")
     col_a = ba['A']
     col_b = ba['B']
     for opts in ({},
                  {'annotate': False},
                  {'xaxis': "x", 'confidence': None},
                  {'xaxis': "y"}):
         plot_blandaltman(col_a, col_b, **opts)
     plt.close('all')

Example #30

0

Show file

File: test_distribution.py Project: cyzhangAThit/pingouin

import numpy as np
import pytest

from unittest import TestCase
from pingouin.distribution import (gzscore, normality, anderson, epsilon,
                                   homoscedasticity, sphericity)
from pingouin import read_dataset

# Load the 'mixed_anova.csv' dataset once, at module level
df = read_dataset('mixed_anova.csv')
# Copy of the dataset with NaN written at row positions 4 and 15 of the
# first column
df_nan = df.copy()
df_nan.iloc[[4, 15], 0] = np.nan

# Create random normal variables (100 samples each, scales 1.0 / 0.8 / 0.9).
# The seed is fixed first, so the order of the three draws below matters.
np.random.seed(1234)
x = np.random.normal(scale=1., size=100)
y = np.random.normal(scale=0.8, size=100)
z = np.random.normal(scale=0.9, size=100)


class TestDistribution(TestCase):
    """Test distribution.py."""
    def test_gzscore(self):
        """Smoke-test gzscore on 100 log-normally distributed samples."""
        gzscore(np.random.lognormal(size=100))

    def test_normality(self):
        """Test function test_normality."""
        normality(x, alpha=.05)
        normality(x, y, alpha=.05)