Ejemplo n.º 1
0
 def test_corr(self):
     """Exercise ``corr`` across its methods and pin a few reference values.

     A seeded bivariate normal sample of 30 points is drawn and two extreme
     values are injected into it; the skipped and Shepherd results are then
     asserted to report two outliers. Statement order matters: the random
     draws and the in-place edits of ``x`` feed the later assertions.
     """
     np.random.seed(123)
     mean, cov = [4, 6], [(1, .6), (.6, 1)]
     x, y = np.random.multivariate_normal(mean, cov, 30).T
     # Inject one extreme value into each variable.
     x[3], y[5] = 12, -8
     # Smoke tests: these calls only need to run without raising.
     corr(x, y, method='pearson', tail='one-sided')
     corr(x, y, method='spearman', tail='two-sided')
     corr(x, y, method='kendall')
     corr(x, y, method='shepherd', tail='two-sided')
     # Compare with robust corr toolbox
     stats = corr(x, y, method='skipped')
     assert np.round(stats['r'].to_numpy(), 3) == 0.512
     assert stats['outliers'].to_numpy() == 2
     # Changing the method using kwargs
     sk_sp = corr(x, y, method='skipped', corr_type='spearman')
     sk_pe = corr(x, y, method='skipped', corr_type='pearson')
     assert not sk_sp.equals(sk_pe)
     stats = corr(x, y, method='shepherd')
     assert stats['outliers'].to_numpy() == 2
     # The third value returned by ``skipped`` has one entry per observation.
     _, _, outliers = skipped(x, y, corr_type='pearson')
     assert outliers.size == x.size
     assert stats['n'].to_numpy() == 30
     stats = corr(x, y, method='percbend')
     assert np.round(stats['r'].to_numpy(), 3) == 0.484
     # Compare biweight correlation to astropy
     stats = corr(x, y, method='bicor')
     assert np.isclose(stats['r'].to_numpy(), 0.4951417784979)
     # Changing the value of C using kwargs
     stats = corr(x, y, method='bicor', c=5)
     assert np.isclose(stats['r'].to_numpy(), 0.4940706950017)
     # Not normally distributed
     z = np.random.uniform(size=30)
     corr(x, z, method='pearson')
     # With NaN values
     x[3] = np.nan
     corr(x, y)
     # With the same array: the check that BF10 of corr(x, x) equals
     # str(np.inf) is disabled because of an AppVeyor failure.
     # Wrong argument
     with pytest.raises(ValueError):
         corr(x, y, method='error')
     # Compare BF10 with JASP
     df = read_dataset('pairwise_corr')
     stats = corr(df['Neuroticism'], df['Extraversion'])
     # Read the scalar cell directly: calling float() on a 1-element array
     # is deprecated in recent NumPy releases.
     assert np.isclose(1 / float(stats.at['pearson', 'BF10']), 1.478e-13)
     # When one column is a constant, the correlation is not defined
     # and Pingouin return a DataFrame full of NaN, except for ``n``
     x, y = [1, 1, 1], [1, 2, 3]
     stats = corr(x, y)
     # Compare against the actual sample size: a bare truthiness check
     # would also pass for NaN.
     assert stats.at['pearson', 'n'] == 3
     assert np.isnan(stats.at['pearson', 'r'])
     # Biweight midcorrelation returns NaN when MAD is not defined
     assert np.isnan(bicor(np.array([1, 1, 1, 1, 0, 1]), np.arange(6))[0])
Ejemplo n.º 2
0
    def test_corr(self):
        """Check ``corr`` outputs against reference values for each method.

        Compare to R `correlation` package. See test_correlation.R file.

        A seeded bivariate normal sample of 30 points is drawn; ``x``/``y``
        get two extreme values injected, while ``x2``/``y2`` are copies with
        milder replacements at the same positions. Statement order matters:
        the random draws and the in-place edits of ``x`` feed the later
        assertions.
        """
        np.random.seed(123)
        mean, cov = [4, 6], [(1, .6), (.6, 1)]
        x, y = np.random.multivariate_normal(mean, cov, 30).T
        x2, y2 = x.copy(), y.copy()
        x[3], y[5] = 12, -8
        x2[3], y2[5] = 7, 2.6

        # Pearson correlation
        stats = corr(x, y, method='pearson')
        assert np.isclose(stats.loc['pearson', 'r'], 0.1761221)
        assert np.isclose(stats.loc['pearson', 'p-val'], 0.3518659)
        assert stats.loc['pearson', 'CI95%'][0] == round(-0.1966232, 2)
        assert stats.loc['pearson', 'CI95%'][1] == round(0.5043872, 2)
        # - One-sided: greater
        stats = corr(x, y, method='pearson', alternative='greater')
        assert np.isclose(stats.loc['pearson', 'r'], 0.1761221)
        assert np.isclose(stats.loc['pearson', 'p-val'], 0.175933)
        assert stats.loc['pearson', 'CI95%'][0] == round(-0.1376942, 2)
        assert stats.loc['pearson', 'CI95%'][1] == 1
        # - One-sided: less
        stats = corr(x, y, method='pearson', alternative='less')
        assert np.isclose(stats.loc['pearson', 'r'], 0.1761221)
        assert np.isclose(stats.loc['pearson', 'p-val'], 0.824067)
        assert stats.loc['pearson', 'CI95%'][0] == -1
        assert stats.loc['pearson', 'CI95%'][1] == round(0.4578044, 2)

        # Spearman correlation
        stats = corr(x, y, method='spearman')
        assert np.isclose(stats.loc['spearman', 'r'], 0.4740823)
        assert np.isclose(stats.loc['spearman', 'p-val'], 0.008129768)
        # CI are calculated using a different formula for Spearman in R,
        # so the R bounds (0.1262988, 0.7180799) are not asserted here.

        # Kendall correlation
        # R uses a different estimation method than scipy for the p-value
        stats = corr(x, y, method='kendall')
        assert np.isclose(stats.loc['kendall', 'r'], 0.3517241)
        # Skipped correlation -- compare with robust corr toolbox
        # https://sourceforge.net/projects/robustcorrtool/
        stats = corr(x, y, method='skipped')
        assert round(stats.loc['skipped', 'r'], 4) == 0.5123
        assert stats.loc['skipped', 'outliers'] == 2
        sk_sp = corr(x2, y2, method='skipped')
        assert round(sk_sp.loc['skipped', 'r'], 4) == 0.5123
        assert sk_sp.loc['skipped', 'outliers'] == 2
        # Pearson skipped correlation
        sk_pe = corr(x2, y2, method='skipped', corr_type='pearson')
        assert np.round(sk_pe.loc['skipped', 'r'], 4) == 0.5254
        assert sk_pe.loc['skipped', 'outliers'] == 2
        assert not sk_sp.equals(sk_pe)
        # Shepherd
        stats = corr(x, y, method='shepherd')
        assert np.isclose(stats.loc['shepherd', 'r'], 0.5123153)
        assert np.isclose(stats.loc['shepherd', 'p-val'], 0.005316)
        assert stats.loc['shepherd', 'outliers'] == 2
        # The third value returned by ``skipped`` has one entry per observation.
        _, _, outliers = skipped(x, y, corr_type='pearson')
        assert outliers.size == x.size
        assert stats.loc['shepherd', 'n'] == 30
        # Percbend -- compare with robust corr toolbox
        stats = corr(x, y, method='percbend')
        assert round(stats.loc['percbend', 'r'], 4) == 0.4843
        assert np.isclose(stats.loc['percbend', 'r'], 0.4842686)
        assert np.isclose(stats.loc['percbend', 'p-val'], 0.006693313)
        stats = corr(x2, y2, method='percbend')
        assert round(stats.loc['percbend', 'r'], 4) == 0.4843
        stats = corr(x, y, method='percbend', beta=.5)
        assert round(stats.loc['percbend', 'r'], 4) == 0.4848
        # Compare biweight correlation to astropy
        stats = corr(x, y, method='bicor')
        assert np.isclose(stats.loc['bicor', 'r'], 0.4951418)
        assert np.isclose(stats.loc['bicor', 'p-val'], 0.005403701)
        assert stats.loc['bicor', 'CI95%'][0] == round(0.1641553, 2)
        assert stats.loc['bicor', 'CI95%'][1] == round(0.7259185, 2)
        stats = corr(x, y, method='bicor', c=5)
        assert np.isclose(stats.loc['bicor', 'r'], 0.4940706950017)
        # Not normally distributed
        z = np.random.uniform(size=30)
        corr(x, z, method='pearson')
        # With NaN values
        x[3] = np.nan
        corr(x, y)
        # With the same array: the check that BF10 of corr(x, x) equals
        # str(np.inf) is disabled because of an AppVeyor failure.
        # Wrong argument
        with pytest.raises(ValueError):
            corr(x, y, method='error')
        # NOTE(review): the rest of this test passes ``alternative``, not
        # ``tail``; presumably the legacy keyword itself is what gets
        # rejected here -- confirm against ``corr``.
        with pytest.raises(ValueError):
            corr(x, y, tail='error')
        # Compare BF10 with JASP
        df = read_dataset('pairwise_corr')
        stats = corr(df['Neuroticism'], df['Extraversion'])
        # Read the scalar cell directly: calling float() on a 1-element array
        # is deprecated in recent NumPy releases.
        assert np.isclose(1 / float(stats.at['pearson', 'BF10']), 1.478e-13)
        # Perfect correlation, CI and power should be 1, BF should be Inf
        # https://github.com/raphaelvallat/pingouin/issues/195
        # (``x`` still contains the NaN assigned above.)
        stats = corr(x, x)
        assert np.isclose(stats.at['pearson', 'r'], 1)
        assert np.isclose(stats.at['pearson', 'power'], 1)
        # When one column is a constant, the correlation is not defined
        # and Pingouin return a DataFrame full of NaN, except for ``n``
        x, y = [1, 1, 1], [1, 2, 3]
        stats = corr(x, y)
        # Compare against the actual sample size: a bare truthiness check
        # would also pass for NaN.
        assert stats.at['pearson', 'n'] == 3
        assert np.isnan(stats.at['pearson', 'r'])
        # Biweight midcorrelation returns NaN when MAD is not defined
        assert np.isnan(bicor(np.array([1, 1, 1, 1, 0, 1]), np.arange(6))[0])