def test_corr(self):
    """Check ``corr`` over its correlation methods and a few edge cases.

    The data are 30 seeded draws from a bivariate normal distribution in
    which ``x[3]`` and ``y[5]`` are overwritten with 12 and -8. Expected
    values are the ones hard-coded below; the inline comments name the
    external tools they are compared against.
    """
    np.random.seed(123)
    mu = [4, 6]
    sigma = [(1, .6), (.6, 1)]
    sample = np.random.multivariate_normal(mu, sigma, 30)
    x, y = sample.T
    x[3] = 12
    y[5] = -8

    # Smoke calls: each (method, extra keyword) pair only has to run.
    smoke_calls = [
        ('pearson', {'tail': 'one-sided'}),
        ('spearman', {'tail': 'two-sided'}),
        ('kendall', {}),
        ('shepherd', {'tail': 'two-sided'}),
    ]
    for method_name, extra in smoke_calls:
        corr(x, y, method=method_name, **extra)

    # Skipped correlation -- compare with the robust corr toolbox
    res = corr(x, y, method='skipped')
    r_skipped = res['r'].to_numpy()
    n_outliers = res['outliers'].to_numpy()
    assert np.round(r_skipped, 3) == 0.512
    assert n_outliers == 2

    # The underlying estimator can be switched through kwargs, and the
    # two choices must not give identical output.
    skipped_spearman = corr(x, y, method='skipped', corr_type='spearman')
    skipped_pearson = corr(x, y, method='skipped', corr_type='pearson')
    assert not skipped_spearman.equals(skipped_pearson)

    # Shepherd's correlation: outlier count and sample size
    res = corr(x, y, method='shepherd')
    assert res['outliers'].to_numpy() == 2
    _r, _pval, outlier_flags = skipped(x, y, corr_type='pearson')
    assert outlier_flags.size == x.size
    assert res['n'].to_numpy() == 30

    # Percentage bend correlation
    res = corr(x, y, method='percbend')
    r_percbend = res['r'].to_numpy()
    assert np.round(r_percbend, 3) == 0.484

    # Biweight midcorrelation -- compare with astropy
    res = corr(x, y, method='bicor')
    assert np.isclose(res['r'].to_numpy(), 0.4951417784979)
    # The tuning constant ``c`` is forwarded through kwargs
    res = corr(x, y, method='bicor', c=5)
    assert np.isclose(res['r'].to_numpy(), 0.4940706950017)

    # Input that is not normally distributed
    uniform_sample = np.random.uniform(size=30)
    corr(x, uniform_sample, method='pearson')

    # Input containing a NaN value
    x[3] = np.nan
    corr(x, y)

    # With the same array
    # Disabled because of AppVeyor failure
    # assert corr(x, x).loc['pearson', 'BF10'] == str(np.inf)

    # An unknown method name must be rejected
    with pytest.raises(ValueError):
        corr(x, y, method='error')

    # Bayes factor -- compare with JASP
    data = read_dataset('pairwise_corr')
    res = corr(data['Neuroticism'], data['Extraversion'])
    bf10 = float(res['BF10'].to_numpy())
    assert np.isclose(1 / bf10, 1.478e-13)

    # When one column is a constant, the correlation is not defined
    # and Pingouin returns a DataFrame full of NaN, except for ``n``
    constant, increasing = [1, 1, 1], [1, 2, 3]
    res = corr(constant, increasing)
    assert res.at['pearson', 'n']
    assert np.isnan(res.at['pearson', 'r'])

    # Biweight midcorrelation returns NaN when MAD is not defined
    degenerate = np.array([1, 1, 1, 1, 0, 1])
    r_bicor = bicor(degenerate, np.arange(6))[0]
    assert np.isnan(r_bicor)
def test_corr(self):
    """Check ``corr`` against reference values for every method.

    Reference values come from the R `correlation` package (see the
    test_correlation.R file) unless a comment below names another source.

    Two datasets are built from the same 30 seeded bivariate normal
    draws: ``x``/``y`` with ``x[3]`` and ``y[5]`` set to 12 and -8, and
    ``x2``/``y2`` with the same two entries set to 7 and 2.6.
    """
    np.random.seed(123)
    mu = [4, 6]
    sigma = [(1, .6), (.6, 1)]
    sample = np.random.multivariate_normal(mu, sigma, 30)
    x, y = sample.T
    x2 = x.copy()
    y2 = y.copy()
    x[3] = 12
    y[5] = -8
    x2[3] = 7
    y2[5] = 2.6

    # Pearson correlation: two-sided (default), then one-sided "greater"
    # and "less". r is the same in all three cases; the p-value and the
    # CI bounds change.
    pearson_cases = [
        ({}, 0.3518659, round(-0.1966232, 2), round(0.5043872, 2)),
        ({'alternative': 'greater'}, 0.175933, round(-0.1376942, 2), 1),
        ({'alternative': 'less'}, 0.824067, -1, round(0.4578044, 2)),
    ]
    for extra, expected_p, expected_low, expected_high in pearson_cases:
        res = corr(x, y, method='pearson', **extra)
        assert np.isclose(res.loc['pearson', 'r'], 0.1761221)
        assert np.isclose(res.loc['pearson', 'p-val'], expected_p)
        assert res.loc['pearson', 'CI95%'][0] == expected_low
        assert res.loc['pearson', 'CI95%'][1] == expected_high

    # Spearman correlation
    res = corr(x, y, method='spearman')
    assert np.isclose(res.loc['spearman', 'r'], 0.4740823)
    assert np.isclose(res.loc['spearman', 'p-val'], 0.008129768)
    # CI are calculated using a different formula for Spearman in R
    # assert stats.loc['spearman', 'CI95%'][0] == round(0.1262988, 2)
    # assert stats.loc['spearman', 'CI95%'][1] == round(0.7180799, 2)

    # Kendall correlation
    # R uses a different estimation method than scipy for the p-value,
    # so only the coefficient is compared.
    res = corr(x, y, method='kendall')
    assert np.isclose(res.loc['kendall', 'r'], 0.3517241)

    # Skipped correlation -- compare with robust corr toolbox
    # https://sourceforge.net/projects/robustcorrtool/
    res = corr(x, y, method='skipped')
    assert round(res.loc['skipped', 'r'], 4) == 0.5123
    assert res.loc['skipped', 'outliers'] == 2
    skipped_default = corr(x2, y2, method='skipped')
    assert round(skipped_default.loc['skipped', 'r'], 4) == 0.5123
    assert skipped_default.loc['skipped', 'outliers'] == 2
    # Pearson skipped correlation
    skipped_pearson = corr(x2, y2, method='skipped', corr_type='pearson')
    assert np.round(skipped_pearson.loc['skipped', 'r'], 4) == 0.5254
    assert skipped_pearson.loc['skipped', 'outliers'] == 2
    assert not skipped_default.equals(skipped_pearson)

    # Shepherd
    res = corr(x, y, method='shepherd')
    assert np.isclose(res.loc['shepherd', 'r'], 0.5123153)
    assert np.isclose(res.loc['shepherd', 'p-val'], 0.005316)
    assert res.loc['shepherd', 'outliers'] == 2
    _r, _pval, outlier_flags = skipped(x, y, corr_type='pearson')
    assert outlier_flags.size == x.size
    assert res.loc['shepherd', 'n'] == 30

    # Percbend -- compare with robust corr toolbox
    res = corr(x, y, method='percbend')
    r_percbend = res.loc['percbend', 'r']
    assert round(r_percbend, 4) == 0.4843
    assert np.isclose(r_percbend, 0.4842686)
    assert np.isclose(res.loc['percbend', 'p-val'], 0.006693313)
    res = corr(x2, y2, method='percbend')
    assert round(res.loc['percbend', 'r'], 4) == 0.4843
    # A non-default bending constant passed through kwargs
    res = corr(x, y, method='percbend', beta=.5)
    assert round(res.loc['percbend', 'r'], 4) == 0.4848

    # Compare biweight correlation to astropy
    res = corr(x, y, method='bicor')
    bicor_ci = res.loc['bicor', 'CI95%']
    assert np.isclose(res.loc['bicor', 'r'], 0.4951418)
    assert np.isclose(res.loc['bicor', 'p-val'], 0.005403701)
    assert bicor_ci[0] == round(0.1641553, 2)
    assert bicor_ci[1] == round(0.7259185, 2)
    # The tuning constant ``c`` is forwarded through kwargs
    res = corr(x, y, method='bicor', c=5)
    assert np.isclose(res.loc['bicor', 'r'], 0.4940706950017)

    # Not normally distributed
    uniform_sample = np.random.uniform(size=30)
    corr(x, uniform_sample, method='pearson')

    # With NaN values
    x[3] = np.nan
    corr(x, y)

    # With the same array
    # Disabled because of AppVeyor failure
    # assert corr(x, x).loc['pearson', 'BF10'] == str(np.inf)

    # Wrong arguments: an unknown method, and the ``tail`` keyword
    with pytest.raises(ValueError):
        corr(x, y, method='error')
    with pytest.raises(ValueError):
        corr(x, y, tail='error')

    # Compare BF10 with JASP
    data = read_dataset('pairwise_corr')
    res = corr(data['Neuroticism'], data['Extraversion'])
    bf10 = float(res['BF10'].to_numpy())
    assert np.isclose(1 / bf10, 1.478e-13)

    # Perfect correlation, CI and power should be 1, BF should be Inf
    # (only r and power are asserted here)
    # https://github.com/raphaelvallat/pingouin/issues/195
    res = corr(x, x)
    assert np.isclose(res.at['pearson', 'r'], 1)
    assert np.isclose(res.at['pearson', 'power'], 1)

    # When one column is a constant, the correlation is not defined
    # and Pingouin returns a DataFrame full of NaN, except for ``n``
    constant, increasing = [1, 1, 1], [1, 2, 3]
    res = corr(constant, increasing)
    assert res.at['pearson', 'n']
    assert np.isnan(res.at['pearson', 'r'])

    # Biweight midcorrelation returns NaN when MAD is not defined
    degenerate = np.array([1, 1, 1, 1, 0, 1])
    r_bicor = bicor(degenerate, np.arange(6))[0]
    assert np.isnan(r_bicor)