Exemple #1
0
def _pairwise_corr(self, columns=None, covar=None, tail='two-sided',
                   method='pearson', padjust='none', export_filename=None):
    """Compute pairwise (partial) correlations, using ``self`` as the data.

    Thin wrapper: ``self`` is forwarded as the ``data`` argument of
    ``pairwise_corr`` and every other argument is passed through
    unchanged. The value returned by that call is returned as-is.
    """
    # Gather the pass-through options once, then delegate.
    options = dict(columns=columns, covar=covar, tail=tail, method=method,
                   padjust=padjust, export_filename=export_filename)
    return pairwise_corr(data=self, **options)
Exemple #2
0
 def test_pairwise_corr(self):
     """Test function pairwise_corr.

     Compares the Pearson coefficients with reference values taken from
     JASP, then exercises multiple-comparison correction, export, the
     different accepted forms of ``columns``, covariates, missing values
     and MultiIndex columns.
     """
     import os
     import tempfile
     # Load JASP Big 5 DataSets (remove subject column)
     data = read_dataset('pairwise_corr').iloc[:, 1:]
     stats = pairwise_corr(data=data, method='pearson', tail='two-sided')
     jasp_rval = [
         -0.350, -0.01, -.134, -.368, .267, .055, .065, .159, -.013, .159
     ]
     # The reference values only have three decimals: round before
     # comparing so the check does not rely on the output being pre-rounded.
     assert np.allclose(stats['r'].round(3).values, jasp_rval)
     assert stats['n'].values[0] == 500
     # Correct for multiple comparisons
     pairwise_corr(data=data,
                   method='spearman',
                   tail='one-sided',
                   padjust='bonf')
     # Export (into a temporary directory so that no file is left behind
     # in the current working directory once the test is done)
     with tempfile.TemporaryDirectory() as tmp_dir:
         export_path = os.path.join(tmp_dir, 'test_export.csv')
         pairwise_corr(data=data,
                       method='spearman',
                       tail='one-sided',
                       export_filename=export_path)
     # Check with a subset of columns
     pairwise_corr(data=data, columns=['Neuroticism', 'Extraversion'])
     with pytest.raises(ValueError):
         pairwise_corr(data=data, tail='wrong')
     with pytest.raises(ValueError):
         pairwise_corr(data=data, columns='wrong')
     # Check with non-numeric columns
     data['test'] = 'test'
     pairwise_corr(data=data, method='pearson')
     # Check different variation of product / combination
     n = data.shape[0]
     data['Age'] = np.random.randint(18, 65, n)
     data['IQ'] = np.random.normal(105, 1, n)
     data['One'] = 1
     data['Gender'] = np.repeat(['M', 'F'], int(n / 2))
     pairwise_corr(data,
                   columns=['Neuroticism', 'Gender'],
                   method='shepherd')
     pairwise_corr(data, columns=['Neuroticism', 'Extraversion', 'Gender'])
     pairwise_corr(data, columns=['Neuroticism'])
     pairwise_corr(data, columns='Neuroticism', method='skipped')
     pairwise_corr(data, columns=[['Neuroticism']], method='spearman')
     pairwise_corr(data, columns=[['Neuroticism'], None], method='percbend')
     pairwise_corr(data, columns=[['Neuroticism', 'Gender'], ['Age']])
     pairwise_corr(data, columns=[['Neuroticism'], ['Age', 'IQ']])
     pairwise_corr(data, columns=[['Age', 'IQ'], []])
     # 'Wrong' is not a column of ``data``; these calls must not raise
     pairwise_corr(data, columns=['Age', 'Gender', 'IQ', 'Wrong'])
     pairwise_corr(data, columns=['Age', 'Gender', 'Wrong'])
     # Test with no good combinations
     with pytest.raises(ValueError):
         pairwise_corr(data, columns=['Gender', 'Gender'])
     # Test when one column has only one unique value
     pairwise_corr(data=data, columns=['Age', 'One', 'Gender'])
     stats = pairwise_corr(data, columns=['Neuroticism', 'IQ', 'One'])
     assert stats.shape[0] == 1
     # Test with covariate
     pairwise_corr(data, covar='Age')
     pairwise_corr(data, covar=['Age', 'Neuroticism'])
     with pytest.raises(AssertionError):
         pairwise_corr(data, covar=['Age', 'Gender'])
     with pytest.raises(ValueError):
         pairwise_corr(data, columns=['Neuroticism', 'Age'], covar='Age')
     # Partial pairwise with missing values
     data.loc[4, 'Age'] = np.nan
     data.loc[10, 'Neuroticism'] = np.nan
     pairwise_corr(data, covar='Age')
     ######################################################################
     # MultiIndex columns
     from numpy.random import random as rdm
     # Create MultiIndex dataframe
     columns = pd.MultiIndex.from_tuples([('Behavior', 'Rating'),
                                          ('Behavior', 'RT'),
                                          ('Physio', 'BOLD'),
                                          ('Physio', 'HR'),
                                          ('Psycho', 'Anxiety')])
     data = pd.DataFrame(
         dict(Rating=rdm(size=10),
              RT=rdm(size=10),
              BOLD=rdm(size=10),
              HR=rdm(size=10),
              Anxiety=rdm(size=10)))
     data.columns = columns
     pairwise_corr(data, method='spearman')
     # A single column is paired with each of the remaining columns
     stats = pairwise_corr(data, columns=[('Behavior', 'Rating')])
     assert stats.shape[0] == data.shape[1] - 1
     pairwise_corr(data,
                   columns=[('Behavior', 'Rating'), ('Behavior', 'RT')])
     # ``[[...], None]`` and ``[[...]]`` must give the same 'X' column
     st1 = pairwise_corr(data,
                         columns=[[('Behavior', 'Rating'),
                                   ('Behavior', 'RT')], None])
     st2 = pairwise_corr(data,
                         columns=[[('Behavior', 'Rating'),
                                   ('Behavior', 'RT')]])
     assert st1['X'].equals(st2['X'])
     st3 = pairwise_corr(data,
                         columns=[[('Behavior', 'Rating')],
                                  [('Behavior', 'RT'), ('Physio', 'BOLD')]])
     assert st3.shape[0] == 2
     # With covar
     pairwise_corr(data, covar=[('Psycho', 'Anxiety')])
     pairwise_corr(data,
                   columns=[('Behavior', 'Rating')],
                   covar=[('Psycho', 'Anxiety')])
 def test_pairwise_corr(self):
     """Test function pairwise_corr.

     Compares the Pearson coefficients with reference values taken from
     JASP, then exercises multiple-comparison correction, export, the
     different accepted forms of ``columns`` and a dataset of 1500 rows.
     """
     import os
     import tempfile
     # Load JASP Big 5 DataSets (remove subject column)
     data = read_dataset('pairwise_corr').iloc[:, 1:]
     stats = pairwise_corr(data=data, method='pearson', tail='two-sided')
     jasp_rval = [-0.350, -0.01, -.134, -.368, .267, .055, .065, .159,
                  -.013, .159]
     # The reference values only have three decimals: round before
     # comparing so the check does not rely on the output being pre-rounded.
     assert np.allclose(stats['r'].round(3).values, jasp_rval)
     assert stats['n'].values[0] == 500
     # Correct for multiple comparisons
     pairwise_corr(data=data, method='spearman', tail='one-sided',
                   padjust='bonf')
     # Export (into a temporary directory so that no file is left behind
     # in the current working directory once the test is done)
     with tempfile.TemporaryDirectory() as tmp_dir:
         export_path = os.path.join(tmp_dir, 'test_export.csv')
         pairwise_corr(data=data, method='spearman', tail='one-sided',
                       export_filename=export_path)
     # Check with a subset of columns
     pairwise_corr(data=data, columns=['Neuroticism', 'Extraversion'])
     with pytest.raises(ValueError):
         pairwise_corr(data=data, tail='wrong')
     # Check with non-numeric columns
     data['test'] = 'test'
     pairwise_corr(data=data, method='pearson')
     # Check different variation of product / combination
     n = data.shape[0]
     data['Age'] = np.random.randint(18, 65, n)
     data['IQ'] = np.random.normal(105, 1, n)
     data['Gender'] = np.repeat(['M', 'F'], int(n / 2))
     pairwise_corr(data, columns=['Neuroticism', 'Gender'])
     pairwise_corr(data, columns=['Neuroticism', 'Extraversion', 'Gender'])
     pairwise_corr(data, columns=['Neuroticism'])
     pairwise_corr(data, columns='Neuroticism')
     pairwise_corr(data, columns=[['Neuroticism']])
     pairwise_corr(data, columns=[['Neuroticism'], None])
     pairwise_corr(data, columns=[['Neuroticism', 'Gender'], ['Age']])
     pairwise_corr(data, columns=[['Neuroticism'], ['Age', 'IQ']])
     pairwise_corr(data, columns=[['Age', 'IQ'], []])
     # 'Wrong' is not a column of ``data``; these calls must not raise
     pairwise_corr(data, columns=['Age', 'Gender', 'IQ', 'Wrong'])
     pairwise_corr(data, columns=['Age', 'Gender', 'Wrong'])
     # Test with more than 1000 rows (BF10 not computed): the dataset is
     # stacked three times row-wise, i.e. 3 x 500 = 1500 rows.
     data1500 = pd.concat([data, data, data], ignore_index=True)
     pcor1500 = pairwise_corr(data1500, method='pearson')
     assert 'BF10' not in pcor1500.keys()
 def test_pairwise_corr(self):
     """Check pairwise_corr on the JASP dataset and on MultiIndex data.

     The first half uses the JASP Big 5 dataset (reference coefficients,
     forms of ``columns``, covariates, comparison with ``pcorr``, missing
     values); the second half repeats the main checks on a small random
     dataframe whose columns are a MultiIndex.
     """
     # JASP Big 5 dataset, without the leading subject column
     data = read_dataset('pairwise_corr').iloc[:, 1:]
     res = pairwise_corr(data=data, method='pearson', tail='two-sided')
     expected_r = [-0.350, -0.01, -.134, -.368, .267,
                   .055, .065, .159, -.013, .159]
     observed_r = res['r'].round(3).to_numpy()
     assert np.allclose(observed_r, expected_r)
     first_n = res['n'].to_numpy()[0]
     assert first_n == 500
     # Multiple-comparison correction
     pairwise_corr(data=data, method='spearman', tail='one-sided',
                   padjust='bonf')
     # Subset of columns
     pairwise_corr(data=data, columns=['Neuroticism', 'Extraversion'])
     with pytest.raises(ValueError):
         pairwise_corr(data=data, columns='wrong')
     # Non-numeric column
     data['test'] = 'test'
     pairwise_corr(data=data, method='pearson')
     # Variations of product / combination: extra columns first
     n_rows = data.shape[0]
     data['Age'] = np.random.randint(18, 65, n_rows)
     data['IQ'] = np.random.normal(105, 1, n_rows)
     data['One'] = 1
     data['Gender'] = np.repeat(['M', 'F'], int(n_rows / 2))
     # Each entry: (``columns`` argument, extra keyword arguments)
     column_cases = [
         (['Neuroticism', 'Gender'], {'method': 'shepherd'}),
         (['Neuroticism', 'Extraversion', 'Gender'], {}),
         (['Neuroticism'], {}),
         ('Neuroticism', {'method': 'skipped'}),
         ([['Neuroticism']], {'method': 'spearman'}),
         ([['Neuroticism'], None], {'method': 'percbend'}),
         ([['Neuroticism', 'Gender'], ['Age']], {}),
         ([['Neuroticism'], ['Age', 'IQ']], {}),
         ([['Age', 'IQ'], []], {}),
         (['Age', 'Gender', 'IQ', 'Wrong'], {}),
         (['Age', 'Gender', 'Wrong'], {}),
     ]
     for cols, extra in column_cases:
         pairwise_corr(data, columns=cols, **extra)
     # No good combination at all
     with pytest.raises(ValueError):
         pairwise_corr(data, columns=['Gender', 'Gender'])
     # One column has a single unique value
     pairwise_corr(data=data, columns=['Age', 'One', 'Gender'])
     res = pairwise_corr(data, columns=['Neuroticism', 'IQ', 'One'])
     assert res.shape[0] == 1
     # Covariates
     pairwise_corr(data, covar='Age')
     pairwise_corr(data, covar=['Age', 'Neuroticism'])
     with pytest.raises(AssertionError):
         pairwise_corr(data, covar=['Age', 'Gender'])
     # Partial pairwise with overlapping covariates
     reduced = data.drop(columns=['One', 'Gender'])
     covariates = data.drop(columns=['test', 'One', 'Gender']).columns
     pairwise_corr(reduced, covar=covariates)
     pairwise_corr(data, columns='Neuroticism', covar='Age')
     with pytest.raises(AssertionError):
         pairwise_corr(data, columns=['Neuroticism', 'Age'], covar='One')
     # Comparison with pcorr
     pcorr_ref = data.drop(columns=['One', 'Gender']).pcorr().stack()
     pairwise_out = pairwise_corr(
         data.drop(columns=['One', 'Gender']),
         covar=data.drop(columns=['test', 'One', 'Gender']).columns)
     pairwise_r = pairwise_out[['X', 'Y', 'r']]
     pairwise_r = pairwise_r.set_index(['X', 'Y']).squeeze()
     assert np.allclose(pcorr_ref.reindex(pairwise_r.index), pairwise_r)
     # Partial pairwise with missing values
     data.loc[[4, 5, 8, 20, 22], 'Age'] = np.nan
     data.loc[[10, 12], 'Neuroticism'] = np.nan
     pairwise_corr(data)
     pairwise_corr(data, covar='Age')
     # Listwise deletion: a single sample size across all pairs
     listwise_covar = pairwise_corr(data, covar='Age',
                                    nan_policy='listwise')
     assert listwise_covar['n'].nunique() == 1
     listwise_plain = pairwise_corr(data, nan_policy='listwise')
     assert listwise_plain['n'].nunique() == 1
     ######################################################################
     # MultiIndex columns
     from numpy.random import random as rdm
     # Column labels of the MultiIndex dataframe
     rating = ('Behavior', 'Rating')
     rt = ('Behavior', 'RT')
     bold = ('Physio', 'BOLD')
     hr = ('Physio', 'HR')
     anxiety = ('Psycho', 'Anxiety')
     columns = pd.MultiIndex.from_tuples([rating, rt, bold, hr, anxiety])
     flat_names = ('Rating', 'RT', 'BOLD', 'HR', 'Anxiety')
     data = pd.DataFrame({name: rdm(size=10) for name in flat_names})
     data.columns = columns
     pairwise_corr(data, method='spearman')
     # A single column is paired with each of the remaining columns
     res = pairwise_corr(data, columns=[rating])
     assert res.shape[0] == data.shape[1] - 1
     pairwise_corr(data, columns=[rating, rt])
     # ``[[...], None]`` and ``[[...]]`` must give the same 'X' column
     st1 = pairwise_corr(data, columns=[[rating, rt], None])
     st2 = pairwise_corr(data, columns=[[rating, rt]])
     assert st1['X'].equals(st2['X'])
     st3 = pairwise_corr(data, columns=[[rating], [rt, bold]])
     assert st3.shape[0] == 2
     # With covar
     pairwise_corr(data, covar=[anxiety])
     pairwise_corr(data, columns=[rating], covar=[anxiety])
     pairwise_corr(data, covar=data.columns)
     # Comparison with pcorr
     pcorr_ref = data.pcorr()
     pcorr_ref.index = pcorr_ref.index.tolist()  # MultiIndex to tuples
     pcorr_ref.columns = pcorr_ref.columns.tolist()  # MultiIndex to tuples
     pairwise_out = pairwise_corr(data, covar=data.columns)
     pairwise_r = pairwise_out[['X', 'Y', 'r']]
     pairwise_r = pairwise_r.set_index(['X', 'Y']).squeeze()
     stacked_ref = pcorr_ref.stack().reindex(pairwise_r.index)
     assert np.allclose(stacked_ref, pairwise_r)
     # With missing values
     data.iloc[2, [2, 3]] = np.nan
     data.iloc[[1, 4], [1, 4]] = np.nan
     # Expected number of distinct sample sizes for each NaN policy
     for policy, n_distinct in (('listwise', 1), ('pairwise', 3)):
         out = pairwise_corr(data, nan_policy=policy)
         assert out['n'].nunique() == n_distinct
     for policy, n_distinct in (('listwise', 1), ('pairwise', 2)):
         out = pairwise_corr(data, columns=[rating], covar=[anxiety],
                             nan_policy=policy)
         assert out['n'].nunique() == n_distinct