def test_mad(self):
    """Test function mad."""
    # NOTE(review): a second ``test_mad`` defined later in this file
    # shadows this one, so it never runs — consider merging or renaming.
    values = [1.2, 3, 4.5, 2.4, 5, 6.7, 0.4]
    # Reference results computed with Matlab
    assert mad(values, normalize=False) == 1.8
    expected = np.round(1.8 * 1.4826, 3)
    assert np.round(mad(values), 3) == expected
def test_mad(self):
    """Test function mad."""
    from scipy.stats import median_abs_deviation as mad_scp
    values = [1.2, 3, 4.5, 2.4, 5, 6.7, 0.4]
    # Reference results computed with Matlab
    assert mad(values, normalize=False) == 1.8
    assert np.round(mad(values), 3) == np.round(1.8 * 1.4826, 3)
    # Axes handling -- compare against SciPy
    # (`w` is presumably a module-level fixture array — defined elsewhere)
    assert np.allclose(mad_scp(w, scale='normal'), mad(w))  # Axis = 0
    assert np.allclose(mad_scp(w, scale='normal', axis=1), mad(w, axis=1))
    assert np.allclose(mad_scp(w, scale='normal', axis=None),
                       mad(w, axis=None))
    # Missing values
    # Note that in Scipy 1.3.0, mad(axis=0/1) does not work properly
    # if data contains NaN, even when passing (nan_policy='omit')
    w_nan = w.copy()
    w_nan[3, 2] = np.nan
    assert np.allclose(
        mad_scp(w_nan, scale='normal', axis=None, nan_policy='omit'),
        mad(w_nan, axis=None))
    assert mad(w_nan, axis=0).size == w_nan.shape[1]
    assert mad(w_nan, axis=1).size == w_nan.shape[0]
    # `w` and `w_nan` must agree almost everywhere, i.e. except for the
    # row/column holding the injected NaN
    assert np.allclose(mad(w, axis=None), mad(w_nan, axis=None), atol=1e-02)
    assert sum(mad(w, axis=0) == mad(w_nan, axis=0)) == 9
    assert sum(mad(w, axis=1) == mad(w_nan, axis=1)) == 4
def skipped(x, y, method='spearman'):
    """
    Skipped correlation (Rousselet and Pernet 2012).

    Parameters
    ----------
    x, y : array_like
        First and second set of observations. x and y must be independent.
    method : str
        Method used to compute the correlation after outlier removal.
        Can be either 'spearman' (default) or 'pearson'.

    Returns
    -------
    r : float
        Skipped correlation coefficient.
    pval : float
        Two-tailed p-value.
    outliers : array of bool
        Indicate if value is an outlier or not

    Notes
    -----
    The skipped correlation involves multivariate outlier detection using
    a projection technique (Wilcox, 2004, 2005). First, a robust estimator
    of multivariate location and scatter, for instance the minimum
    covariance determinant estimator (MCD; Rousseeuw, 1984; Rousseeuw and
    van Driessen, 1999; Hubert et al., 2008) is computed. Second, data
    points are orthogonally projected on lines joining each of the data
    point to the location estimator. Third, outliers are detected using a
    robust technique. Finally, Spearman correlations are computed on the
    remaining data points and calculations are adjusted by taking into
    account the dependency among the remaining data points.

    Code inspired by Matlab code from Cyril Pernet and Guillaume
    Rousselet [1]_.

    Requires scikit-learn.

    References
    ----------
    .. [1] Pernet CR, Wilcox R, Rousselet GA. Robust Correlation Analyses:
       False Positive and Power Validation Using a New Open Source Matlab
       Toolbox. Frontiers in Psychology. 2012;3:606.
       doi:10.3389/fpsyg.2012.00606.
    """
    # Check that sklearn is installed
    from pingouin.utils import is_sklearn_installed
    is_sklearn_installed(raise_error=True)
    from scipy.stats import chi2
    from sklearn.covariance import MinCovDet

    X = np.column_stack((x, y))
    nrows, ncols = X.shape

    # Robust estimate of the multivariate location (MCD)
    center = MinCovDet().fit(X).location_

    # Chi-square based cutoff for the MAD-median outlier rule (2 dof,
    # 97.5th percentile), as in Wilcox's Matlab implementation
    gval = np.sqrt(chi2.ppf(0.975, 2))

    # For every data point i, orthogonally project all observations onto
    # the line joining point i to the robust center, and flag projections
    # that exceed the MAD-median cutoff.
    record = np.zeros(shape=(nrows, nrows))
    for i in np.arange(nrows):
        dis = np.zeros(nrows)
        B = (X[i, :] - center).T
        bot = np.sum(B**2)  # squared norm of the direction vector
        if bot != 0:
            for j in np.arange(nrows):
                A = X[j, :] - center
                # Orthogonal projection of A on B: (A . B / ||B||^2) * B.
                # BUGFIX: the Matlab reference is norm(A'*B/bot.*B), where
                # A'*B is a DOT product; the previous elementwise `A * B`
                # did not compute a projection.
                dis[j] = np.linalg.norm(np.dot(A, B) / bot * B)
            # Apply the MAD median rule. The previous dead assignment
            # `outliers = madmedianrule(dis)` was removed: its value was
            # discarded and `outliers` is rebuilt from `record` below.
            MAD = mad(dis)
            record[i, :] = dis > (np.median(dis) + gval * MAD)

    # A point is an outlier if flagged from at least one projection
    outliers = np.sum(record, axis=0) >= 1

    # Compute correlation on remaining data
    if method == 'spearman':
        r, pval = spearmanr(X[~outliers, 0], X[~outliers, 1])
    else:
        r, pval = pearsonr(X[~outliers, 0], X[~outliers, 1])
    return r, pval, outliers