def mwu(x, y, tail='two-sided'):
    """Mann-Whitney U Test (= Wilcoxon rank-sum test).

    It is the non-parametric version of the independent T-test.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations.
        x and y must be independent.
    tail : string
        Specify whether to return 'one-sided' or 'two-sided' p-value.
        With 'one-sided', the direction of the alternative is picked from
        the data: 'less' if median(x) < median(y), 'greater' otherwise.
        Any other value is forwarded unchanged to
        :py:func:`scipy.stats.mannwhitneyu` as ``alternative``.

    Returns
    -------
    stats : pandas DataFrame
        Test summary (single row, index 'MWU') ::

        'U-val' : U-value (rounded to 3 decimals)
        'p-val' : p-value
        'RBC'   : rank-biserial correlation (effect size)
        'CLES'  : common language effect size

    Notes
    -----
    mwu tests the hypothesis that data in x and y are samples from
    continuous distributions with equal medians. The test assumes that x
    and y are independent. This test corrects for ties and by default
    uses a continuity correction.

    Both inputs are passed through ``_remove_na`` (unpaired mode) before
    the test is computed.

    The common language effect size is the proportion of all (x_i, y_j)
    pairs that fall in the dominant direction, i.e.
    ``max(#{x_i < y_j}, #{x_i > y_j}) / (nx * ny)``; tied pairs are not
    counted in either direction.

    Examples
    --------
    1. Compare the medians of two independent samples.

        >>> import numpy as np
        >>> from pingouin import mwu
        >>> np.random.seed(123)
        >>> x = np.random.uniform(low=0, high=1, size=20)
        >>> y = np.random.uniform(low=0.2, high=1.2, size=20)
        >>> print("Medians = %.2f - %.2f" % (np.median(x), np.median(y)))
        >>> mwu(x, y, tail='two-sided')
            U-val   p-val     RBC    CLES
            97.0    0.006     0.51   0.75
    """
    from scipy.stats import mannwhitneyu
    x = np.asarray(x)
    y = np.asarray(y)

    # Remove NA
    x, y = _remove_na(x, y, paired=False)
    nx = x.size
    ny = y.size

    # Compute test. A one-sided request is resolved to the direction
    # suggested by the sample medians.
    if tail == 'one-sided':
        tail = 'less' if np.median(x) < np.median(y) else 'greater'
    uval, pval = mannwhitneyu(x, y, use_continuity=True, alternative=tail)

    # Effect size 1: common language effect size (McGraw and Wong 1992).
    # The pairwise comparisons are done with ufunc.outer so that no
    # Python-level list of nx * ny tuples has to be materialised.
    n_less = np.less.outer(x, y).sum()
    n_greater = np.greater.outer(x, y).sum()
    cles = max(n_less, n_greater) / (nx * ny)

    # Effect size 2: rank biserial correlation (Wendt 1972)
    rbc = 1 - (2 * uval) / (nx * ny)

    # Fill output DataFrame
    col_order = ['U-val', 'p-val', 'RBC', 'CLES']
    stats = pd.DataFrame({}, index=['MWU'])
    stats['U-val'] = uval.round(3)
    stats['p-val'] = pval
    stats['RBC'] = rbc
    stats['CLES'] = cles
    stats = stats.reindex(columns=col_order)
    return stats
def circ_corrcc(x, y, tail='two-sided'):
    """Circular-circular correlation coefficient.

    Parameters
    ----------
    x : np.array
        First circular variable (expressed in radians)
    y : np.array
        Second circular variable (expressed in radians)
    tail : string
        'two-sided' (default) or 'one-sided'. With 'one-sided' the
        two-sided p-value is divided by two.

    Returns
    -------
    r : float
        Correlation coefficient, rounded to 3 decimals
    pval : float
        Uncorrected p-value

    Raises
    ------
    ValueError
        If x and y do not contain the same number of elements.

    Notes
    -----
    Adapted from the CircStats MATLAB toolbox (Berens 2009).

    Angles must be given in radians; use the np.deg2rad function to
    convert from degrees.

    x and y are passed through ``_remove_na`` (paired mode) before the
    coefficient is computed.

    The test statistic is treated as approximately standard normal.

    Examples
    --------
    Compute the r and p-value of two circular variables

        >>> from pingouin import circ_corrcc
        >>> x = [0.785, 1.570, 3.141, 3.839, 5.934]
        >>> y = [0.593, 1.291, 2.879, 3.892, 6.108]
        >>> r, pval = circ_corrcc(x, y)
        >>> print(r, pval)
            0.942, 0.066
    """
    from scipy.stats import norm
    x = np.asarray(x)
    y = np.asarray(y)

    # Both samples must be the same size before pairwise NA removal
    if x.size != y.size:
        raise ValueError('x and y must have the same length.')

    # Remove NA
    x, y = _remove_na(x, y, paired=True)
    n_obs = x.size

    # Sine of the deviation of every angle from its sample circular mean
    dev_x = np.sin(x - circmean(x))
    dev_y = np.sin(y - circmean(y))
    sq_x = dev_x**2
    sq_y = dev_y**2

    # Correlation coefficient
    numer = np.sum(dev_x * dev_y)
    denom = np.sqrt(np.sum(sq_x) * np.sum(sq_y))
    r = numer / denom

    # Second- and fourth-order moments entering the test statistic
    l20 = sq_x.mean()
    l02 = sq_y.mean()
    l22 = np.mean(sq_x * sq_y)
    tval = np.sqrt((n_obs * l20 * l02) / l22) * r

    # Approximately distributed as a standard normal
    pval = 2 * norm.sf(abs(tval))
    if tail == 'one-sided':
        pval = pval / 2
    return np.round(r, 3), pval
def wilcoxon(x, y, tail='two-sided'):
    """Wilcoxon signed-rank test.

    It is the non-parametric version of the paired T-test.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations.
        x and y must be related (e.g repeated measures).
    tail : string
        Specify whether to return 'one-sided' or 'two-sided' p-value.
        With 'one-sided' the two-sided p-value is halved; any other
        value returns the two-sided p-value unchanged.

    Returns
    -------
    stats : pandas DataFrame
        Test summary (single row, index 'Wilcoxon') ::

        'W-val' : W-value (rounded to 3 decimals)
        'p-val' : p-value
        'RBC'   : matched pairs rank-biserial correlation (effect size)
        'CLES'  : common language effect size

    Notes
    -----
    The Wilcoxon signed-rank test tests the null hypothesis that two
    related paired samples come from the same distribution.
    A continuity correction is applied by default.

    x and y are passed through ``_remove_na`` (paired mode) before the
    test is computed.

    The matched-pairs rank-biserial correlation (Kerby 2014) is computed
    from the ranks of the absolute non-zero differences ``y - x``: it is
    the share of the total rank sum carried by pairs with ``y > x`` minus
    the share carried by pairs with ``y < x``. It is NaN when every
    difference is zero.

    The common language effect size is
    ``max(#{x_i < y_j}, #{x_i > y_j}) / (nx * ny)`` over all (x_i, y_j)
    pairs; tied pairs are not counted in either direction.

    Examples
    --------
    1. Wilcoxon test on two related samples.

        >>> import numpy as np
        >>> from pingouin import wilcoxon
        >>> x = [20, 22, 19, 20, 22, 18, 24, 20]
        >>> y = [38, 37, 33, 29, 14, 12, 20, 22]
        >>> print("Medians = %.2f - %.2f" % (np.median(x), np.median(y)))
        >>> wilcoxon(x, y, tail='two-sided')
    """
    # Aliased so the SciPy routine does not shadow this function's name.
    from scipy.stats import rankdata
    from scipy.stats import wilcoxon as scipy_wilcoxon
    x = np.asarray(x)
    y = np.asarray(y)

    # Remove NA
    x, y = _remove_na(x, y, paired=True)
    nx = x.size
    ny = y.size

    # Compute test (with continuity correction, as documented)
    wval, pval = scipy_wilcoxon(x, y, zero_method='wilcox', correction=True)
    # Only a one-sided request modifies the p-value. The conditional must
    # wrap the whole expression: `pval *= .5 if ... else pval` would
    # multiply the two-sided p-value by itself.
    if tail == 'one-sided':
        pval = pval * .5

    # Effect size 1: common language effect size (McGraw and Wong 1992).
    # Pairwise comparisons are vectorised with ufunc.outer.
    n_less = np.less.outer(x, y).sum()
    n_greater = np.greater.outer(x, y).sum()
    cles = max(n_less, n_greater) / (nx * ny)

    # Effect size 2: matched-pairs rank biserial correlation (Kerby 2014).
    # Ranks are those of the absolute differences (zero differences are
    # dropped, matching zero_method='wilcox'), so the result does not
    # depend on the order in which the pairs are listed.
    diff = y - x
    diff = diff[diff != 0]
    ranks = rankdata(np.abs(diff))
    rsum = ranks.sum()
    if rsum > 0:
        fav = ranks[diff > 0].sum()
        unfav = ranks[diff < 0].sum()
        rbc = fav / rsum - unfav / rsum
    else:
        # No non-zero difference: the effect size is undefined.
        rbc = np.nan

    # Fill output DataFrame
    col_order = ['W-val', 'p-val', 'RBC', 'CLES']
    stats = pd.DataFrame({}, index=['Wilcoxon'])
    stats['W-val'] = wval.round(3)
    stats['p-val'] = pval
    stats['RBC'] = rbc
    stats['CLES'] = cles
    stats = stats.reindex(columns=col_order)
    return stats
def circ_corrcl(x, y, tail='two-sided'):
    """Correlation coefficient between one circular and one linear
    random variable.

    Parameters
    ----------
    x : np.array
        Circular variable (expressed in radians)
    y : np.array
        Linear variable
    tail : string
        'two-sided' (default) or 'one-sided'. With 'one-sided' the
        p-value is divided by two.

    Returns
    -------
    r : float
        Correlation coefficient, rounded to 3 decimals
    pval : float
        Uncorrected p-value

    Raises
    ------
    ValueError
        If x and y do not contain the same number of elements.

    Notes
    -----
    Python code borrowed from brainpipe (based on the MATLAB toolbox
    CircStats).

    x and y are passed through ``_remove_na`` (paired mode) before the
    coefficient is computed.

    The p-value is the survival function of a chi-square distribution
    with 2 degrees of freedom evaluated at ``n * r**2``.

    Examples
    --------
    Compute the r and p-value between one circular and one linear
    variables.

        >>> from pingouin import circ_corrcl
        >>> x = [0.785, 1.570, 3.141, 0.839, 5.934]
        >>> y = [1.593, 1.291, -0.248, -2.892, 0.102]
        >>> r, pval = circ_corrcl(x, y)
        >>> print(r, pval)
            0.109, 0.971
    """
    from scipy.stats import pearsonr, chi2
    x = np.asarray(x)
    y = np.asarray(y)

    # Both samples must be the same size before pairwise NA removal
    if x.size != y.size:
        raise ValueError('x and y must have the same length.')

    # Remove NA
    x, y = _remove_na(x, y, paired=True)
    n_obs = x.size

    # Pearson correlations of the linear variable with the sine and the
    # cosine of the angle, and between sine and cosine themselves
    sin_x = np.sin(x)
    cos_x = np.cos(x)
    rxs = pearsonr(y, sin_x)[0]
    rxc = pearsonr(y, cos_x)[0]
    rcs = pearsonr(sin_x, cos_x)[0]

    # Angular-linear correlation (equ. 27.47)
    numer = rxc**2 + rxs**2 - 2 * rxc * rxs * rcs
    denom = 1 - rcs**2
    r = np.sqrt(numer / denom)

    # p-value from the chi-square survival function (2 dof)
    pval = chi2.sf(n_obs * r**2, 2)
    if tail == 'one-sided':
        pval = pval / 2
    return np.round(r, 3), pval