def test_power_corr(self):
    """Check ``power_corr`` against reference values.

    The expected numbers come from the pwr R package. In every call one
    of ``r``, ``n``, ``power`` or ``alpha`` is left unset and solved for.
    """
    # Two-sided test (default tail): (arguments, expected solved value)
    two_sided_cases = [
        ({'r': 0.5, 'n': 20}, 0.6378746),
        ({'r': 0.5, 'power': 0.80}, 28.24841),
        ({'n': 20, 'power': 0.80}, 0.5821478),
    ]
    for kwargs, expected in two_sided_cases:
        assert np.allclose(power_corr(**kwargs), expected)

    # Solving for alpha is compared with a looser relative tolerance
    solved_alpha = power_corr(r=0.5, n=20, power=0.80, alpha=None)
    assert np.allclose(solved_alpha, 0.1377332, rtol=1e-03)

    # One-sided test (i.e. alternative = 'Greater' in R)
    one_sided_cases = [
        ({'r': 0.5, 'n': 20}, 0.7509873),
        ({'r': 0.5, 'power': 0.80}, 22.60907),
        ({'n': 20, 'power': 0.80}, 0.5286949),
    ]
    for kwargs, expected in one_sided_cases:
        assert np.allclose(power_corr(tail='one-sided', **kwargs), expected)

    # Only one argument given: a ValueError is expected
    with pytest.raises(ValueError):
        power_corr(r=0.5)
def corr(x, y, tail='two-sided', method='pearson'):
    """(Robust) correlation between two variables.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations. x and y must be independent.
    tail : string
        Specify whether to return 'one-sided' or 'two-sided' p-value.
    method : string
        Specify which method to use for the computation of the correlation
        coefficient. Available methods are ::

        'pearson' : Pearson product-moment correlation
        'spearman' : Spearman rank-order correlation
        'kendall' : Kendall’s tau (ordinal data)
        'percbend' : percentage bend correlation (robust)
        'shepherd' : Shepherd's pi correlation (robust Spearman)
        'skipped' : skipped correlation (robust Spearman, requires sklearn)

    Returns
    -------
    stats : pandas DataFrame
        Test summary (one row, indexed by ``method``) ::

        'n' : Sample size (after NaN removal)
        'outliers' : number of outliers (only for 'shepherd' or 'skipped')
        'r' : Correlation coefficient
        'CI95%' : 95% parametric confidence intervals
        'r2' : R-squared
        'adj_r2' : Adjusted R-squared
        'p-val' : one or two tailed p-value
        'BF10' : Bayes Factor of the alternative hypothesis (Pearson only)
        'power' : achieved power of the test (= 1 - type II error).

    Raises
    ------
    ValueError
        If x and y do not have the same size, or if ``method`` is not one
        of the supported names.
    AssertionError
        If the correlation coefficient is NaN.

    See also
    --------
    pairwise_corr : Pairwise correlation between columns of a pandas DataFrame
    partial_corr : Partial correlation

    Notes
    -----
    The Pearson correlation coefficient measures the linear relationship
    between two datasets. Strictly speaking, Pearson's correlation requires
    that each dataset be normally distributed. Correlations of -1 or +1 imply
    an exact linear relationship.

    The Spearman correlation is a nonparametric measure of the monotonicity of
    the relationship between two datasets. Unlike the Pearson correlation,
    the Spearman correlation does not assume that both datasets are normally
    distributed. Correlations of -1 or +1 imply an exact monotonic
    relationship.

    Kendall’s tau is a measure of the correspondence between two rankings.
    Values close to 1 indicate strong agreement, values close to -1 indicate
    strong disagreement.

    The percentage bend correlation [1]_ is a robust method that
    protects against univariate outliers.

    The Shepherd's pi [2]_ and skipped [3]_, [4]_ correlations are both robust
    methods that returns the Spearman's rho after bivariate outliers removal.
    Note that the skipped correlation requires that the scikit-learn
    package is installed (for computing the minimum covariance determinant).

    Please note that rows with NaN are automatically removed.

    If ``method='pearson'``, The Bayes Factor is calculated using the
    :py:func:`pingouin.bayesfactor_pearson` function.

    References
    ----------
    .. [1] Wilcox, R.R., 1994. The percentage bend correlation coefficient.
       Psychometrika 59, 601–616. https://doi.org/10.1007/BF02294395

    .. [2] Schwarzkopf, D.S., De Haas, B., Rees, G., 2012. Better ways to
       improve standards in brain-behavior correlation analysis. Front.
       Hum. Neurosci. 6, 200. https://doi.org/10.3389/fnhum.2012.00200

    .. [3] Rousselet, G.A., Pernet, C.R., 2012. Improving standards in
       brain-behavior correlation analyses. Front. Hum. Neurosci. 6, 119.
       https://doi.org/10.3389/fnhum.2012.00119

    .. [4] Pernet, C.R., Wilcox, R., Rousselet, G.A., 2012. Robust correlation
       analyses: false positive and power validation using a new open
       source matlab toolbox. Front. Psychol. 3, 606.
       https://doi.org/10.3389/fpsyg.2012.00606

    Examples
    --------
    1. Pearson correlation

    >>> import numpy as np
    >>> # Generate random correlated samples
    >>> np.random.seed(123)
    >>> mean, cov = [4, 6], [(1, .5), (.5, 1)]
    >>> x, y = np.random.multivariate_normal(mean, cov, 30).T
    >>> # Compute Pearson correlation
    >>> from pingouin import corr
    >>> corr(x, y)
              n      r         CI95%     r2  adj_r2     p-val  BF10  power
    pearson  30  0.491  [0.16, 0.72]  0.242   0.185  0.005813  8.55  0.809

    2. Pearson correlation with two outliers

    >>> x[3], y[5] = 12, -8
    >>> corr(x, y)
              n      r          CI95%     r2  adj_r2     p-val   BF10  power
    pearson  30  0.147  [-0.23, 0.48]  0.022  -0.051  0.439148  0.302  0.121

    3. Spearman correlation

    >>> corr(x, y, method="spearman")
               n      r         CI95%     r2  adj_r2     p-val  power
    spearman  30  0.401  [0.05, 0.67]  0.161   0.099  0.028034   0.61

    4. Percentage bend correlation (robust)

    >>> corr(x, y, method='percbend')
               n      r         CI95%     r2  adj_r2     p-val  power
    percbend  30  0.389  [0.03, 0.66]  0.151   0.089  0.033508  0.581

    5. Shepherd's pi correlation (robust)

    >>> corr(x, y, method='shepherd')
               n  outliers      r         CI95%     r2  adj_r2     p-val  power
    shepherd  30         2  0.437  [0.09, 0.69]  0.191   0.131  0.020128  0.694

    6. Skipped spearman correlation (robust)

    >>> corr(x, y, method='skipped')
              n  outliers      r         CI95%     r2  adj_r2     p-val  power
    skipped  30         2  0.437  [0.09, 0.69]  0.191   0.131  0.020128  0.694

    7. One-tailed Pearson correlation

    >>> corr(x, y, tail="one-sided", method='pearson')
              n      r          CI95%     r2  adj_r2     p-val   BF10  power
    pearson  30  0.147  [-0.23, 0.48]  0.022  -0.051  0.219574  0.467  0.194

    8. Using columns of a pandas dataframe

    >>> import pandas as pd
    >>> data = pd.DataFrame({'x': x, 'y': y})
    >>> corr(data['x'], data['y'])
              n      r          CI95%     r2  adj_r2     p-val   BF10  power
    pearson  30  0.147  [-0.23, 0.48]  0.022  -0.051  0.439148  0.302  0.121
    """
    x, y = np.asarray(x), np.asarray(y)

    # Both inputs must hold the same number of observations
    if x.size != y.size:
        raise ValueError('x and y must have the same length.')

    # Pairwise removal of missing values; the sample size is measured after
    x, y = remove_na(x, y, paired=True)
    n_obs = x.size

    # Dispatch on the requested method. Only 'shepherd' and 'skipped' also
    # return an outlier mask.
    if method == 'pearson':
        coef, p_two_sided = pearsonr(x, y)
    elif method == 'spearman':
        coef, p_two_sided = spearmanr(x, y)
    elif method == 'kendall':
        coef, p_two_sided = kendalltau(x, y)
    elif method == 'percbend':
        coef, p_two_sided = percbend(x, y)
    elif method == 'shepherd':
        coef, p_two_sided, outliers = shepherd(x, y)
    elif method == 'skipped':
        coef, p_two_sided, outliers = skipped(x, y, method='spearman')
    else:
        raise ValueError('Method not recognized.')

    assert not np.isnan(coef), 'Correlation returned NaN. Check your data.'

    # Coefficient of determination and its adjusted counterpart
    coef_sq = coef**2
    adj_coef_sq = 1 - (((1 - coef_sq) * (n_obs - 1)) / (n_obs - 3))

    # A perfect correlation (r2 == 1) bypasses the parametric CI / power
    # computation and uses fixed values instead.
    imperfect = coef_sq < 1
    if imperfect:
        conf_int = compute_esci(stat=coef, nx=n_obs, ny=n_obs, eftype='r')
        achieved_power = round(
            power_corr(r=coef, n=n_obs, power=None, alpha=0.05, tail=tail), 3)
    else:
        conf_int = [1., 1.]
        achieved_power = np.inf

    # The one-sided p-value is half the two-sided one
    if tail == 'two-sided':
        p_reported = p_two_sided
    else:
        p_reported = .5 * p_two_sided

    # Assemble the summary
    stats = {
        'n': n_obs,
        'r': round(coef, 3),
        'r2': round(coef_sq, 3),
        'adj_r2': round(adj_coef_sq, 3),
        'CI95%': [conf_int],
        'p-val': p_reported,
        'power': achieved_power
    }

    if method in ['shepherd', 'skipped']:
        stats['outliers'] = sum(outliers)

    # Bayes Factor is reported for the Pearson correlation only
    if method == 'pearson':
        if imperfect:
            stats['BF10'] = bayesfactor_pearson(coef, n_obs, tail=tail)
        else:
            stats['BF10'] = str(np.inf)

    # One-row DataFrame, keeping only the columns present, in display order
    stats = pd.DataFrame.from_records(stats, index=[method])
    display_order = [
        'n', 'outliers', 'r', 'CI95%', 'r2', 'adj_r2', 'p-val', 'BF10', 'power'
    ]
    present = stats.keys().tolist()
    return stats[[name for name in display_order if name in present]]
def rm_corr(data=None, x=None, y=None, subject=None, tail='two-sided'):
    """Repeated measures correlation.

    Parameters
    ----------
    data : pd.DataFrame
        Dataframe.
    x, y : string
        Name of columns in ``data`` containing the two dependent variables.
        Both columns must be numeric (boolean, signed or unsigned integer,
        or float dtype).
    subject : string
        Name of column in ``data`` containing the subject indicator.
    tail : string
        Specify whether to return 'one-sided' or 'two-sided' p-value.

    Returns
    -------
    stats : pandas DataFrame
        Test summary (one row, indexed by ``'rm_corr'``) ::

        'r' : Repeated measures correlation coefficient
        'dof' : Degrees of freedom
        'pval' : one or two tailed p-value
        'CI95%' : 95% parametric confidence intervals (stored as a string)
        'power' : achieved power of the test (= 1 - type II error).

    Raises
    ------
    AssertionError
        If ``data`` is not a DataFrame, if a column is missing, or if
        ``x`` / ``y`` are not numeric.
    ValueError
        If ``data`` contains fewer than 3 unique subjects.

    Notes
    -----
    Repeated measures correlation (rmcorr) is a statistical technique
    for determining the common within-individual association for paired
    measures assessed on two or more occasions for multiple individuals.

    From Bakdash and Marusich (2017):

        "Rmcorr accounts for non-independence among observations using analysis
        of covariance (ANCOVA) to statistically adjust for inter-individual
        variability. By removing measured variance between-participants,
        rmcorr provides the best linear fit for each participant using parallel
        regression lines (the same slope) with varying intercepts.
        Like a Pearson correlation coefficient, the rmcorr coefficient
        is bounded by − 1 to 1 and represents the strength of the linear
        association between two variables."

    Results have been tested against the `rmcorr` R package.

    Please note that NaN are automatically removed from the dataframe
    (listwise deletion).

    References
    ----------
    .. [1] Bakdash, J.Z., Marusich, L.R., 2017. Repeated Measures Correlation.
       Front. Psychol. 8, 456. https://doi.org/10.3389/fpsyg.2017.00456

    .. [2] Bland, J. M., & Altman, D. G. (1995). Statistics notes: Calculating
       correlation coefficients with repeated observations:
       Part 1—correlation within subjects. Bmj, 310(6977), 446.

    .. [3] https://github.com/cran/rmcorr

    Examples
    --------
    >>> import pingouin as pg
    >>> df = pg.read_dataset('rm_corr')
    >>> pg.rm_corr(data=df, x='pH', y='PacO2', subject='Subject')
                 r  dof      pval           CI95%  power
    rm_corr -0.507   38  0.000847  [-0.71, -0.23]   0.93
    """
    from pingouin import ancova, power_corr

    # Safety checks
    assert isinstance(data, pd.DataFrame), 'Data must be a DataFrame'
    assert x in data.columns, 'The %s column is not in data.' % x
    assert y in data.columns, 'The %s column is not in data.' % y
    # 'u' (unsigned integer) is accepted alongside bool / int / float so
    # that e.g. uint8 columns are not rejected as non-numeric.
    assert data[x].dtype.kind in 'bfiu', '%s must be numeric.' % x
    assert data[y].dtype.kind in 'bfiu', '%s must be numeric.' % y
    assert subject in data.columns, 'The %s column is not in data.' % subject
    if data[subject].nunique() < 3:
        raise ValueError('rm_corr requires at least 3 unique subjects.')

    # Remove missing values (listwise, restricted to the three columns used)
    data = data[[x, y, subject]].dropna(axis=0)

    # ANCOVA with the subject as between factor and x as covariate.
    # Row 1 of the table is read as the covariate and row 2 as the error term.
    aov = ancova(dv=y, covar=x, between=subject, data=data)
    bw = aov.bw_  # Beta within parameter
    sign = np.sign(bw)
    dof = int(aov.at[2, 'DF'])
    n = dof + 2
    ssfactor = aov.at[1, 'SS']
    sserror = aov.at[2, 'SS']
    # Signed square root of the share of variance explained by the covariate
    rm = sign * np.sqrt(ssfactor / (ssfactor + sserror))
    pval = aov.at[1, 'p-unc']
    pval = pval * 0.5 if tail == 'one-sided' else pval
    ci = compute_esci(stat=rm, nx=n, eftype='pearson').tolist()
    pwr = power_corr(r=rm, n=n, tail=tail)

    # Convert to Dataframe
    stats = pd.DataFrame(
        {
            "r": round(rm, 3),
            "dof": int(dof),
            "pval": pval,
            "CI95%": str(ci),
            "power": round(pwr, 3)
        },
        index=["rm_corr"])
    return stats
def corr(x, y, tail='two-sided', method='pearson'):
    """(Robust) correlation between two variables.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations. ``x`` and ``y`` must be
        independent.
    tail : string
        Specify whether to return ``'one-sided'`` or ``'two-sided'`` p-value.
        Note that the former are simply half the latter.
    method : string
        Correlation type:

        * ``'pearson'``: Pearson :math:`r` product-moment correlation
        * ``'spearman'``: Spearman :math:`\\rho` rank-order correlation
        * ``'kendall'``: Kendall's :math:`\\tau` correlation
          (for ordinal data)
        * ``'bicor'``: Biweight midcorrelation (robust)
        * ``'percbend'``: Percentage bend correlation (robust)
        * ``'shepherd'``: Shepherd's pi correlation (robust)
        * ``'skipped'``: Skipped correlation (robust)

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`
        One-row summary, indexed by ``method``:

        * ``'n'``: Sample size (after removal of missing values)
        * ``'outliers'``: number of outliers, only for the ``'shepherd'``
          and ``'skipped'`` methods
        * ``'r'``: Correlation coefficient
        * ``'CI95%'``: 95% parametric confidence intervals around :math:`r`
        * ``'r2'``: R-squared (:math:`= r^2`)
        * ``'adj_r2'``: Adjusted R-squared
        * ``'p-val'``: one- or two-tailed p-value, depending on ``tail``
        * ``'BF10'``: Bayes Factor of the alternative hypothesis
          (only for Pearson correlation)
        * ``'power'``: achieved power of the test (= 1 - type II error).

        If the correlation coefficient is NaN, no error is raised: a
        dataframe with ``'n'`` set and NaN in the ``'r'``, ``'CI95%'``,
        ``'r2'``, ``'adj_r2'``, ``'p-val'``, ``'BF10'`` and ``'power'``
        columns is returned instead.

    Raises
    ------
    AssertionError
        If ``x`` and ``y`` are not both 1D, or do not have the same size.
    ValueError
        If ``method`` is not one of the supported names.

    See also
    --------
    pairwise_corr : Pairwise correlation between columns of a pandas DataFrame
    partial_corr : Partial correlation
    rm_corr : Repeated measures correlation

    Notes
    -----
    The `Pearson correlation coefficient
    <https://en.wikipedia.org/wiki/Pearson_correlation_coefficient>`_
    measures the linear relationship between two datasets. Strictly speaking,
    Pearson's correlation requires that each dataset be normally distributed.
    Correlations of -1 or +1 imply a perfect negative and positive linear
    relationship, respectively, with 0 indicating the absence of association.

    .. math::
        r_{xy} = \\frac{\\sum_i(x_i - \\bar{x})(y_i - \\bar{y})}
        {\\sqrt{\\sum_i(x_i - \\bar{x})^2} \\sqrt{\\sum_i(y_i - \\bar{y})^2}}
        = \\frac{\\text{cov}(x, y)}{\\sigma_x \\sigma_y}

    where :math:`\\text{cov}` is the sample covariance and :math:`\\sigma`
    is the sample standard deviation.

    If ``method='pearson'``, The Bayes Factor is calculated using the
    :py:func:`pingouin.bayesfactor_pearson` function.

    The `Spearman correlation coefficient
    <https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_
    is a non-parametric measure of the monotonicity of the relationship between
    two datasets. Unlike the Pearson correlation, the Spearman correlation does
    not assume that both datasets are normally distributed. Correlations of -1
    or +1 imply an exact negative and positive monotonic relationship,
    respectively. Mathematically, the Spearman correlation coefficient is
    defined as the Pearson correlation coefficient between the
    `rank variables <https://en.wikipedia.org/wiki/Ranking>`_.

    The `Kendall correlation coefficient
    <https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient>`_
    is a measure of the correspondence between two rankings. Values also range
    from -1 (perfect disagreement) to 1 (perfect agreement), with 0 indicating
    the absence of association. Consistent with
    :py:func:`scipy.stats.kendalltau`, Pingouin returns the Tau-b coefficient,
    which adjusts for ties:

    .. math:: \\tau_B = \\frac{(P - Q)}{\\sqrt{(P + Q + T) (P + Q + U)}}

    where :math:`P` is the number of concordant pairs, :math:`Q` the number of
    discordant pairs, :math:`T` the number of ties in x, and :math:`U`
    the number of ties in y.

    The `biweight midcorrelation
    <https://en.wikipedia.org/wiki/Biweight_midcorrelation>`_ and
    percentage bend correlation [1]_ are both robust methods that
    protect against *univariate* outliers by down-weighting observations that
    deviate too much from the median.

    The Shepherd pi [2]_ correlation and skipped [3]_, [4]_ correlation are
    both robust methods that return the Spearman correlation coefficient after
    removing *bivariate* outliers. Briefly, the Shepherd pi uses a
    bootstrapping of the Mahalanobis distance to identify outliers, while the
    skipped correlation is based on the minimum covariance determinant
    (which requires scikit-learn). Note that these two methods are
    significantly slower than the previous ones.

    .. important:: Please note that rows with missing values (NaN) are
        automatically removed.

    References
    ----------
    .. [1] Wilcox, R.R., 1994. The percentage bend correlation coefficient.
       Psychometrika 59, 601–616. https://doi.org/10.1007/BF02294395

    .. [2] Schwarzkopf, D.S., De Haas, B., Rees, G., 2012. Better ways to
       improve standards in brain-behavior correlation analysis. Front.
       Hum. Neurosci. 6, 200. https://doi.org/10.3389/fnhum.2012.00200

    .. [3] Rousselet, G.A., Pernet, C.R., 2012. Improving standards in
       brain-behavior correlation analyses. Front. Hum. Neurosci. 6, 119.
       https://doi.org/10.3389/fnhum.2012.00119

    .. [4] Pernet, C.R., Wilcox, R., Rousselet, G.A., 2012. Robust correlation
       analyses: false positive and power validation using a new open
       source matlab toolbox. Front. Psychol. 3, 606.
       https://doi.org/10.3389/fpsyg.2012.00606

    Examples
    --------
    1. Pearson correlation

    >>> import numpy as np
    >>> import pingouin as pg
    >>> # Generate random correlated samples
    >>> np.random.seed(123)
    >>> mean, cov = [4, 6], [(1, .5), (.5, 1)]
    >>> x, y = np.random.multivariate_normal(mean, cov, 30).T
    >>> # Compute Pearson correlation
    >>> pg.corr(x, y).round(3)
              n      r         CI95%     r2  adj_r2  p-val  BF10  power
    pearson  30  0.491  [0.16, 0.72]  0.242   0.185  0.006  8.55  0.809

    2. Pearson correlation with two outliers

    >>> x[3], y[5] = 12, -8
    >>> pg.corr(x, y).round(3)
              n      r          CI95%     r2  adj_r2  p-val   BF10  power
    pearson  30  0.147  [-0.23, 0.48]  0.022  -0.051  0.439  0.302  0.121

    3. Spearman correlation (robust to outliers)

    >>> pg.corr(x, y, method="spearman").round(3)
               n      r         CI95%     r2  adj_r2  p-val  power
    spearman  30  0.401  [0.05, 0.67]  0.161   0.099  0.028   0.61

    4. Biweight midcorrelation (robust)

    >>> pg.corr(x, y, method="bicor").round(3)
            n      r         CI95%     r2  adj_r2  p-val  power
    bicor  30  0.393  [0.04, 0.66]  0.155   0.092  0.031  0.592

    5. Percentage bend correlation (robust)

    >>> pg.corr(x, y, method='percbend').round(3)
               n      r         CI95%     r2  adj_r2  p-val  power
    percbend  30  0.389  [0.03, 0.66]  0.151   0.089  0.034  0.581

    6. Shepherd's pi correlation (robust)

    >>> pg.corr(x, y, method='shepherd').round(3)
               n  outliers      r         CI95%     r2  adj_r2  p-val  power
    shepherd  30         2  0.437  [0.09, 0.69]  0.191   0.131   0.02  0.694

    7. Skipped spearman correlation (robust)

    >>> pg.corr(x, y, method='skipped').round(3)
              n  outliers      r         CI95%     r2  adj_r2  p-val  power
    skipped  30         2  0.437  [0.09, 0.69]  0.191   0.131   0.02  0.694

    8. One-tailed Pearson correlation

    >>> pg.corr(x, y, tail="one-sided", method='pearson').round(3)
              n      r          CI95%     r2  adj_r2  p-val   BF10  power
    pearson  30  0.147  [-0.23, 0.48]  0.022  -0.051   0.22  0.467  0.194

    9. Using columns of a pandas dataframe

    >>> import pandas as pd
    >>> data = pd.DataFrame({'x': x, 'y': y})
    >>> pg.corr(data['x'], data['y']).round(3)
              n      r          CI95%     r2  adj_r2  p-val   BF10  power
    pearson  30  0.147  [-0.23, 0.48]  0.022  -0.051  0.439  0.302  0.121
    """
    # Safety check
    x = np.asarray(x)
    y = np.asarray(y)
    assert x.ndim == y.ndim == 1, 'x and y must be 1D array.'
    assert x.size == y.size, 'x and y must have the same length.'

    # Remove rows with missing values
    x, y = remove_na(x, y, paired=True)
    nx = x.size

    # Compute correlation coefficient. Only 'shepherd' and 'skipped' also
    # return an outlier mask.
    if method == 'pearson':
        r, pval = pearsonr(x, y)
    elif method == 'spearman':
        r, pval = spearmanr(x, y)
    elif method == 'kendall':
        r, pval = kendalltau(x, y)
    elif method == 'bicor':
        r, pval = bicor(x, y)
    elif method == 'percbend':
        r, pval = percbend(x, y)
    elif method == 'shepherd':
        r, pval, outliers = shepherd(x, y)
    elif method == 'skipped':
        r, pval, outliers = skipped(x, y)
    else:
        raise ValueError('Method not recognized.')

    if np.isnan(r):
        # Correlation failed -- new in version v0.3.4, instead of raising an
        # error we just return a dataframe full of NaN (except sample size).
        # This avoids a sudden stop in pingouin.pairwise_corr.
        return pd.DataFrame(
            {
                'n': nx,
                'r': np.nan,
                'CI95%': np.nan,
                'r2': np.nan,
                'adj_r2': np.nan,
                'p-val': np.nan,
                'BF10': np.nan,
                'power': np.nan
            },
            index=[method])

    # Compute r2 and adj_r2
    r2 = r**2
    adj_r2 = 1 - (((1 - r2) * (nx - 1)) / (nx - 3))

    # Compute the parametric 95% confidence interval and power.
    # NOTE: ``pr`` must be a scalar. A trailing comma here would silently
    # turn it into a 1-tuple.
    ci = compute_esci(stat=r, nx=nx, ny=nx, eftype='r')
    pr = power_corr(r=r, n=nx, power=None, alpha=0.05, tail=tail)

    # Create dictionary (the CI is wrapped in a list so that it is stored
    # as a single cell rather than expanded into two rows)
    stats = {
        'n': nx,
        'r': r,
        'r2': r2,
        'adj_r2': adj_r2,
        'CI95%': [ci],
        'p-val': pval if tail == 'two-sided' else .5 * pval,
        'power': pr
    }

    if method in ['shepherd', 'skipped']:
        stats['outliers'] = sum(outliers)

    # Compute the BF10 for Pearson correlation only
    if method == 'pearson':
        stats['BF10'] = bayesfactor_pearson(r, nx, tail=tail)

    # Convert to DataFrame
    stats = pd.DataFrame.from_records(stats, index=[method])

    # Define order, keeping only the columns that are actually present
    col_keep = [
        'n', 'outliers', 'r', 'CI95%', 'r2', 'adj_r2', 'p-val', 'BF10', 'power'
    ]
    col_order = [k for k in col_keep if k in stats.keys().tolist()]
    return stats[col_order]
def rm_corr(data=None, x=None, y=None, subject=None, tail='two-sided'):
    """Repeated measures correlation.

    Parameters
    ----------
    data : :py:class:`pandas.DataFrame`
        Dataframe.
    x, y : string
        Name of columns in ``data`` containing the two dependent variables.
        Both columns must be numeric (boolean, signed or unsigned integer,
        or float dtype).
    subject : string
        Name of column in ``data`` containing the subject indicator.
    tail : string
        Specify whether to return 'one-sided' or 'two-sided' p-value.

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`
        One-row summary, indexed by ``'rm_corr'``:

        * ``'r'``: Repeated measures correlation coefficient
        * ``'dof'``: Degrees of freedom
        * ``'pval'``: one or two tailed p-value
        * ``'CI95%'``: 95% parametric confidence intervals
        * ``'power'``: achieved power of the test (= 1 - type II error).

    Raises
    ------
    AssertionError
        If ``data`` is not a DataFrame, if a column is missing, or if
        ``x`` / ``y`` are not numeric.
    ValueError
        If ``data`` contains fewer than 3 unique subjects.

    See also
    --------
    plot_rm_corr

    Notes
    -----
    Repeated measures correlation (rmcorr) is a statistical technique
    for determining the common within-individual association for paired
    measures assessed on two or more occasions for multiple individuals.

    From `Bakdash and Marusich (2017)
    <https://doi.org/10.3389/fpsyg.2017.00456>`_:

        *Rmcorr accounts for non-independence among observations using analysis
        of covariance (ANCOVA) to statistically adjust for inter-individual
        variability. By removing measured variance between-participants,
        rmcorr provides the best linear fit for each participant using parallel
        regression lines (the same slope) with varying intercepts.
        Like a Pearson correlation coefficient, the rmcorr coefficient
        is bounded by − 1 to 1 and represents the strength of the linear
        association between two variables.*

    Results have been tested against the
    `rmcorr <https://github.com/cran/rmcorr>`_ R package.

    Please note that missing values are automatically removed from the
    dataframe (listwise deletion).

    Examples
    --------
    >>> import pingouin as pg
    >>> df = pg.read_dataset('rm_corr')
    >>> pg.rm_corr(data=df, x='pH', y='PacO2', subject='Subject')
                   r  dof      pval           CI95%     power
    rm_corr -0.50677   38  0.000847  [-0.71, -0.23]  0.929579

    Now plot using the :py:func:`pingouin.plot_rm_corr` function:

    .. plot::

        >>> import pingouin as pg
        >>> df = pg.read_dataset('rm_corr')
        >>> g = pg.plot_rm_corr(data=df, x='pH', y='PacO2', subject='Subject')
    """
    from pingouin import ancova, power_corr

    # Safety checks
    assert isinstance(data, pd.DataFrame), 'Data must be a DataFrame'
    assert x in data.columns, 'The %s column is not in data.' % x
    assert y in data.columns, 'The %s column is not in data.' % y
    assert data[x].dtype.kind in 'bfiu', '%s must be numeric.' % x
    assert data[y].dtype.kind in 'bfiu', '%s must be numeric.' % y
    assert subject in data.columns, 'The %s column is not in data.' % subject
    if data[subject].nunique() < 3:
        raise ValueError('rm_corr requires at least 3 unique subjects.')

    # Remove missing values (listwise, restricted to the three columns used)
    data = data[[x, y, subject]].dropna(axis=0)

    # Using PINGOUIN
    # For max precision, make sure rounding is disabled while the ANCOVA
    # runs. The previous options are restored in a ``finally`` clause so
    # that a failure inside ``ancova`` cannot leave the global rounding
    # option permanently switched off.
    old_options = options.copy()
    options['round'] = None
    try:
        aov = ancova(dv=y, covar=x, between=subject, data=data)
    finally:
        options.update(old_options)  # restore options

    bw = aov.bw_  # Beta within parameter
    sign = np.sign(bw)
    # Row 1 of the table is read as the covariate and row 2 as the error term
    dof = int(aov.at[2, 'DF'])
    n = dof + 2
    ssfactor = aov.at[1, 'SS']
    sserror = aov.at[2, 'SS']
    # Signed square root of the share of variance explained by the covariate
    rm = sign * np.sqrt(ssfactor / (ssfactor + sserror))
    pval = aov.at[1, 'p-unc']
    pval = pval * 0.5 if tail == 'one-sided' else pval
    ci = compute_esci(stat=rm, nx=n, eftype='pearson').tolist()
    pwr = power_corr(r=rm, n=n, tail=tail)

    # Convert to Dataframe (the CI is wrapped in a list so that it occupies
    # a single cell)
    stats = pd.DataFrame({"r": rm, "dof": int(dof), "pval": pval,
                          "CI95%": [ci], "power": pwr}, index=["rm_corr"])
    return _postprocess_dataframe(stats)
def test_power_corr(self):
    """Check ``power_corr`` against reference values.

    The expected numbers come from the pwr R package. In every call one
    of ``r``, ``n``, ``power`` or ``alpha`` is left unset and solved for.
    """
    # Two-sided (default alternative): (arguments, expected solved value)
    two_sided_cases = [
        ({'r': 0.5, 'n': 20}, 0.6378746),
        ({'r': 0.5, 'power': 0.80}, 28.24841),
        ({'n': 20, 'power': 0.80}, 0.5821478),
    ]
    for kwargs, expected in two_sided_cases:
        assert np.allclose(power_corr(**kwargs), expected)

    # Solving for alpha is compared with a looser relative tolerance
    solved_alpha = power_corr(r=0.5, n=20, power=0.80, alpha=None)
    assert np.allclose(solved_alpha, 0.1377332, rtol=1e-03)

    # Directional alternatives, checked in the listed order
    directional_cases = {
        'greater': [
            ({'r': 0.5, 'n': 20}, 0.7509873),
            ({'r': -0.1, 'n': 20}, 0.01941224),
            ({'r': 0.5, 'power': 0.80}, 22.60907),
            ({'n': 20, 'power': 0.80}, 0.5286949),
        ],
        'less': [
            ({'r': -0.5, 'n': 20}, 0.7509873),
            ({'r': -0.1, 'n': 20}, 0.1118106),
            ({'r': 0.1, 'n': 20}, 0.01941224),
            ({'r': -0.5, 'power': 0.80}, 22.60907),
            ({'n': 20, 'power': 0.80}, -0.5286949),
        ],
    }
    for direction in ('greater', 'less'):
        for kwargs, expected in directional_cases[direction]:
            solved = power_corr(alternative=direction, **kwargs)
            assert np.allclose(solved, expected)

    # Error & Warning
    # Only one argument given: a ValueError is expected
    with pytest.raises(ValueError):
        power_corr(r=0.5)

    # Very small sample size: a UserWarning is expected within this block
    with pytest.warns(UserWarning):
        power_corr(r=0.5, n=4)
        power_corr(power=0.80, n=4)