Exemple #1
0
    def test_compute_boot_esci(self):
        """Test function compute_bootci.

        Compare with Matlab bootci function on the `lawdata` dataset, then
        exercise the built-in univariate / bivariate statistics, custom
        callables, the invalid-``func`` error path and ``return_dist``.
        """
        from itertools import product
        from scipy.stats import skew, ttest_rel

        # This is the `lawdata` dataset in Matlab
        # >>> load lawdata
        # >>> x_m = gpa;
        # >>> y_m = lsat;
        x_m = [3.39, 3.3, 2.81, 3.03, 3.44, 3.07, 3.0, 3.43, 3.36, 3.13,
               3.12, 2.74, 2.76, 2.88, 2.96]
        y_m = [576, 635, 558, 578, 666, 580, 555, 661, 651, 605, 653, 575,
               545, 572, 594]

        # 1. bootci around a pearson correlation coefficient
        # Matlab: bootci(n_boot, {@corr, x_m, y_m}, 'type', 'norm');
        ci = compute_bootci(x_m, y_m, method='norm', seed=123)
        assert ci[0] == 0.52 and ci[1] == 1.05
        ci = compute_bootci(x_m, y_m, method='per', seed=123)
        assert ci[0] == 0.45 and ci[1] == 0.96
        ci = compute_bootci(x_m, y_m, method='cper', seed=123)
        assert ci[0] == 0.39 and ci[1] == 0.95

        # 2. Univariate function: mean
        ci_n = compute_bootci(x_m, func='mean', method='norm', seed=42)
        ci_p = compute_bootci(x_m, func='mean', method='per', seed=42)
        ci_c = compute_bootci(x_m, func='mean', method='cper', seed=42)
        assert ci_n[0] == 2.98 and ci_n[1] == 3.21
        assert ci_p[0] == 2.98 and ci_p[1] == 3.21
        assert ci_c[0] == 2.98 and round(ci_c[1], 1) == 3.2

        # 3. Univariate custom function: skewness
        n_boot = 10000
        ci_n = compute_bootci(x_m, func=skew, method='norm', n_boot=n_boot,
                              decimals=1, seed=42)
        ci_p = compute_bootci(x_m, func=skew, method='per', n_boot=n_boot,
                              decimals=1, seed=42)
        ci_c = compute_bootci(x_m, func=skew, method='cper', n_boot=n_boot,
                              decimals=1, seed=42)
        assert ci_n[0] == -0.7 and ci_n[1] == 0.8
        assert ci_p[0] == -0.7 and ci_p[1] == 0.8
        assert ci_c[0] == -0.7 and ci_c[1] == 0.8

        # 4. Bivariate custom function: paired T-test
        def paired_tval(a, b):
            # Keep only the T statistic (first element of the result).
            return ttest_rel(a, b)[0]

        ci_n = compute_bootci(x_m, y_m, func=paired_tval, method='norm',
                              n_boot=n_boot, decimals=0, seed=42)
        ci_p = compute_bootci(x_m, y_m, func=paired_tval, method='per',
                              n_boot=n_boot, decimals=0, seed=42)
        ci_c = compute_bootci(x_m, y_m, func=paired_tval, method='cper',
                              n_boot=n_boot, decimals=0, seed=42)
        assert ci_n[0] == -69 and ci_n[1] == -35
        assert ci_p[0] == -79 and ci_p[1] == -48
        assert ci_c[0] == -68 and ci_c[1] == -47

        # 5. Test all combinations
        # NOTE(review): `x` and `y` are not defined in this method; they are
        # presumably module-level sample arrays -- verify in the test module.
        methods = ['norm', 'per', 'cper']
        funcs = ['spearman', 'pearson', 'cohen', 'hedges']
        paired = [True, False]
        for m, f, p in product(methods, funcs, paired):
            # Forward `paired`: it was previously generated by the product
            # but never passed on, so the paired code path was not covered.
            compute_bootci(x, y, func=f, method=m, paired=p, seed=123,
                           n_boot=100)

        # Now the univariate function
        funcs = ['mean', 'std', 'var']
        for m, f in product(methods, funcs):
            compute_bootci(x, func=f, method=m, seed=123, n_boot=100)

        # An unknown function name must be rejected
        with pytest.raises(ValueError):
            compute_bootci(x, y, func='wrong')

        # Using a custom function (smoke test only, result is not checked)
        compute_bootci(x, y,
                       func=lambda x, y: np.sum(np.exp(x) / np.exp(y)),
                       n_boot=10000, decimals=4, confidence=.68, seed=None)

        # Get the bootstrapped distribution: one value per bootstrap sample
        _, bdist = compute_bootci(x, y, return_dist=True, n_boot=1500)
        assert bdist.size == 1500
Exemple #2
0
def plot_skipped_corr(x, y, xlabel=None, ylabel=None, n_boot=2000, seed=None):
    """Plot the bootstrapped 95% confidence intervals and distribution
    of a robust Skipped correlation.

    Parameters
    ----------
    x, y : 1D-arrays or list
        Samples. Must have the same number of elements.
    xlabel, ylabel : str
        Axes labels. Default to ``'x'`` and ``'y'`` when None.
    n_boot : int
        Number of bootstrap iterations for the computation of the
        confidence intervals
    seed : int or None
        Random seed passed to the bootstrap confidence intervals
        computation.

    Returns
    -------
    fig : matplotlib Figure instance
        Matplotlib Figure. To get the individual axes, use fig.axes.

    Notes
    -----
    This function is inspired by the Matlab Robust Correlation Toolbox (Pernet,
    Wilcox and Rousselet, 2012). It uses the skipped correlation to determine
    the outliers. Note that this function requires the scikit-learn package.

    The figure has three panels: a scatter plot with the regression line
    fitted on the non-outlier samples, the bootstrapped distribution of the
    Spearman correlation, and the bootstrapped distribution of the Pearson
    correlation. Both distributions are computed with outliers removed.

    References
    ----------
    .. [1] Pernet, C.R., Wilcox, R., Rousselet, G.A., 2012. Robust correlation
           analyses: false positive and power validation using a new open
           source matlab toolbox. Front. Psychol. 3, 606.
           https://doi.org/10.3389/fpsyg.2012.00606

    Examples
    --------

    Plot a robust Skipped correlation with bootstrapped confidence intervals

    .. plot::

        >>> import numpy as np
        >>> import pingouin as pg
        >>> np.random.seed(123)
        >>> mean, cov, n = [170, 70], [[20, 10], [10, 20]], 30
        >>> x, y = np.random.multivariate_normal(mean, cov, n).T
        >>> # Introduce two outliers
        >>> x[10], y[10] = 160, 100
        >>> x[8], y[8] = 165, 90
        >>> fig = pg.plot_skipped_corr(x, y, xlabel='Height', ylabel='Weight')
    """
    from pingouin.correlation import skipped
    from scipy.stats import pearsonr
    from pingouin.effsize import compute_bootci

    # Safety check
    x = np.asarray(x)
    y = np.asarray(y)
    assert x.size == y.size, 'x and y must have the same number of samples.'

    # Skipped Spearman / Pearson correlations
    # `outliers` is used below as a boolean mask over the samples.
    r, _, outliers = skipped(x, y, method='spearman')
    x_good, y_good = x[~outliers], y[~outliers]
    r_pearson, _ = pearsonr(x_good, y_good)

    # Bootstrapped skipped Spearman distribution & CI
    spearman_ci, spearman_dist = compute_bootci(x=x_good,
                                                y=y_good,
                                                func='spearman',
                                                n_boot=n_boot,
                                                return_dist=True,
                                                seed=seed)

    # Bootstrapped skipped Pearson distribution & CI
    pearson_ci, pearson_dist = compute_bootci(x=x_good,
                                              y=y_good,
                                              func='pearson',
                                              n_boot=n_boot,
                                              return_dist=True,
                                              seed=seed)

    # START THE PLOT
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 4.2))
    # plt.subplots_adjust(wspace=0.3)
    sns.despine()

    # Scatter plot and regression lines
    # x / y are passed by keyword: recent seaborn releases no longer accept
    # them positionally, while keywords work on every version.
    sns.regplot(x=x_good, y=y_good, ax=ax1, color='darkcyan')
    ax1.scatter(x[outliers], y[outliers], color='indianred', label='outliers')
    ax1.scatter(x_good, y_good, color='seagreen', label='good')

    # Labels
    xlabel = 'x' if xlabel is None else xlabel
    ylabel = 'y' if ylabel is None else ylabel
    ax1.set_xlabel(xlabel)
    ax1.set_ylabel(ylabel)
    ax1.set_title('Outliers (n={})'.format(outliers.sum()), y=1.05)

    # Spearman distribution
    # NOTE(review): `sns.distplot` is deprecated in recent seaborn releases.
    # Migrating to `histplot` would raise the minimum seaborn version, so it
    # is left unchanged here -- confirm the supported seaborn range.
    sns.distplot(spearman_dist, kde=True, ax=ax2, color='darkcyan')
    for bound in spearman_ci:
        ax2.axvline(x=bound, color='coral', lw=2)
    ax2.axvline(x=0, color='k', ls='--', lw=1.5)
    ax2.set_ylabel('Density of bootstrap samples')
    ax2.set_xlabel('Correlation coefficient')
    ax2.set_title('Skipped Spearman r = {}\n95% CI = [{}, {}]'.format(
        r.round(2), spearman_ci[0], spearman_ci[1]),
                  y=1.05)

    # Pearson distribution
    sns.distplot(pearson_dist, kde=True, ax=ax3, color='steelblue')
    for bound in pearson_ci:
        ax3.axvline(x=bound, color='coral', lw=2)
    ax3.axvline(x=0, color='k', ls='--', lw=1.5)
    ax3.set_xlabel('Correlation coefficient')
    ax3.set_title('Skipped Pearson r = {}\n95% CI = [{}, {}]'.format(
        r_pearson.round(2), pearson_ci[0], pearson_ci[1]),
                  y=1.05)

    # Optimize layout
    plt.tight_layout()

    return fig
    def test_compute_boot_esci(self):
        """Test function compute_bootci.

        Checks the three CI methods on a fixed dataset (values compared with
        Matlab), then smoke-tests every combination of method / statistic,
        the invalid-``func`` error path, a custom callable and
        ``return_dist``.
        """
        from itertools import product

        # Compare with Matlab
        x_m = [
            3.39, 3.3, 2.81, 3.03, 3.44, 3.07, 3.0, 3.43, 3.36, 3.13, 3.12,
            2.74, 2.76, 2.88, 2.96
        ]
        y_m = [
            576, 635, 558, 578, 666, 580, 555, 661, 651, 605, 653, 575, 545,
            572, 594
        ]
        ci = compute_bootci(x_m, y_m, method='norm', seed=123, decimals=2)
        assert ci[0] == 0.52 and ci[1] == 1.05
        ci = compute_bootci(x_m, y_m, method='per', seed=123, decimals=2)
        assert ci[0] == 0.45 and ci[1] == 0.96
        ci = compute_bootci(x_m, y_m, method='cper', seed=123, decimals=2)
        assert ci[0] == 0.39 and ci[1] == 0.95

        # Test all combinations
        # NOTE(review): `x` and `y` are not defined in this method; they are
        # presumably module-level sample arrays -- verify in the test module.
        methods = ['norm', 'per', 'cper']
        funcs = ['spearman', 'pearson', 'cohen', 'hedges']
        paired = [True, False]
        for m, f, p in product(methods, funcs, paired):
            # Forward `paired`: it was previously generated by the product
            # but never passed on, so the paired code path was not covered.
            compute_bootci(x, y, func=f, method=m, paired=p, seed=123,
                           n_boot=100)

        # Now the univariate function
        funcs = ['mean', 'std', 'var']
        for m, f in product(methods, funcs):
            compute_bootci(x, func=f, method=m, seed=123, n_boot=100)

        # An unknown function name must be rejected
        with pytest.raises(ValueError):
            compute_bootci(x, y, func='wrong')

        # Using a custom function (smoke test only, result is not checked)
        compute_bootci(x,
                       y,
                       func=lambda x, y: np.sum(np.exp(x) / np.exp(y)),
                       n_boot=10000,
                       decimals=4,
                       confidence=.68,
                       seed=None)

        # Get the bootstrapped distribution: one value per bootstrap sample
        _, bdist = compute_bootci(x, y, return_dist=True, n_boot=1500)
        assert bdist.size == 1500