Example #1
0
 def test_bayesfactor_ttest(self):
     """Check bayesfactor_ttest against stored reference Bayes factors."""
     # Stored reference values, keyed by the positional arguments used
     reference = (
         ((3.5, 20, 20), 26.743),
         ((3.5, 20), 17.185),
         ((3.5, 20, 1), 17.185),
     )
     for args, expected in reference:
         assert float(bayesfactor_ttest(*args)) == expected
     # Compare against BayesFactor::testBF
     # >>> ttestBF(df$x, df$y, paired = FALSE, rscale = "medium")
     row, col = 'T-test', 'BF10'
     assert ttest(x, y).at[row, col] == '0.183'
     assert ttest(x, y, paired=True).at[row, col] == '0.135'
     assert int(float(ttest(x, z).at[row, col])) == 1290
     assert int(float(ttest(x, z, paired=True).at[row, col])) == 420
     # Now check the alternative tails: the Bayes factor must exceed 1 only
     # when the requested tail matches the sign of the T-value
     tail_cases = (
         (3.5, 'greater', True),
         (3.5, 'less', False),
         (-3.5, 'greater', False),
         (-3.5, 'less', True),
     )
     for t_value, direction, above_one in tail_cases:
         bf10 = float(bayesfactor_ttest(t_value, 20, 20, tail=direction))
         if above_one:
             assert bf10 > 1
         else:
             assert bf10 < 1
     # Check with wrong T-value
     assert bayesfactor_ttest(np.nan, 20, paired=True) == 'nan'
Example #2
0
 def test_bayesfactor_ttest(self):
     """Check bayesfactor_ttest against stored reference Bayes factors."""
     # check for approximate equality with 1e-3 tolerance
     # (as this is how we store the values here)
     reference = (
         ((3.5, 20, 20), 26.743),
         ((3.5, 20), 17.185),
         ((3.5, 20, 1), 17.185),
     )
     for args, expected in reference:
         assert bayesfactor_ttest(*args) == appr(expected)
     # Compare against BayesFactor::testBF
     # >>> ttestBF(df$x, df$y, paired = FALSE, rscale = "medium")
     row, col = 'T-test', 'BF10'
     assert ttest(x, y).at[row, col] == '0.183'
     assert ttest(x, y, paired=True).at[row, col] == '0.135'
     assert int(float(ttest(x, z).at[row, col])) == 1290
     assert int(float(ttest(x, z, paired=True).at[row, col])) == 420
     # Now check the alternative tails: the Bayes factor must exceed 1 only
     # when the requested alternative matches the sign of the T-value
     tail_cases = (
         (3.5, 'greater', True),
         (3.5, 'less', False),
         (-3.5, 'greater', False),
         (-3.5, 'less', True),
     )
     for t_value, direction, above_one in tail_cases:
         bf10 = bayesfactor_ttest(t_value, 20, 20, alternative=direction)
         if above_one:
             assert bf10 > 1
         else:
             assert bf10 < 1
     # Check with wrong T-value
     assert np.isnan(bayesfactor_ttest(np.nan, 20, paired=True))
Example #3
0
def tost_upper(x, y, bound=1, paired=False, correction=False):
    """Run only the upper-bound half of a TOST.

    ``x`` is shifted down by ``bound`` and compared with ``y`` using a
    one-sided (``tail="less"``) T-test.

    Parameters
    ----------
    x, y : array_like
        Observations; both are converted with :py:func:`numpy.asarray`.
    bound : int or float
        Amount subtracted from ``x`` before testing.
    paired : boolean
        Forwarded to ``ttest``.
    correction : boolean
        Forwarded to ``ttest``.

    Returns
    -------
    stats : pandas DataFrame
        Single row labelled ``'TOST'`` holding ``'bound'``, ``'dof'``,
        ``'t-stat'`` and ``'pval'``.
    """
    from pingouin.parametric import ttest

    x, y = np.asarray(x), np.asarray(y)
    assert isinstance(bound, (int, float)), "bound must be int or float."

    row = "T-test"
    result = ttest(x - bound, y, paired=paired, correction=correction,
                   tail="less")
    p_value = result.at[row, "p-val"]
    t_value = result.at[row, "T"]

    # Collect the summary values, then wrap them in a one-row dataframe
    summary = {
        "bound": bound,
        "dof": result.at[row, "dof"],
        "t-stat": t_value,
        "pval": p_value
    }
    return pd.DataFrame.from_records(summary, index=["TOST"])
Example #4
0
    def test_ttest(self):
        """Test function ttest.

        Reference values are compared with Matlab, R and JASP.
        """
        row = 'T-test'

        def check_full(res, t_val, dof, p_val, ci, round_dof=False):
            """Check T, dof, p-value and CI95% of one ttest output."""
            assert round(res.loc[row, 'T'], 5) == t_val
            observed_dof = res.loc[row, 'dof']
            if round_dof:
                # Welch degrees of freedom are fractional
                observed_dof = round(observed_dof, 5)
            assert observed_dof == dof
            assert round(res.loc[row, 'p-val'], 5) == p_val
            array_equal(np.round(res.loc[row, 'CI95%'], 2), ci)

        def check_pval(res, p_val):
            """Check only the p-value of one ttest output."""
            assert round(res.loc[row, 'p-val'], 5) == p_val

        # Test different combinations of arguments (smoke tests only)
        noise = np.random.normal(scale=0.9, size=95)
        ttest(x, 0.5)
        ttest(x, y, paired=False, correction='auto')
        ttest(x, y, paired=False, correction=True)
        ttest(x, y, paired=False, r=0.5)
        ttest(x, noise, paired=True)

        a = [4, 7, 8, 6, 3, 2]
        b = [6, 8, 7, 10, 11, 9]

        # 1) One sample with y=0
        # R: t.test(a, mu=0)
        # Two-sided
        check_full(ttest(a, y=0, tail='two-sided'),
                   5.17549, 5, 0.00354, [2.52, 7.48])
        # One-sided (greater)
        check_full(ttest(a, y=0, tail='greater'),
                   5.17549, 5, 0.00177, [3.05, np.inf])
        # tail='one-sided' equals tail='greater'
        check_pval(ttest(a, y=0, tail='one-sided'), 0.00177)
        # One-sided (less)
        check_full(ttest(a, y=0, tail='less'),
                   5.17549, 5, 0.99823, [-np.inf, 6.95])

        # 2) One sample with y=4
        # R: t.test(a, mu=4)
        # Two-sided
        check_full(ttest(a, y=4, tail='two-sided'),
                   1.0351, 5, 0.34807, [2.52, 7.48])
        # One-sided (greater)
        check_full(ttest(a, y=4, tail='greater'),
                   1.0351, 5, 0.17403, [3.05, np.inf])
        # tail='one-sided' equals tail='greater'
        check_pval(ttest(a, y=4, tail='one-sided'), 0.17403)
        # One-sided (less)
        check_full(ttest(a, y=4, tail='less'),
                   1.0351, 5, 0.82597, [-np.inf, 6.95])

        # 3) Paired two-sample
        # R: t.test(a, b, paired=TRUE)
        # Two-sided
        check_full(ttest(a, b, paired=True, tail='two-sided'),
                   -2.44451, 5, 0.05833, [-7.18, 0.18])
        # One-sided (greater)
        check_full(ttest(a, b, paired=True, tail='greater'),
                   -2.44451, 5, 0.97084, [-6.39, np.inf])
        # One-sided (less)
        check_full(ttest(a, b, paired=True, tail='less'),
                   -2.44451, 5, 0.02916, [-np.inf, -0.61])
        # tail='one-sided' equals tail='less'
        check_pval(ttest(a, b, paired=True, tail='one-sided'), 0.02916)

        # When the two arrays are identical
        same = ttest(a, a, paired=True)
        nan_text = str(np.nan)
        assert str(same.loc[row, 'T']) == nan_text
        assert str(same.loc[row, 'p-val']) == nan_text
        assert same.loc[row, 'cohen-d'] == 0.
        assert same.loc[row, 'BF10'] == nan_text

        # 4) Independent two-samples, equal variance (no correction)
        # R: t.test(a, b, paired=FALSE, var.equal=TRUE)
        # Two-sided
        check_full(ttest(a, b, correction=False, tail='two-sided'),
                   -2.84199, 10, 0.01749, [-6.24, -0.76])
        # One-sided (greater)
        check_full(ttest(a, b, correction=False, tail='greater'),
                   -2.84199, 10, 0.99126, [-5.73, np.inf])
        # One-sided (less)
        check_full(ttest(a, b, correction=False, tail='less'),
                   -2.84199, 10, 0.00874, [-np.inf, -1.27])

        # 5) Independent two-samples, Welch correction
        # R: t.test(a, b, paired=FALSE, var.equal=FALSE)
        # Two-sided
        check_full(ttest(a, b, correction=True, tail='two-sided'),
                   -2.84199, 9.49438, 0.01837, [-6.26, -0.74],
                   round_dof=True)
        # One-sided (greater)
        check_full(ttest(a, b, correction=True, tail='greater'),
                   -2.84199, 9.49438, 0.99082, [-5.74, np.inf],
                   round_dof=True)
        # One-sided (less)
        check_full(ttest(a, b, correction=True, tail='less'),
                   -2.84199, 9.49438, 0.00918, [-np.inf, -1.26],
                   round_dof=True)
Example #5
0
 def test_ttest(self):
     """Smoke-test ttest options and compare one paired result with JASP."""
     row = 'T-test'
     noise = np.random.normal(scale=0.9, size=95)
     ttest(x, 0.5)
     jasp = ttest(x, y, paired=True, tail='one-sided')
     # Compare with JASP
     assert np.allclose(jasp.loc[row, 'T'], 0.616)
     assert np.allclose(jasp.loc[row, 'p-val'].round(3), .270)
     # Remaining calls only need to run without raising
     for extra in ({'correction': 'auto'}, {'correction': True}, {'r': 0.5}):
         ttest(x, y, paired=False, **extra)
     ttest(x, noise, paired=True)
Example #6
0
def pairwise_ttests(dv=None,
                    between=None,
                    within=None,
                    subject=None,
                    data=None,
                    alpha=.05,
                    tail='two-sided',
                    padjust='none',
                    effsize='hedges',
                    return_desc=False,
                    export_filename=None):
    '''Pairwise T-tests.

    Parameters
    ----------
    dv : string
        Name of column containing the dependent variable.
    between : string or list with 2 elements
        Name of column(s) containing the between factor(s).
    within : string or list with 2 elements
        Name of column(s) containing the within factor(s).
    subject : string
        Name of column containing the subject identifier. Compulsory for
        contrast including a within-subject factor.
    data : pandas DataFrame
        DataFrame
    alpha : float
        Significance level
    tail : string
        Indicates whether to return the 'two-sided' or 'one-sided' p-values
    padjust : string
        Method used for testing and adjustment of pvalues.
        Available methods are ::

        'none' : no correction
        'bonferroni' : one-step Bonferroni correction
        'holm' : step-down method using Bonferroni adjustments
        'fdr_bh' : Benjamini/Hochberg FDR correction
        'fdr_by' : Benjamini/Yekutieli FDR correction
    effsize : string or None
        Effect size type. Available methods are ::

        'none' : no effect size
        'cohen' : Unbiased Cohen d
        'hedges' : Hedges g
        'glass': Glass delta
        'eta-square' : Eta-square
        'odds-ratio' : Odds ratio
        'AUC' : Area Under the Curve
    return_desc : boolean
        If True, append group means and std to the output dataframe
    export_filename : string
        Filename (without extension) for the output file.
        If None, do not export the table.
        By default, the file will be created in the current python console
        directory. To change that, specify the filename with full path.

    Returns
    -------
    stats : DataFrame
        Stats summary ::

        'Contrast' : name of the tested factor (or 'fac1 * fac2')
        'A' : Name of first measurement
        'B' : Name of second measurement
        'Paired' : indicates whether the two measurements are paired or not
        'tail' : indicate whether the p-values are one-sided or two-sided
        'T' : T-values
        'p-unc' : Uncorrected p-values
        'p-corr' : Corrected p-values
        'p-adjust' : p-values correction method
        'BF10' : Bayes Factor
        'efsize' : effect sizes
        'eftype' : type of effect size

        Columns that contain only missing values are dropped.

    Raises
    ------
    ValueError
        If ``tail`` is not 'one-sided' or 'two-sided', if ``alpha`` is not a
        float, if both ``between`` and ``within`` contain several factors,
        or if a simple contrast column has fewer than two unique values.

    Notes
    -----
    If between or within is a list (e.g. ['col1', 'col2']), the function
    returns 1) the pairwise T-tests between each values of the first column,
    2) the pairwise T-tests between each values of the second column and
    3) the interaction between col1 and col2. The interaction is dependent
    of the order of the list, so ['col1', 'col2'] will not yield the same
    results as ['col2', 'col1'].

    In other words, if between is a list with two elements, the output model is
    between1 + between2 + between1 * between2.

    Similarly, if within is a list with two elements, the output model is
    within1 + within2 + within1 * within2.

    If both between and within are specified, the function return within +
    between + within * between.

    Examples
    --------
    1. One between-factor

        >>> from pingouin import pairwise_ttests
        >>> from pingouin.datasets import read_dataset
        >>> df = read_dataset('mixed_anova.csv')
        >>> post_hocs = pairwise_ttests(dv='Scores', between='Group', data=df)
        >>> print(post_hocs)

    2. One within-factor

        >>> post_hocs = pairwise_ttests(dv='Scores', within='Time',
        >>>                             subject='Subject', data=df)
        >>> print(post_hocs)

    3. Within + Between + Within * Between with corrected p-values

        >>> post_hocs = pairwise_ttests(dv='Scores', within='Time',
        >>>                             subject='Subject', between='Group',
        >>>                             padjust='bonf', data=df)
        >>> print(post_hocs)

    4. Between1 + Between2 + Between1 * Between2

        >>> pairwise_ttests(dv='Scores', between=['Group', 'Time'], data=df)
    '''
    from pingouin.parametric import ttest

    # Safety checks
    _check_dataframe(dv=dv,
                     between=between,
                     within=within,
                     subject=subject,
                     effects='all',
                     data=data)

    if tail not in ['one-sided', 'two-sided']:
        raise ValueError('Tail not recognized')

    if not isinstance(alpha, float):
        raise ValueError('Alpha must be float')

    # Check if we have multiple between or within factors
    multiple_between = False
    multiple_within = False
    contrast = None

    if isinstance(between, list):
        if len(between) > 1:
            multiple_between = True
            contrast = 'multiple_between'
            assert all([b in data.keys() for b in between])
        else:
            between = between[0]

    if isinstance(within, list):
        if len(within) > 1:
            multiple_within = True
            contrast = 'multiple_within'
            assert all([w in data.keys() for w in within])
        else:
            within = within[0]

    if all([multiple_within, multiple_between]):
        # Single message string: passing two comma-separated strings would
        # make the exception display a tuple instead of a sentence.
        raise ValueError("Multiple between and within factors are "
                         "currently not supported. Please select only one.")

    # Check the other cases
    if isinstance(between, str) and within is None:
        contrast = 'simple_between'
        assert between in data.keys()
    if isinstance(within, str) and between is None:
        contrast = 'simple_within'
        assert within in data.keys()
    if isinstance(between, str) and isinstance(within, str):
        contrast = 'within_between'
        assert all([between in data.keys(), within in data.keys()])

    # Initialize empty variables
    stats = pd.DataFrame([])
    ddic = {}

    if contrast in ['simple_within', 'simple_between']:
        # OPTION A: SIMPLE MAIN EFFECTS, WITHIN OR BETWEEN
        paired = True if contrast == 'simple_within' else False
        col = within if contrast == 'simple_within' else between
        # Remove NAN in repeated measurements
        if contrast == 'simple_within' and data[dv].isnull().values.any():
            data = _remove_rm_na(dv=dv,
                                 within=within,
                                 subject=subject,
                                 data=data)
        # Extract effects
        labels = data[col].unique().tolist()
        for label in labels:
            ddic[label] = data.loc[data[col] == label, dv].values
        # Number and labels of possible comparisons
        if len(labels) >= 2:
            combs = list(combinations(labels, 2))
        else:
            raise ValueError('Columns must have at least two unique values.')
        # One T-test and effect size per pair of levels
        for comb in combs:
            col1, col2 = comb
            x = ddic.get(col1)
            y = ddic.get(col2)
            df_ttest = ttest(x, y, paired=paired, tail=tail)
            ef = compute_effsize(x=x, y=y, eftype=effsize, paired=paired)
            stats = _append_stats_dataframe(stats, x, y, col1, col2, alpha,
                                            paired, df_ttest, ef, effsize)
        # Same contrast name for every row; ``combs`` is never empty here
        stats['Contrast'] = col

        # Multiple comparisons (skipped when there is a single comparison)
        padjust = None if stats['p-unc'].size <= 1 else padjust
        if padjust is not None:
            if padjust.lower() != 'none':
                _, stats['p-corr'] = multicomp(stats['p-unc'].values,
                                               alpha=alpha,
                                               method=padjust)
                stats['p-adjust'] = padjust
        else:
            stats['p-corr'] = None
            stats['p-adjust'] = None
    else:
        # B1: BETWEEN1 + BETWEEN2 + BETWEEN1 * BETWEEN2
        # B2: WITHIN1 + WITHIN2 + WITHIN1 * WITHIN2
        # B3: WITHIN + BETWEEN + WITHIN * BETWEEN
        if contrast == 'multiple_between':
            # B1
            factors = between
            fbt = factors
            fwt = [None, None]
            # eft = ['between', 'between']
            paired = False
        elif contrast == 'multiple_within':
            # B2
            factors = within
            fbt = [None, None]
            fwt = factors
            # eft = ['within', 'within']
            paired = True
        else:
            # B3
            factors = [within, between]
            fbt = [None, between]
            fwt = [within, None]
            # eft = ['within', 'between']
            paired = False

        # Main effect of each factor, computed by a recursive simple call
        for i, _ in enumerate(factors):
            main_effect = pairwise_ttests(dv=dv,
                                          between=fbt[i],
                                          within=fwt[i],
                                          subject=subject,
                                          data=data,
                                          alpha=alpha,
                                          tail=tail,
                                          padjust=padjust,
                                          effsize=effsize,
                                          return_desc=return_desc)
            # pd.concat replaces DataFrame.append, removed in pandas 2.0
            stats = pd.concat([stats, main_effect],
                              ignore_index=True,
                              sort=False)

        # Then compute the interaction between the factors
        labels_fac1 = data[factors[0]].unique().tolist()
        labels_fac2 = data[factors[1]].unique().tolist()
        comb_fac1 = list(combinations(labels_fac1, 2))
        comb_fac2 = list(combinations(labels_fac2, 2))
        lc_fac1 = len(comb_fac1)
        lc_fac2 = len(comb_fac2)

        for lw in labels_fac1:
            for lb in labels_fac2:
                tmp = data.loc[data[factors[0]] == lw]
                ddic[lw, lb] = tmp.loc[tmp[factors[1]] == lb, dv].values

        # Pairwise comparisons of the second factor within each level of
        # the first factor
        combs = list(product(labels_fac1, comb_fac2))
        for comb in combs:
            fac1, (col1, col2) = comb
            x = ddic.get((fac1, col1))
            y = ddic.get((fac1, col2))
            df_ttest = ttest(x, y, paired=paired, tail=tail)
            ef = compute_effsize(x=x, y=y, eftype=effsize, paired=paired)
            stats = _append_stats_dataframe(stats, x, y, col1, col2, alpha,
                                            paired, df_ttest, ef, effsize,
                                            fac1)

        # Update the Contrast columns: interaction rows are the ones that
        # follow the main-effect rows of both factors
        txt_inter = factors[0] + ' * ' + factors[1]
        idxitr = np.arange(lc_fac1 + lc_fac2, stats.shape[0]).tolist()
        stats.loc[idxitr, 'Contrast'] = txt_inter

        # Multi-comparison columns
        if padjust is not None and padjust.lower() != 'none':
            _, pcor = multicomp(stats.loc[idxitr, 'p-unc'].values,
                                alpha=alpha,
                                method=padjust)
            stats.loc[idxitr, 'p-corr'] = pcor
            stats.loc[idxitr, 'p-adjust'] = padjust

    # ---------------------------------------------------------------------
    stats['Paired'] = stats['Paired'].astype(bool)

    # Reorganize column order
    col_order = [
        'Contrast', 'Time', 'A', 'B', 'mean(A)', 'std(A)', 'mean(B)', 'std(B)',
        'Paired', 'T', 'tail', 'p-unc', 'p-corr', 'p-adjust', 'BF10', 'efsize',
        'eftype'
    ]

    if return_desc is False:
        stats.drop(columns=['mean(A)', 'mean(B)', 'std(A)', 'std(B)'],
                   inplace=True)

    stats = stats.reindex(columns=col_order)
    stats.dropna(how='all', axis=1, inplace=True)

    # Rename Time columns
    if contrast in ['multiple_within', 'multiple_between', 'within_between']:
        # Explicit assignment: chained ``fillna(..., inplace=True)`` on a
        # column selection has no effect under pandas copy-on-write
        stats['Time'] = stats['Time'].fillna('-')
        stats.rename(columns={'Time': factors[0]}, inplace=True)

    if export_filename is not None:
        _export_table(stats, export_filename)
    return stats
Example #7
0
def tost(x, y, bound=1, paired=False, correction=False):
    """Two One-Sided Test (TOST) for equivalence.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations. ``x`` and ``y`` should have the
        same units. If ``y`` is a single value (e.g. 0), a one-sample test is
        performed.
    bound : float
        Magnitude of region of similarity (a.k.a epsilon). Note that this
        should be expressed in the same unit as ``x`` and ``y``.
    paired : boolean
        Specify whether the two observations are related (i.e. repeated
        measures) or independent.
    correction : auto or boolean
        Specify whether or not to correct for unequal variances using Welch
        separate variances T-test. This only applies if ``paired`` is False.

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`

        * ``'bound'``: bound (= epsilon, or equivalence margin)
        * ``'dof'``: degrees of freedom
        * ``'pval'``: TOST p-value (the larger of the two one-sided p-values)

    See also
    --------
    ttest

    References
    ----------
    .. [1] Schuirmann, D.L. 1981. On hypothesis testing to determine if the
           mean of a normal distribution is contained in a known interval.
           Biometrics 37 617.

    .. [2] https://cran.r-project.org/web/packages/equivalence/equivalence.pdf

    Examples
    --------
    1. Independent two-sample TOST with a region of similarity of 1 (default)

    >>> import pingouin as pg
    >>> a = [4, 7, 8, 6, 3, 2]
    >>> b = [6, 8, 7, 10, 11, 9]
    >>> pg.tost(a, b)
          bound  dof      pval
    TOST      1   10  0.965097

    2. Paired TOST with a different region of similarity

    >>> pg.tost(a, b, bound=0.5, paired=True)
          bound  dof      pval
    TOST    0.5    5  0.954854

    3. One sample TOST

    >>> pg.tost(a, y=0, bound=4)
          bound  dof      pval
    TOST      4    5  0.825967
    """
    x = np.asarray(x)
    y = np.asarray(y)
    assert isinstance(bound, (int, float)), 'bound must be int or float.'

    # Two one-sided T-tests: x shifted up by ``bound`` tested as 'greater',
    # then x shifted down by ``bound`` tested as 'less'
    shifted_tests = ((x + bound, 'greater'), (x - bound, 'less'))
    upper, lower = [
        ttest(sample, y, paired=paired, correction=correction,
              alternative=side)
        for sample, side in shifted_tests
    ]
    row = 'T-test'
    pval = max(upper.at[row, 'p-val'], lower.at[row, 'p-val'])

    # Create output dataframe (dof is taken from the first test)
    summary = pd.DataFrame(
        {'bound': bound, 'dof': upper.at[row, 'dof'], 'pval': pval},
        index=['TOST'])
    return _postprocess_dataframe(summary)
Example #8
0
 def test_ttest(self):
     """Smoke-test ttest options and compare results with JASP and R."""
     row = 'T-test'
     allclose = np.testing.assert_allclose
     noise = np.random.normal(scale=0.9, size=95)
     ttest(x, 0.5)
     jasp = ttest(x, y, paired=True, tail='one-sided')
     # Compare with JASP
     assert np.allclose(jasp.loc[row, 'T'], 0.616)
     assert np.allclose(jasp.loc[row, 'p-val'].round(3), .270)
     # Remaining option combinations only need to run without raising
     for extra in ({'correction': 'auto'}, {'correction': True}, {'r': 0.5}):
         ttest(x, y, paired=False, **extra)
     ttest(x, noise, paired=True)
     # Compare with R t.test
     a = [4, 7, 8, 6, 3, 2]
     b = [6, 8, 7, 10, 11, 9]
     # - Two sample equal variances
     res = ttest(a, b, paired=False, correction=False, tail='two-sided')
     assert res.loc[row, 'T'] == -2.842
     assert res.loc[row, 'dof'] == 10
     assert round(res.loc[row, 'p-val'], 5) == 0.01749
     allclose(res.loc[row, 'CI95%'], [-6.24, -0.76])
     # - Two sample unequal variances
     res = ttest(a, b, paired=False, correction=True, tail='two-sided')
     assert res.loc[row, 'dof'] == 9.49
     assert round(res.loc[row, 'p-val'], 5) == 0.01837
     allclose(res.loc[row, 'CI95%'], [-6.26, -0.74])
     # - Paired
     res = ttest(a, b, paired=True, correction=False, tail='two-sided')
     assert res.loc[row, 'T'] == -2.445
     assert res.loc[row, 'dof'] == 5
     assert round(res.loc[row, 'p-val'], 5) == 0.05833
     allclose(res.loc[row, 'CI95%'], [-7.18, 0.18])
     # - One sample one-sided
     res = ttest(a, y=0, paired=False, correction=False, tail='one-sided')
     assert res.loc[row, 'T'] == 5.175
     assert res.loc[row, 'dof'] == 5
     assert round(res.loc[row, 'p-val'], 3) == 0.002
     allclose(res.loc[row, 'CI95%'], [3.05, 6.95])
Example #9
0
 def test_ttest(self):
     """Smoke-test ttest options and compare results with JASP and R."""
     row = 'T-test'
     allclose = np.testing.assert_allclose
     noise = np.random.normal(scale=0.9, size=95)
     ttest(x, 0.5)
     jasp = ttest(x, y, paired=True, tail='one-sided')
     # Compare with JASP
     assert np.allclose(jasp.loc[row, 'T'], 0.616)
     assert np.allclose(jasp.loc[row, 'p-val'].round(3), .270)
     # Remaining option combinations only need to run without raising
     for extra in ({'correction': 'auto'}, {'correction': True}, {'r': 0.5}):
         ttest(x, y, paired=False, **extra)
     ttest(x, noise, paired=True)
     # Compare with R t.test
     a = [4, 7, 8, 6, 3, 2]
     b = [6, 8, 7, 10, 11, 9]
     # - Two sample equal variances
     res = ttest(a, b, paired=False, correction=False, tail='two-sided')
     assert res.loc[row, 'T'] == -2.842
     assert res.loc[row, 'dof'] == 10
     assert round(res.loc[row, 'p-val'], 5) == 0.01749
     allclose(res.loc[row, 'CI95%'], [-6.24, -0.76])
     # - Two sample unequal variances
     res = ttest(a, b, paired=False, correction=True, tail='two-sided')
     assert res.loc[row, 'dof'] == 9.49
     assert round(res.loc[row, 'p-val'], 5) == 0.01837
     allclose(res.loc[row, 'CI95%'], [-6.26, -0.74])
     # - Paired
     res = ttest(a, b, paired=True, correction=False, tail='two-sided')
     assert res.loc[row, 'T'] == -2.445
     assert res.loc[row, 'dof'] == 5
     assert round(res.loc[row, 'p-val'], 5) == 0.05833
     allclose(res.loc[row, 'CI95%'], [-7.18, 0.18])
     # When the two arrays are identical
     res = ttest(a, a, paired=True)
     nan_text = str(np.nan)
     assert str(res.loc[row, 'T']) == nan_text
     assert str(res.loc[row, 'p-val']) == nan_text
     assert res.loc[row, 'cohen-d'] == 0.
     assert res.loc[row, 'BF10'] == nan_text
     # - One sample one-sided
     res = ttest(a, y=0, paired=False, correction=False, tail='one-sided')
     assert res.loc[row, 'T'] == 5.175
     assert res.loc[row, 'dof'] == 5
     assert round(res.loc[row, 'p-val'], 3) == 0.002
     allclose(res.loc[row, 'CI95%'], [3.05, np.inf])
     # - Two-sample equal variances, tail = 'greater'
     res = ttest(a, b, paired=False, tail='greater')
     assert res.loc[row, 'tail'] == 'greater'
     assert round(res.loc[row, 'p-val'], 4) == 0.9913
     assert float(res.loc[row, 'BF10']) < 1
     allclose(res.loc[row, 'CI95%'], [-5.73, np.inf])
     # tail = 'less'
     res = ttest(a, b, paired=False, tail='less')
     assert res.loc[row, 'tail'] == 'less'
     assert round(res.loc[row, 'p-val'], 5) == 0.00874
     assert float(res.loc[row, 'BF10']) > 1
     allclose(res.loc[row, 'CI95%'], [-np.inf, -1.27])
Example #10
0
def tost(x, y, paired=False, parametric=True, bound=0.3, correction=False):
    """Two one-sided tests (TOST), parametric or non-parametric.

    Two one-sided tests are run, each comparing one sample shifted upwards
    by ``bound`` against the other, unshifted sample.

    Parameters
    ----------
    x : array_like
        First set of observations.
    y : array_like or float
        Second set of observations.
    paired : boolean
        Specify whether the two observations are related (i.e. repeated
        measures) or independent.
    parametric : boolean
        If True (default), use the parametric :py:func:`ttest` function.
        If False, use :py:func:`pingouin.wilcoxon` or :py:func:`pingouin.mwu`
        for paired or unpaired samples, respectively.
    bound : float
        Magnitude of region of similarity
    correction : auto or boolean
        Specify whether or not to correct for unequal variances using Welch
        separate variances T-test. Only used when ``parametric`` is True.

    Returns
    -------
    stats : pandas DataFrame
        TOST summary (single row labelled ``'TOST'``) ::

        'upper' : larger of the two one-sided p-values
        'lower' : smaller of the two one-sided p-values
        'p-val' : TOST p-value (same value as 'upper')

    """
    # Each test receives one sample shifted by ``bound`` as first argument
    shifted_y = list(np.asarray(y) + bound)
    shifted_x = list(np.asarray(x) + bound)

    if parametric:
        row = 'T-test'
        res_a = ttest(shifted_y, x, paired=paired, tail='one-sided',
                      correction=correction)
        res_b = ttest(shifted_x, y, paired=paired, tail='one-sided',
                      correction=correction)
        # With a negative T-value, the p-value is replaced by 1 - p
        for res in (res_b, res_a):
            if res.loc[row, 'T'] < 0:
                res.loc[row, 'p-val'] = 1 - res.loc[row, 'p-val']
    elif paired:
        row = 'Wilcoxon'
        res_a = wilcoxon(shifted_y, x, tail='greater')
        res_b = wilcoxon(shifted_x, y, tail='greater')
    else:
        row = 'MWU'
        res_a = mwu(shifted_y, x, tail='greater')
        res_b = mwu(shifted_x, y, tail='greater')

    # Keep the larger p-value as the TOST p-value, the other as 'lower'
    p_b = res_b.loc[row, 'p-val']
    p_a = res_a.loc[row, 'p-val']
    if p_b >= p_a:
        pval, lpval = p_b, p_a
    else:
        pval, lpval = p_a, p_b

    # Convert to dataframe
    summary = pd.DataFrame.from_records(
        {'p-val': pval, 'upper': pval, 'lower': lpval}, index=['TOST'])

    summary = summary.reindex(columns=['upper', 'lower', 'p-val'])
    summary.dropna(how='all', axis=1, inplace=True)
    return summary