예제 #1
0
def test_effectsize_power():
    """Check one-way ANOVA effect size and power against PASS reference values.

    Two designs with the same group means and common variance are checked:
    a balanced one (12 observations in each of 3 groups) and an unbalanced
    one (15, 9 and 9 observations).
    """
    # example and results from PASS documentation
    from statsmodels.stats.power import FTestAnovaPower

    n_groups = 3
    means = [527.86, 660.43, 649.14]
    vars_ = 107.4304**2
    alpha = 0.05

    # example equal sample sizes
    nobs = 12
    es = effectsize_oneway(means, vars_, nobs, use_var="equal", ddof_between=0)
    # effectsize_oneway result is square-rooted before use as ``effect_size``
    es = np.sqrt(es)
    kwds = {
        'effect_size': es,
        'nobs': nobs * n_groups,  # the power method takes the total nobs
        'alpha': alpha,
        'k_groups': n_groups,
    }

    res_pow = 0.8251
    res_es = 0.559
    p = FTestAnovaPower().power(**kwds)
    assert_allclose(p, res_pow, atol=0.0001)
    assert_allclose(es, res_es, atol=0.0006)

    # example unequal sample sizes
    nobs = np.array([15, 9, 9])
    es = effectsize_oneway(means, vars_, nobs, use_var="equal", ddof_between=0)
    es = np.sqrt(es)
    # Build the arguments from this case's effect size and total sample size;
    # reusing the dict of the balanced case would only recompute the first
    # power value.
    kwds = {
        'effect_size': es,
        'nobs': nobs.sum(),
        'alpha': alpha,
        'k_groups': n_groups,
    }
    p = FTestAnovaPower().power(**kwds)

    res_pow = 0.8297
    res_es = 0.590
    assert_allclose(p, res_pow, atol=0.005)  # lower than print precision
    assert_allclose(es, res_es, atol=0.0006)
예제 #2
0
def test_simulate_equivalence():
    """Regression test for simulated and asymptotic equivalence-test power.

    The Monte Carlo part needs a large ``k_mc`` to be reliable; it is kept
    small here and only compared with stored regression numbers.
    """
    n_groups = 4
    replication = 10
    group_sizes = np.array([10, 12, 13, 15]) * replication
    group_means = np.array([-1, 0, 0, 1]) * 0.12
    group_vars = np.array([1, 2, 3, 4])
    n_total = group_sizes.sum()

    margin_wellek = 0.0191 * 10
    var_options = ["unequal", "equal", "bf"]
    n_mc = 100

    # seed the global NumPy RNG right before the simulation call
    np.random.seed(987126)
    sim = smo.simulate_power_equivalence_oneway(
        group_means,
        group_sizes,
        margin_wellek,
        vars_=group_vars,
        k_mc=n_mc,
        trim_frac=0.1,
        options_var=var_options,
        margin_type="wellek",
    )

    # rejection frequency at the 5% level, one entry per variance option
    reject_counts = (sim.pvalue <= 0.05).sum(0)
    assert_allclose(reject_counts / n_mc, [0.17, 0.18, 0.14], atol=0.001)
    # result with k_mc = 10000 is [0.1466, 0.1871, 0.1606]
    # similar to asy below, but not very close for all

    # effect size under the alternative for each variance option
    effect_sizes = [
        effectsize_oneway(group_means, group_vars, group_sizes, use_var=opt)
        for opt in var_options
    ]

    # compute asy power as comparison
    margin_f2 = wellek_to_f2(margin_wellek, n_groups)
    asy_power = []
    for es_alt in effect_sizes:
        asy_power.append(
            power_equivalence_oneway(es_alt,
                                     margin_f2,
                                     n_total,
                                     n_groups=n_groups,
                                     df=None,
                                     alpha=0.05,
                                     margin_type="f2"))

    # regression test numbers
    assert_allclose(asy_power, [0.147749, 0.173358, 0.177412], atol=0.007)
def coehen_f(data, groups, metric):
    """Effect size for anova power analysis, computed from per-group statistics.

    The mean, variance and size of ``metric`` within each group are passed to
    ``effectsize_oneway`` with ``use_var='equal'``.

    Args:
        data (DataFrame): DataFrame with columns 'groups' and 'metric'.
        groups (str): Name of the groups column in the data, i.e. the groups for the anova test.
        metric (str): Name of the metric column in the data, i.e. the dependent variable.

    Returns:
        cohens_f (float): Effect size as returned by ``effectsize_oneway``.
            NOTE(review): statsmodels documents that return value as the
            squared Cohen's f (f2); confirm whether callers need to take the
            square root before using it as ``effect_size`` in a power
            computation.
    """
    # Group once; sort=False keeps groups in order of first appearance, so the
    # three per-group results below are aligned with each other.
    grouped = data.groupby([groups], sort=False)[metric]

    # Get the means and variance of the data
    means = grouped.apply(mean).to_list()
    variances = grouped.apply(variance).to_list()

    # Per-group sample sizes as an array. A scalar nobs is interpreted by
    # effectsize_oneway as a balanced design with that many observations in
    # every group, so the total row count must not be passed here.
    group_sizes = grouped.size().to_numpy()

    # Calculate effect size
    cohens_f = effectsize_oneway(means, variances, group_sizes, use_var='equal')
    return cohens_f
예제 #4
0
    def test_ols_noncentrality(self):
        """Compare Wald-test noncentrality of an OLS fit with one-way results.

        Checks the generic and the results-based noncentrality helpers against
        the Wald statistic, the implied effect size and power against the
        one-way ANOVA functions, and the vectorized (``joint=False``) version
        against row-by-row calls.
        """
        n_groups = self.k_groups

        fit = OLS(self.y, self.ex).fit()
        n_total = fit.model.nobs
        param_cov = fit.cov_params()

        # constraint matrix: row i has 1 in column 0 and -1 in column i + 1,
        # with all constraint values equal to zero
        restr = -np.eye(n_groups)[1:]
        restr[:, 0] = 1
        n_restr = restr.shape[0]
        rhs = np.zeros(n_restr)

        # noncentrality at estimated parameters
        wald = fit.wald_test(restr, scalar=True)
        df_num = wald.df_num
        df_denom = wald.df_denom

        nc_generic = wald_test_noncent_generic(
            fit.params, restr, rhs, param_cov, diff=None, joint=True)
        assert_allclose(nc_generic, wald.statistic * wald.df_num, rtol=1e-13)

        nc_results = wald_test_noncent(
            fit.params, restr, rhs, fit, diff=None, joint=True)
        assert_allclose(nc_results, nc_generic, rtol=1e-13)

        # effect size implied by the noncentrality vs. one-way effect size
        es_from_ols = nc_generic / n_total
        es_from_oneway = smo.effectsize_oneway(
            fit.params, fit.scale, self.nobs, use_var="equal")
        assert_allclose(es_from_ols, es_from_oneway, rtol=1e-13)

        # power from both effect sizes has to agree as well
        alpha = 0.05
        power_ols = smpwr.ftest_power(
            np.sqrt(es_from_ols), df_denom, df_num, alpha, ncc=1)
        power_oneway = smpwr.ftest_anova_power(
            np.sqrt(es_from_oneway), n_total, alpha, k_groups=n_groups,
            df=None)
        assert_allclose(power_ols, power_oneway, rtol=1e-13)

        # noncentrality at other params
        params_alt = fit.params * 0.75
        # compute constraint value so we can get noncentrality from wald_test
        rhs_shift = _offset_constraint(restr, fit.params, params_alt)
        wald_shifted = fit.wald_test((restr, rhs + rhs_shift), scalar=True)
        nc_alt = wald_test_noncent_generic(
            params_alt, restr, rhs, param_cov, diff=None, joint=True)
        assert_allclose(
            nc_alt, wald_shifted.statistic * wald_shifted.df_num, rtol=1e-13)

        # check vectorized version, joint=False
        nc_by_row = wald_test_noncent_generic(
            params_alt, restr, rhs, param_cov, diff=None, joint=False)
        for i in range(n_restr):
            # single-row slices keep the 2-dim / 1-dim shapes of the inputs
            row = slice(i, i + 1)
            nc_single = wald_test_noncent_generic(
                params_alt, restr[row], rhs[row], param_cov, diff=None,
                joint=False)
            assert_allclose(nc_by_row[i], nc_single, rtol=1e-13)
예제 #5
0
    def test_equivalence_welch(self):
        """Equivalence test and its power with ``use_var="unequal"``.

        Reference numbers are from Jan and Shieh 2019, p. 6.
        """
        group_means = self.means
        group_sizes = self.nobs
        k = self.n_groups
        group_vars = self.stds**2
        n_total = group_sizes.sum()

        margin_w = 0.5
        ref_pvalue = 0.0110
        ref_df = [3.0, 22.6536]
        ref_fstat = 0.1102
        ref_power = 0.1552

        # test based on summary statistics
        anova = anova_generic(group_means,
                              group_vars,
                              group_sizes,
                              use_var="unequal",
                              welch_correction=False)
        fval = anova.statistic
        equiv = equivalence_oneway_generic(fval,
                                           k,
                                           n_total,
                                           margin_w,
                                           anova.df,
                                           alpha=0.05,
                                           margin_type="wellek")
        assert_allclose(equiv.pvalue, ref_pvalue, atol=0.001)
        assert_allclose(equiv.df, ref_df, atol=0.0006)

        # agreement for Welch f-stat looks too low b/c welch_correction=False
        assert_allclose(fval, ref_fstat, atol=0.007)

        # same test computed from the data
        equiv = equivalence_oneway(self.data,
                                   margin_w,
                                   use_var="unequal",
                                   margin_type="wellek")
        assert_allclose(equiv.pvalue, ref_pvalue, atol=1e-4)
        assert_allclose(equiv.df, ref_df, atol=0.0006)
        assert_allclose(equiv.f_stat, ref_fstat, atol=1e-4)  # 0.007)

        # check post-hoc power, JS p. 6
        power = _power_equivalence_oneway_emp(fval, k, group_sizes, margin_w,
                                              anova.df)
        assert_allclose(power, ref_power, atol=0.007)

        # effect size equal to the margin: power is compared with alpha
        power = power_equivalence_oneway(margin_w,
                                         margin_w,
                                         n_total,
                                         n_groups=k,
                                         df=None,
                                         alpha=0.05,
                                         margin_type="wellek")
        assert_allclose(power, 0.05, atol=1e-13)

        # effect size on the wellek scale, obtained in two ways
        es = effectsize_oneway(group_means, group_vars, group_sizes,
                               use_var="unequal")
        es = np.sqrt(es)
        es_wellek_f2 = f2_to_wellek(es**2, k)
        es_wellek_fstat = np.sqrt(fstat_to_wellek(fval, k, n_total / k))

        power = power_equivalence_oneway(es_wellek_fstat,
                                         margin_w,
                                         n_total,
                                         n_groups=k,
                                         df=None,
                                         alpha=0.05,
                                         margin_type="wellek")
        assert_allclose(power, ref_power, atol=0.007)
        assert_allclose(es_wellek_f2, es_wellek_fstat, atol=0.007)

        # same power with effect size and margin given on the f2 scale
        margin_f2 = wellek_to_f2(margin_w, k)
        power = power_equivalence_oneway(es**2,
                                         margin_f2,
                                         n_total,
                                         n_groups=k,
                                         df=None,
                                         alpha=0.05,
                                         margin_type="f2")
        assert_allclose(power, ref_power, atol=0.007)