Ejemplo n.º 1
0
def test_score_confint_koopman_nam():
    """Check the Koopman and paired Nam risk-ratio confidence intervals.

    The first part compares ``smprop._confint_riskratio_koopman`` with the
    roots and interval hard-coded below (example attributed to Nam 1995 in
    the inline comment). The second part is a regression check of
    ``smprop._confint_riskratio_paired_nam`` against previously computed
    values.
    """

    # example Koopman, based on Nam 1995

    x0, n0 = 16, 80
    x1, n1 = 36, 40
    # x = x0 + x1
    # n = n0 + n1
    # p0 = x0 / n0
    # p1 = x1 / n1

    # reference values, given to 4 and 3 decimals respectively
    results_nam = Holder()
    results_nam.p0_roots = [0.1278, 0.2939, 0.4876]
    results_nam.conf_int = [2.940, 7.152]

    # note the argument order: sample 1 is passed first, sample 0 second
    res = smprop._confint_riskratio_koopman(x1, n1, x0, n0, alpha=0.05)

    # NOTE(review): ``atol`` is an absolute tolerance, so ``atol=4`` and
    # ``atol=3`` accept differences of up to 4.0 and 3.0 and make these
    # checks nearly vacuous. Presumably a number of decimals was intended
    # (e.g. ``atol=1e-4`` / ``atol=1e-3``) - confirm that the computed values
    # actually match the references at that precision before tightening.
    assert_allclose(res._p_roots, results_nam.p0_roots, atol=4)
    assert_allclose(res.confint, results_nam.conf_int, atol=3)

    table = [67, 9, 7, 16]  # [67, 7, 9, 16]
    resp = smprop._confint_riskratio_paired_nam(table, alpha=0.05)
    # TODO: currently regression test, need verified results
    ci_old = [0.917832, 1.154177]
    # NOTE(review): same tolerance concern as above (``atol=3`` is 3.0).
    assert_allclose(resp.confint, ci_old, atol=3)
Ejemplo n.º 2
0
    def setup_class(cls):
        """Simulate hurdle negative-binomial counts, fit them, store results.

        ``cls.res1`` holds the fitted ``HurdleCountModel`` results for the
        simulated data; ``cls.res2`` holds the expected bookkeeping values
        (number of observations, parameter counts, degrees of freedom and
        parameter names) in a ``Holder``.
        """
        n_obs = 2000
        design = np.column_stack((np.ones(n_obs), np.linspace(0, 3, n_obs)))
        # placeholder response, it only needs some zeros and non-zeros
        placeholder_y = np.arange(n_obs) // (n_obs / 3)

        # predicted probabilities of the counts 0..49 at the chosen
        # data-generating parameters
        dgp_params = np.array([-0.4, 2, 0.5, 0.2, 0.5, 0.5])
        dgp_model = HurdleCountModel(placeholder_y, design,
                                     dist="negbin", zerodist="negbin")
        pmf = dgp_model.predict(dgp_params, which="prob",
                                y_values=np.arange(50))

        # cumulative probabilities with a final column of ones appended
        cum_probs = pmf.cumsum(1)
        n_rows = cum_probs.shape[0]
        cum_probs = np.column_stack((cum_probs, np.ones(n_rows)))

        # simulate data,
        # cooked example that doesn't have identification problems
        generator = np.random.default_rng(987456348)
        uniforms = generator.random((n_rows, 1))
        # index of the first column where ``cum_probs < uniforms`` is False
        counts = np.argmin(cum_probs < uniforms, axis=1)

        fitted_model = HurdleCountModel(counts, design,
                                        dist="negbin", zerodist="negbin")
        cls.res1 = fitted_model.fit(maxiter=300)

        df_null = 4
        expected = dict(
            nobs=n_obs,
            k_params=6,
            df_model=2,
            df_null=df_null,
            df_resid=n_obs - 6,
            k_extra=df_null - 1,
            exog_names=['zm_const', 'zm_x1', 'zm_alpha', 'const', 'x1',
                        'alpha'],
        )
        cls.res2 = Holder(**expected)
Ejemplo n.º 3
0
def convert_effectsize_fsqu(f2=None, eta2=None):
    """Convert between squared effect sizes of the f family.

    ``f2`` is the signal to noise ratio, ``var_explained / var_residual``.
    ``eta2`` is the proportion of explained variance,
    ``var_explained / var_total``.

    The conversion uses the relationship ``f2 = eta2 / (1 - eta2)``.

    Parameters
    ----------
    f2 : None or float
       Squared Cohen's F effect size. If f2 is not None, then eta2 will be
       computed (and any value passed as ``eta2`` is ignored).
    eta2 : None or float
       Squared eta effect size. If f2 is None and eta2 is not None, then f2 is
       computed.

    Returns
    -------
    res : Holder instance
        An instance of the Holder class with f2 and eta2 as attributes.
        If both arguments are None, both attributes are None.

    Notes
    -----
    The boundary values are mapped to their limits instead of raising
    ``ZeroDivisionError`` for Python scalars: ``f2 == 0`` gives
    ``eta2 = 0.0`` and ``eta2 == 1`` gives ``f2 = inf``. Inputs for which the
    division does not raise (e.g. numpy arrays) are computed with the same
    expressions as before.
    """
    if f2 is not None:
        try:
            inv_f2 = 1 / f2
        except ZeroDivisionError:
            # f2 == 0 means no explained variance, so eta2 is 0 as well
            eta2 = 0.0
        else:
            # kept outside the ``try`` so that an invalid f2 == -1 still
            # raises instead of being silently mapped to zero
            eta2 = 1 / (1 + inv_f2)

    elif eta2 is not None:
        try:
            f2 = eta2 / (1 - eta2)
        except ZeroDivisionError:
            # eta2 == 1 means no residual variance, the ratio is unbounded
            f2 = float("inf")

    res = Holder(f2=f2, eta2=eta2)
    return res
Ejemplo n.º 4
0
    def get_results(cls):
        """Attach hard-coded reference results to the class.

        The values are stored as class attributes: ``res_m`` (three
        numbers), and the ``Holder`` instances ``res_oneway``, ``res_2s``,
        ``res_bfm``, ``res_wa`` and ``res_fa``. According to the inline
        comments they were produced with the R packages WRS2 and
        onewaytests and with R's ``oneway.test``.
        """
        cls.res_m = [549.3846153846154, 557.5, 722.3571428571429]
        # results from R WRS2
        # > t1w = t1way(y ~ g, df3, tr=1/13)
        cls.res_oneway = Holder(
            test=8.81531710400927,
            df1=2,
            df2=19.8903710685394,
            p_value=0.00181464966984701,
            effsize=0.647137153056774,
        )

        # > yt = yuen(y ~ g, df3[1:29, ], tr=1/13)  # WRS2
        cls.res_2s = Holder(
            test=0.161970203096559,
            conf_int=np.array([-116.437383793431, 99.9568643129114]),
            p_value=0.873436269777141,
            df=15.3931262881751,
            diff=-8.24025974025983,
            effsize=0.0573842557922749,
        )

        # from library onewaytests
        # > bft = bf.test(y ~ g, df3)
        cls.res_bfm = Holder(statistic=7.10900606421182,
                             parameter=np.array([2, 31.4207256105052]),
                             p_value=0.00283841965791224,
                             alpha=0.05,
                             method='Brown-Forsythe Test')

        # > oww = oneway.test(y ~ g, df3, var.equal = FALSE)
        cls.res_wa = Holder(statistic=8.02355212103924,
                            parameter=np.array([2, 24.272320628139]),
                            p_value=0.00211423625518082,
                            method=('One-way analysis of means '
                                    '(not assuming equal variances)'))

        # > ow = oneway.test(y ~ g, df3, var.equal = TRUE)
        cls.res_fa = Holder(statistic=7.47403193349076,
                            parameter=np.array([2, 40]),
                            p_value=0.00174643304119871,
                            method='One-way analysis of means')
Ejemplo n.º 5
0
def test_fleiss_kappa_irr():
    """Compare ``fleiss_kappa`` on ``diagnoses`` with a stored kappa value."""
    # Reference values; per the original comments they were produced with
    # > r = kappam.fleiss(diagnoses)
    # > cat_items(r, pref="fleiss.")
    expected = Holder()
    expected.method = "Fleiss' Kappa for m Raters"
    expected.irr_name = 'Kappa'
    expected.value = 0.4302445
    expected.stat_name = 'z'
    expected.statistic = 17.65183
    expected.p_value = 0

    # only the first element returned by ``aggregate_raters`` is used
    aggregated = aggregate_raters(diagnoses)
    kappa = fleiss_kappa(aggregated[0])

    # only the kappa value itself is checked
    assert_almost_equal(kappa, expected.value, decimal=7)
Ejemplo n.º 6
0
def test_fleiss_kappa_irr():
    """Compare ``fleiss_kappa`` on ``diagnoses`` with a stored kappa value."""
    # Reference values; per the original comments they were produced with
    # > r = kappam.fleiss(diagnoses)
    # > cat_items(r, pref="fleiss.")
    expected = Holder()
    expected.method = "Fleiss' Kappa for m Raters"
    expected.irr_name = 'Kappa'
    expected.value = 0.4302445
    expected.stat_name = 'z'
    expected.statistic = 17.65183
    expected.p_value = 0

    # only the first element returned by ``aggregate_raters`` is used
    aggregated = aggregate_raters(diagnoses)
    kappa = fleiss_kappa(aggregated[0])

    # only the kappa value itself is checked
    assert_almost_equal(kappa, expected.value, decimal=7)
Ejemplo n.º 7
0
def _fstat2effectsize(f_stat, df):
    """Derive ANOVA effect size measures from an F-statistic.

    This might be combined with convert_effectsize_fsqu.

    Parameters
    ----------
    f_stat : array_like
        Test statistic of an F-test
    df : tuple
        degrees of freedom ``df = (df1, df2)`` where
         - df1 : numerator degrees of freedom, number of constraints
         - df2 : denominator degrees of freedom, df_resid

    Returns
    -------
    res : Holder instance
        This instance contains the effect size measures f2, eta2, omega2 and
        eps2 as attributes, plus ``omega2_`` and ``eps2_`` which hold the
        same two measures computed directly from ``f_stat``.

    Notes
    -----
    This uses the following definitions:

    - f2 = f_stat * df1 / df2
    - eta2 = f2 / (f2 + 1)
    - omega2 = (f2 - df1 / df2) / (f2 + 1 + 1 / df2)
    - eps2 = (f2 - df1 / df2) / (f2 + 1)

    and the alternative forms

    - omega2_ = (f_stat - 1) / (f_stat + (df2 + 1) / df1)
    - eps2_ = (f_stat - 1) / (f_stat + df2 / df1)

    This differs from effect size measures in other function which define
    ``f2 = f_stat * df1 / nobs``
    or an equivalent expression for power computation. The noncentrality
    index for the hypothesis test is in those cases given by
    ``nc = f_stat * df1``.

    The two ways of computing omega2 and eps2 agree for regular cases but
    can show different behavior in corner cases (e.g. zero division).

    """
    df_num, df_denom = df
    f2 = f_stat * df_num / df_denom

    # shared numerators of the two parametrizations
    f2_centered = f2 - df_num / df_denom
    f_excess = f_stat - 1

    measures = dict(
        f2=f2,
        eta2=f2 / (f2 + 1),
        omega2=f2_centered / (f2 + 1 + 1 / df_denom),
        eps2=f2_centered / (f2 + 1),
        eps2_=f_excess / (f_stat + df_denom / df_num),
        omega2_=f_excess / (f_stat + (df_denom + 1) / df_num),
    )
    return Holder(**measures)
Ejemplo n.º 8
0
    def setup_class(cls):
        """Compute paired TOST results for column 2 of ``clinic``.

        ``cls.res1`` collects the computed values, ``cls.res2`` and
        ``cls.res2b`` reference the stored results ``tost_clinic_paired_1``
        and ``ttest_clinic_paired_1``.
        """
        first, second = clinic[:15, 2], clinic[15:, 2]
        tost_out = smws.ttost_paired(first, second, -0.6, 0.6, transform=None)
        diff_stats = smws.DescrStatsW(first - second, weights=None, ddof=0)

        computed = Holder()
        computed.pvalue = tost_out[0]
        # computed.df = tost_out[1][-1] not yet
        # tost confint 2*alpha TODO: check again
        computed.tconfint_diff = diff_stats.tconfint_mean(0.1)
        computed.confint_05 = diff_stats.tconfint_mean(0.05)
        computed.mean_diff = diff_stats.mean
        computed.std_mean_diff = diff_stats.std_mean

        cls.res1 = computed
        cls.res2 = tost_clinic_paired_1
        cls.res2b = ttest_clinic_paired_1
Ejemplo n.º 9
0
def _fstat2effectsize(f_stat, df1, df2):
    """Derive ANOVA effect size measures from an F-statistic.

    This might be combined with convert_effectsize_fsqu.

    Parameters
    ----------
    f_stat : array_like
        F-statistic corresponding to an F-test
    df1 : int or float
        numerator degrees of freedom, number of constraints
    df2 : int or float
        denominator degrees of freedom, df_resid

    Returns
    -------
    res : Holder instance
        This instance contains the effect size measures f2, eta2, omega2 and
        eps2 as attributes, plus ``omega2_`` and ``eps2_`` which hold the
        same two measures computed directly from ``f_stat``.

    Notes
    -----
    This uses the following definitions:

       f2 = f_stat * df1 / df2
       eta2 = f2 / (f2 + 1)
       omega2 = (f2 - df1 / df2) / (f2 + 1 + 1 / df2)
       eps2 = (f2 - df1 / df2) / (f2 + 1)

    and the alternative forms

       omega2_ = (f_stat - 1) / (f_stat + (df2 + 1) / df1)
       eps2_ = (f_stat - 1) / (f_stat + df2 / df1)

    This differs from effect size measures in other function which define
    ``f2 = f_stat * df1 / nobs``
    or an equivalent expression for power computation. The noncentrality
    index for the hypothesis test is in those cases given by
    ``nc = f_stat * df1``.

    """
    f2 = f_stat * df1 / df2

    # shared numerators of the two parametrizations
    f2_centered = f2 - df1 / df2
    f_excess = f_stat - 1

    measures = dict(
        f2=f2,
        eta2=f2 / (f2 + 1),
        omega2=f2_centered / (f2 + 1 + 1 / df2),
        eps2=f2_centered / (f2 + 1),
        eps2_=f_excess / (f_stat + df2 / df1),
        omega2_=f_excess / (f_stat + (df2 + 1) / df1),
    )
    return Holder(**measures)
Ejemplo n.º 10
0
def convert_effectsize_fsqu(f2=None, eta2=None):
    """Convert between squared effect sizes of the f family.

    ``f2`` is the signal to noise ratio, ``var_explained / var_residual``.
    ``eta2`` is the proportion of explained variance,
    ``var_explained / var_total``.

    The conversion uses the relationship ``f2 = eta2 / (1 - eta2)``.

    Parameters
    ----------
    f2 : None or float
        Squared f effect size. If it is not None, ``eta2`` is computed from
        it (and any value passed as ``eta2`` is ignored).
    eta2 : None or float
        Squared eta effect size. Used to compute ``f2`` only if ``f2`` is
        None.

    Returns
    -------
    res : Holder instance
        Holder with ``f2`` and ``eta2`` as attributes. If both arguments are
        None, both attributes are None.

    Notes
    -----
    The boundary values are mapped to their limits instead of raising
    ``ZeroDivisionError`` for Python scalars: ``f2 == 0`` gives
    ``eta2 = 0.0`` and ``eta2 == 1`` gives ``f2 = inf``. Inputs for which the
    division does not raise (e.g. numpy arrays) are computed with the same
    expressions as before.

    """
    if f2 is not None:
        try:
            inv_f2 = 1 / f2
        except ZeroDivisionError:
            # f2 == 0 means no explained variance, so eta2 is 0 as well
            eta2 = 0.0
        else:
            # kept outside the ``try`` so that an invalid f2 == -1 still
            # raises instead of being silently mapped to zero
            eta2 = 1 / (1 + inv_f2)

    elif eta2 is not None:
        try:
            f2 = eta2 / (1 - eta2)
        except ZeroDivisionError:
            # eta2 == 1 means no residual variance, the ratio is unbounded
            f2 = float("inf")

    res = Holder(f2=f2, eta2=eta2)
    return res
Ejemplo n.º 11
0
def test_mvmean_2indep():
    """Compare ``smmv.test_mvmean_2indep`` with stored reference values.

    The statistic, p-value, T2 and degrees of freedom of the result are
    checked against the numbers in ``res_stata`` (presumably Stata output,
    going by the name).
    """
    # first sample: 24 rows, 4 columns
    x = np.asarray([[1.0, 24.0, 23.5, 1.0], [2.0, 25.0, 24.5, 1.0],
                    [3.0, 21.0, 20.5, 1.0], [4.0, 22.0, 20.5, 1.0],
                    [5.0, 23.0, 22.5, 1.0], [6.0, 18.0, 16.5, 1.0],
                    [7.0, 17.0, 16.5, 1.0], [8.0, 28.0, 27.5, 1.0],
                    [9.0, 24.0, 23.5, 1.0], [10.0, 27.0, 25.5, 1.0],
                    [11.0, 21.0, 20.5, 1.0], [12.0, 23.0, 22.5, 1.0],
                    [1.0, 20.0, 19.0, 0.0], [2.0, 23.0, 22.0, 0.0],
                    [3.0, 21.0, 20.0, 0.0], [4.0, 25.0, 24.0, 0.0],
                    [5.0, 18.0, 17.0, 0.0], [6.0, 17.0, 16.0, 0.0],
                    [7.0, 18.0, 17.0, 0.0], [8.0, 24.0, 23.0, 0.0],
                    [9.0, 20.0, 19.0, 0.0], [10.0, 24.0, 22.0, 0.0],
                    [11.0, 23.0, 22.0, 0.0], [12.0, 19.0, 18.0, 0.0]])

    # second sample: same shape as ``x``
    y = np.asarray([[1.1, 24.1, 23.4, 1.1], [1.9, 25.2, 24.3, 1.2],
                    [3.2, 20.9, 20.2, 1.3], [4.1, 21.8, 20.6, 0.9],
                    [5.2, 23.0, 22.7, 0.8], [6.3, 18.1, 16.8, 0.7],
                    [7.1, 17.2, 16.5, 1.0], [7.8, 28.3, 27.4, 1.1],
                    [9.5, 23.9, 23.3, 1.2], [10.1, 26.8, 25.2, 1.3],
                    [10.5, 26.7, 20.6, 0.9], [12.1, 23.0, 22.7, 0.8],
                    [1.1, 20.1, 19.0, 0.7], [1.8, 23.2, 22.0, 0.1],
                    [3.2, 21.3, 20.3, 0.2], [4.3, 24.9, 24.2, 0.3],
                    [5.5, 17.9, 17.1, 0.0], [5.5, 17.8, 16.0, 0.6],
                    [7.1, 17.7, 16.7, 0.0], [7.7, 24.0, 22.8, 0.5],
                    [9.1, 20.1, 18.9, 0.0], [10.2, 24.2, 22.3, 0.3],
                    [11.3, 23.3, 22.2, 0.0], [11.7, 18.8, 18.1, 0.1]])

    res = smmv.test_mvmean_2indep(x, y)

    # hard-coded reference values
    res_stata = Holder(p_F=0.6686659171701677,
                       df_r=43,
                       df_m=4,
                       F=0.594263378678938,
                       T2=2.5428944576028973)

    assert_allclose(res.statistic, res_stata.F, rtol=1e-10)
    assert_allclose(res.pvalue, res_stata.p_F, rtol=1e-10)
    assert_allclose(res.t2, res_stata.T2, rtol=1e-10)
    assert_equal(res.df, [res_stata.df_m, res_stata.df_r])
Ejemplo n.º 12
0
from numpy import array

from statsmodels.tools.testing import Holder

# Container for stored reference arrays; the ``comment`` string below records
# the calls that presumably generated them.
armarep = Holder()
# NOTE(review): the two adjacent string literals are concatenated without any
# separator, so the stored comment reads "..., 20)mlab.garchar(...".
armarep.comment = ('mlab.garchma(-res_armarep.ar[1:], res_armarep.ma[1:], 20)'
                   'mlab.garchar(-res_armarep.ar[1:], res_armarep.ma[1:], 20)')
# 20 values stored as a column (shape (20, 1))
armarep.marep = array([
    [-0.1],
    [-0.77],
    [-0.305],
    [0.4635],
    [0.47575],
    [-0.132925],
    [-0.4470625],
    [-0.11719125],
    [0.299054375],
    [0.2432801875],
    [-0.11760340625],
    [-0.253425853125],
    [-0.0326302015625],
    [0.18642558171875],
    [0.11931695210938],
    [-0.08948198932031],
    [-0.14019455634766],
    [0.00148831328242],
    [0.11289980171934],
    [0.05525925023373]])
# coefficient arrays, each with a leading 1
armarep.ar = array([1., -0.5,  0.8])
armarep.ma = array([1., -0.6,  0.08])
armarep.name = 'armarep'
Ejemplo n.º 13
0
from numpy import array

from statsmodels.tools.testing import Holder

# Container for stored reference arrays; the ``comment`` string below records
# the calls that presumably generated them.
armarep = Holder()
# NOTE(review): the two adjacent string literals are concatenated without any
# separator, so the stored comment reads "..., 20)mlab.garchar(...".
armarep.comment = ('mlab.garchma(-res_armarep.ar[1:], res_armarep.ma[1:], 20)'
                   'mlab.garchar(-res_armarep.ar[1:], res_armarep.ma[1:], 20)')
# 20 values stored as a column (shape (20, 1))
armarep.marep = array([[-0.1], [-0.77], [-0.305], [0.4635], [0.47575],
                       [-0.132925], [-0.4470625], [-0.11719125], [0.299054375],
                       [0.2432801875], [-0.11760340625], [-0.253425853125],
                       [-0.0326302015625], [0.18642558171875],
                       [0.11931695210938], [-0.08948198932031],
                       [-0.14019455634766], [0.00148831328242],
                       [0.11289980171934], [0.05525925023373]])
# coefficient arrays, each with a leading 1
armarep.ar = array([1., -0.5, 0.8])
armarep.ma = array([1., -0.6, 0.08])
armarep.name = 'armarep'
# second stored column of 20 values (shape (20, 1))
armarep.arrep = array([[-1.00000000000000e-01], [-7.80000000000000e-01],
                       [-4.60000000000000e-01], [-2.13600000000000e-01],
                       [-9.13600000000000e-02], [-3.77280000000000e-02],
                       [-1.53280000000000e-02], [-6.17856000000000e-03],
                       [-2.48089600000000e-03], [-9.94252799999999e-04],
                       [-3.98080000000000e-04], [-1.59307776000000e-04],
                       [-6.37382655999999e-05], [-2.54983372800000e-05],
                       [-1.01999411200000e-05], [-4.08009768959999e-06],
                       [-1.63206332416000e-06], [-6.52830179327999e-07],
                       [-2.61133041663999e-07], [-1.04453410652160e-07]])
Ejemplo n.º 14
0
def test_holder():
    """An attribute assigned on a ``Holder`` instance can be read back."""
    container = Holder()
    container.new_attr = 1

    assert hasattr(container, 'new_attr')
    assert container.new_attr == 1
Ejemplo n.º 15
0
def test_binom_test():
    """Compare ``smprop.binom_test`` and beta confidence intervals with R.

    The reference values below are, per the inline comments, output of R's
    ``binom.test`` for 51 successes in 235 trials with null proportion 1/6.
    Only the p-values and confidence limits are compared.
    """
    #> bt = binom.test(51,235,(1/6),alternative="less")
    #> cat_items(bt, "binom_test_less.")
    binom_test_less = Holder()
    binom_test_less.statistic = 51
    binom_test_less.parameter = 235
    binom_test_less.p_value = 0.982022657605858
    binom_test_less.conf_int = [0, 0.2659460862574313]
    binom_test_less.estimate = 0.2170212765957447
    binom_test_less.null_value = 1. / 6
    binom_test_less.alternative = 'less'
    binom_test_less.method = 'Exact binomial test'
    binom_test_less.data_name = '51 and 235'

    #> bt = binom.test(51,235,(1/6),alternative="greater")
    #> cat_items(bt, "binom_test_greater.")
    binom_test_greater = Holder()
    binom_test_greater.statistic = 51
    binom_test_greater.parameter = 235
    binom_test_greater.p_value = 0.02654424571169085
    binom_test_greater.conf_int = [0.1735252778065201, 1]
    binom_test_greater.estimate = 0.2170212765957447
    binom_test_greater.null_value = 1. / 6
    binom_test_greater.alternative = 'greater'
    binom_test_greater.method = 'Exact binomial test'
    binom_test_greater.data_name = '51 and 235'

    #> bt = binom.test(51,235,(1/6),alternative="t")
    #> cat_items(bt, "binom_test_2sided.")
    binom_test_2sided = Holder()
    binom_test_2sided.statistic = 51
    binom_test_2sided.parameter = 235
    binom_test_2sided.p_value = 0.0437479701823997
    binom_test_2sided.conf_int = [0.1660633298083073, 0.2752683640289254]
    binom_test_2sided.estimate = 0.2170212765957447
    binom_test_2sided.null_value = 1. / 6
    binom_test_2sided.alternative = 'two.sided'
    binom_test_2sided.method = 'Exact binomial test'
    binom_test_2sided.data_name = '51 and 235'

    # pairs of (alternative name passed to binom_test, R reference results)
    alltests = [('larger', binom_test_greater),
                ('smaller', binom_test_less),
                ('two-sided', binom_test_2sided)]

    for alt, res0 in alltests:
        # only p-value is returned
        res = smprop.binom_test(51, 235, prop=1. / 6, alternative=alt)
        #assert_almost_equal(res[0], res0.statistic)
        assert_almost_equal(res, res0.p_value, decimal=13)

    # R binom_test returns Clopper-Pearson confint
    ci_2s = smprop.proportion_confint(51, 235, alpha=0.05, method='beta')
    # the limits of the alpha=0.1 interval are compared with the finite ends
    # of the two one-sided reference intervals
    ci_low, ci_upp = smprop.proportion_confint(51, 235, alpha=0.1,
                                               method='beta')
    assert_almost_equal(ci_2s, binom_test_2sided.conf_int, decimal=13)
    assert_almost_equal(ci_upp, binom_test_less.conf_int[1], decimal=13)
    assert_almost_equal(ci_low, binom_test_greater.conf_int[0], decimal=13)
Ejemplo n.º 16
0
19    2 3.46 3.60 2.97 1.80 1.74
20    2 4.01 3.48 4.42 3.06 2.76
21    2 3.04 2.87 2.87 2.71 2.87
22    2 3.47 3.24 3.47 3.26 3.14
23    2 4.06 3.92 3.18 3.06 1.74
24    2 2.91 3.99 3.06 2.02 3.18
25    2 3.59 4.21 4.02 3.26 2.85
26    2 4.51 4.21 3.78 2.63 1.92
27    2 3.16 3.31 3.28 3.25 3.52
28    2 3.86 3.61 3.28 3.19 3.09
29    2 3.31 2.97 3.76 3.18 2.60
30    2 3.02 2.73 3.87 3.50 2.93'''.split()
# Convert the whitespace-split tokens of ``raw_clinic`` to floats and arrange
# them in rows of 7 columns.
clinic = np.array(raw_clinic, float).reshape(-1, 7)

# Stored reference results; per the comment below generated with
#t = tost(-clinic$var2[16:30] + clinic$var2[1:15], eps=0.6)
tost_clinic_paired = Holder()
tost_clinic_paired.sample = 'paired'
tost_clinic_paired.mean_diff = 0.5626666666666665
tost_clinic_paired.se_diff = 0.2478276410785118
tost_clinic_paired.alpha = 0.05
tost_clinic_paired.ci_diff = (0.1261653305099018, 0.999168002823431)
tost_clinic_paired.df = 14
tost_clinic_paired.epsilon = 0.6
tost_clinic_paired.result = 'not rejected'
tost_clinic_paired.p_value = 0.4412034046017588
tost_clinic_paired.check_me = (0.525333333333333, 0.6)

# Second set of stored reference results (for ``var1``)
#> t = tost(-clinic$var1[16:30] + clinic$var1[1:15], eps=0.6)
#> cat_items(t, prefix="tost_clinic_paired_1.")
tost_clinic_paired_1 = Holder()
tost_clinic_paired_1.mean_diff = 0.1646666666666667
Ejemplo n.º 17
0
 def setup_class(cls):
     """Run the paired TOST on column 3 of ``clinic`` and store the results.

     ``cls.res1`` receives the computed p-value, ``cls.res2`` references the
     stored results ``tost_clinic_paired``.
     """
     first = clinic[:15, 3]
     second = clinic[15:, 3]
     tost_out = smws.ttost_paired(first, second, -0.6, 0.6, transform=None)

     computed = Holder()
     computed.pvalue = tost_out[0]

     cls.res1 = computed
     cls.res2 = tost_clinic_paired
Ejemplo n.º 18
0
def test_binom_test():
    """Compare ``smprop.binom_test`` and beta confidence intervals with R.

    The reference values below are, per the inline comments, output of R's
    ``binom.test`` for 51 successes in 235 trials with null proportion 1/6.
    Only the p-values and confidence limits are compared.
    """
    #> bt = binom.test(51,235,(1/6),alternative="less")
    #> cat_items(bt, "binom_test_less.")
    binom_test_less = Holder()
    binom_test_less.statistic = 51
    binom_test_less.parameter = 235
    binom_test_less.p_value = 0.982022657605858
    binom_test_less.conf_int = [0, 0.2659460862574313]
    binom_test_less.estimate = 0.2170212765957447
    binom_test_less.null_value = 1. / 6
    binom_test_less.alternative = 'less'
    binom_test_less.method = 'Exact binomial test'
    binom_test_less.data_name = '51 and 235'

    #> bt = binom.test(51,235,(1/6),alternative="greater")
    #> cat_items(bt, "binom_test_greater.")
    binom_test_greater = Holder()
    binom_test_greater.statistic = 51
    binom_test_greater.parameter = 235
    binom_test_greater.p_value = 0.02654424571169085
    binom_test_greater.conf_int = [0.1735252778065201, 1]
    binom_test_greater.estimate = 0.2170212765957447
    binom_test_greater.null_value = 1. / 6
    binom_test_greater.alternative = 'greater'
    binom_test_greater.method = 'Exact binomial test'
    binom_test_greater.data_name = '51 and 235'

    #> bt = binom.test(51,235,(1/6),alternative="t")
    #> cat_items(bt, "binom_test_2sided.")
    binom_test_2sided = Holder()
    binom_test_2sided.statistic = 51
    binom_test_2sided.parameter = 235
    binom_test_2sided.p_value = 0.0437479701823997
    binom_test_2sided.conf_int = [0.1660633298083073, 0.2752683640289254]
    binom_test_2sided.estimate = 0.2170212765957447
    binom_test_2sided.null_value = 1. / 6
    binom_test_2sided.alternative = 'two.sided'
    binom_test_2sided.method = 'Exact binomial test'
    binom_test_2sided.data_name = '51 and 235'

    # pairs of (alternative name passed to binom_test, R reference results)
    alltests = [('larger', binom_test_greater), ('smaller', binom_test_less),
                ('two-sided', binom_test_2sided)]

    for alt, res0 in alltests:
        # only p-value is returned
        res = smprop.binom_test(51, 235, prop=1. / 6, alternative=alt)
        #assert_almost_equal(res[0], res0.statistic)
        assert_almost_equal(res, res0.p_value, decimal=13)

    # R binom_test returns Clopper-Pearson confint
    ci_2s = smprop.proportion_confint(51, 235, alpha=0.05, method='beta')
    # the limits of the alpha=0.1 interval are compared with the finite ends
    # of the two one-sided reference intervals
    ci_low, ci_upp = smprop.proportion_confint(51,
                                               235,
                                               alpha=0.1,
                                               method='beta')
    assert_almost_equal(ci_2s, binom_test_2sided.conf_int, decimal=13)
    assert_almost_equal(ci_upp, binom_test_less.conf_int[1], decimal=13)
    assert_almost_equal(ci_low, binom_test_greater.conf_int[0], decimal=13)
Ejemplo n.º 19
0
def test_chisquare():
    """Compare ``chisquare`` with stored R ``chisq.test`` results.

    Two sets of expected frequencies (``pr1``, ``pr2``) are tested against
    the same observed frequencies; only the statistic and the p-value are
    compared.
    """
    # TODO: no tests for ``value`` yet
    res1 = Holder()
    res2 = Holder()
    #> freq = c(1048,  660,  510,  420,  362)
    #> pr1 = c(1020,  690,  510,  420,  360)
    #> pr2 = c(1050,  660,  510,  420,  360)
    #> c = chisq.test(freq, p=pr1, rescale.p = TRUE)
    #> cat_items(c, "res1.")
    res1.statistic = 2.084086388178453
    res1.parameter = 4
    res1.p_value = 0.72029651761105
    res1.method = 'Chi-squared test for given probabilities'
    res1.data_name = 'freq'
    res1.observed = np.array([1048, 660, 510, 420, 362])
    res1.expected = np.array([1020, 690, 510, 420, 360])
    res1.residuals = np.array([
        0.876714007519206, -1.142080481440321, -2.517068894406109e-15,
        -2.773674830645328e-15, 0.105409255338946
    ])

    #> c = chisq.test(freq, p=pr2, rescale.p = TRUE)
    #> cat_items(c, "res2.")
    res2.statistic = 0.01492063492063492
    res2.parameter = 4
    res2.p_value = 0.999972309849908
    res2.method = 'Chi-squared test for given probabilities'
    res2.data_name = 'freq'
    res2.observed = np.array([1048, 660, 510, 420, 362])
    res2.expected = np.array([1050, 660, 510, 420, 360])
    res2.residuals = np.array([
        -0.06172133998483677, 0, -2.517068894406109e-15,
        -2.773674830645328e-15, 0.105409255338946
    ])

    # same inputs as in the R session above
    freq = np.array([1048, 660, 510, 420, 362])
    pr1 = np.array([1020, 690, 510, 420, 360])
    pr2 = np.array([1050, 660, 510, 420, 360])

    for pr, res in zip([pr1, pr2], [res1, res2]):
        stat, pval = chisquare(freq, pr)
        assert_almost_equal(stat, res.statistic, decimal=12)
        assert_almost_equal(pval, res.p_value, decimal=13)
Ejemplo n.º 20
0
def test_cohens_kappa_irr():
    """Compare ``cohens_kappa`` with stored R ``kappa2`` results.

    For several weight specifications the kappa value, the z statistic and
    the two-sided p-value are compared with the reference values stored in
    ``ck_w1`` to ``ck_w4``.
    """

    ck_w3 = Holder()
    ck_w4 = Holder()

    #>r = kappa2(anxiety[,1:2], c(0,0,0,1,1,1))
    #> cat_items(r, pref="ck_w3.")
    ck_w3.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,0,1,1,1)"
    ck_w3.irr_name = 'Kappa'
    ck_w3.value = 0.1891892
    ck_w3.stat_name = 'z'
    ck_w3.statistic = 0.5079002
    ck_w3.p_value = 0.6115233

    #> r = kappa2(anxiety[,1:2], c(0,0,1,1,2,2))
    #> cat_items(r, pref="ck_w4.")
    ck_w4.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,1,1,2,2)"
    ck_w4.irr_name = 'Kappa'
    ck_w4.value = 0.2820513
    ck_w4.stat_name = 'z'
    ck_w4.statistic = 1.257410
    ck_w4.p_value = 0.2086053

    # NOTE(review): ``ck_w3`` and ``ck_w4`` are re-created right below, so
    # the reference values assigned above (for ``anxiety[,1:2]``) are
    # discarded and never compared with anything.
    ck_w1 = Holder()
    ck_w2 = Holder()
    ck_w3 = Holder()
    ck_w4 = Holder()
    #> r = kappa2(anxiety[,2:3])
    #> cat_items(r, pref="ck_w1.")
    ck_w1.method = "Cohen's Kappa for 2 Raters (Weights: unweighted)"
    ck_w1.irr_name = 'Kappa'
    ck_w1.value = -0.006289308
    ck_w1.stat_name = 'z'
    ck_w1.statistic = -0.0604067
    ck_w1.p_value = 0.9518317

    #> r = kappa2(anxiety[,2:3], "equal")
    #> cat_items(r, pref="ck_w2.")
    ck_w2.method = "Cohen's Kappa for 2 Raters (Weights: equal)"
    ck_w2.irr_name = 'Kappa'
    ck_w2.value = 0.1459075
    ck_w2.stat_name = 'z'
    ck_w2.statistic = 1.282472
    ck_w2.p_value = 0.1996772

    #> r = kappa2(anxiety[,2:3], "squared")
    #> cat_items(r, pref="ck_w3.")
    ck_w3.method = "Cohen's Kappa for 2 Raters (Weights: squared)"
    ck_w3.irr_name = 'Kappa'
    ck_w3.value = 0.2520325
    ck_w3.stat_name = 'z'
    ck_w3.statistic = 1.437451
    ck_w3.p_value = 0.1505898

    #> r = kappa2(anxiety[,2:3], c(0,0,1,1,2))
    #> cat_items(r, pref="ck_w4.")
    ck_w4.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,1,1,2)"
    ck_w4.irr_name = 'Kappa'
    ck_w4.value = 0.2391304
    ck_w4.stat_name = 'z'
    ck_w4.statistic = 1.223734
    ck_w4.p_value = 0.2210526

    # each case is (reference results, ``weights`` argument, ``wt`` argument);
    # cases sharing a reference holder are expected to give the same results
    all_cases = [(ck_w1, None, None),
                 (ck_w2, None, 'linear'),
                 (ck_w2, np.arange(5), None),
                 (ck_w2, np.arange(5), 'toeplitz'),
                 (ck_w3, None, 'quadratic'),
                 (ck_w3, np.arange(5)**2, 'toeplitz'),
                 (ck_w3, 4*np.arange(5)**2, 'toeplitz'),
                 (ck_w4, [0,0,1,1,2], 'toeplitz')]

    #Note R:irr drops the missing category level 4 and uses the reduced matrix
    # ``r[0]`` is the two-dimensional table of counts built from all columns
    # of ``anxiety`` after the first, using the given bin edges
    r = np.histogramdd(anxiety[:,1:], ([1, 2, 3, 4, 6, 7], [1, 2, 3, 4, 6, 7]))

    for res2, w, wt in all_cases:
        # identify the failing case in the assertion message
        msg = repr(w) + repr(wt)
        res1 = cohens_kappa(r[0], weights=w, wt=wt)
        assert_almost_equal(res1.kappa, res2.value, decimal=6, err_msg=msg)
        assert_almost_equal(res1.z_value, res2.statistic, decimal=5, err_msg=msg)
        assert_almost_equal(res1.pvalue_two_sided, res2.p_value, decimal=6, err_msg=msg)
Ejemplo n.º 21
0
Created on Sun Jun 30 20:25:22 2013

Author: Josef Perktold
"""

import pytest
import numpy as np
from numpy.testing import assert_allclose

from statsmodels.tools.tools import add_constant
from statsmodels.tools.testing import Holder
from statsmodels.miscmodels.tmodel import TLinearModel


# Container for the example data set used in this module.
mm = Holder()
# Labels of the 60 observations, listed in alphabetical rather than
# chronological order.
# NOTE(review): "Feb.1284" looks like a typo for "Feb.1984" (the other months
# cover 1982-1986) - confirm against the data source before changing, since
# the label is runtime data.
mm.date_label = ["Apr.1982",  "Apr.1983", "Apr.1984", "Apr.1985", "Apr.1986",
                 "Aug.1982", "Aug.1983",  "Aug.1984", "Aug.1985", "Aug.1986",
                 "Dec.1982", "Dec.1983", "Dec.1984",  "Dec.1985", "Dec.1986",
                 "Feb.1284", "Feb.1982", "Feb.1983", "Feb.1985",  "Feb.1986",
                 "Jan.1982", "Jan.1983", "Jan.1984", "Jan.1985", "Jan.1986",
                 "Jul.1982", "July1983", "July1984", "July1985", "July1986",
                 "June1982",  "June1983", "June1984", "June1985", "June1986",
                 "Mar.1982", "Mar.1983",  "Mar.1984", "Mar.1985", "Mar.1986",
                 "May1982", "May1983", "May1984",  "May1985", "May1986",
                 "Nov.1982", "Nov.1983", "Nov.1984", "Nov.1985",  "Nov.1986",
                 "Oct.1982", "Oct.1983", "Oct.1984", "Oct.1985", "Oct.1986",
                 "Sept.1982", "Sept.1983", "Sept.1984", "Sept.1985",
                 "Sept.1986"]

mm.m_marietta = np.array([
Ejemplo n.º 22
0
from numpy import array

from statsmodels.tools.testing import Holder


# Container for stored reference results; ``comment`` records the call that
# presumably produced them.
mlpacf = Holder()
mlpacf.comment = 'mlab.parcorr(x, [], 2, nout=3)'
mlpacf.name = 'mlpacf'
# the integers 0..20 as floats, stored as a column (shape (21, 1))
mlpacf.lags1000 = array([
    [0.],
    [1.],
    [2.],
    [3.],
    [4.],
    [5.],
    [6.],
    [7.],
    [8.],
    [9.],
    [10.],
    [11.],
    [12.],
    [13.],
    [14.],
    [15.],
    [16.],
    [17.],
    [18.],
    [19.],
    [20.]])
mlpacf.bounds1000 = array([
Ejemplo n.º 23
0
from statsmodels.stats.correlation_tools import (
    corr_nearest, corr_clipped, cov_nearest,
    _project_correlation_factors, corr_nearest_factor, _spg_optim,
    corr_thresholded, cov_nearest_factor_homog, FactoredPSDMatrix)
from statsmodels.tools.testing import Holder


def norm_f(x, y):
    '''Frobenius norm of the difference between two arrays

    Returns the square root of the sum of squared elementwise differences
    ``x - y``.
    '''
    delta = x - y
    sum_of_squares = (delta * delta).sum()
    return np.sqrt(sum_of_squares)


# R library Matrix results
# Stored reference output; the R call that produced it is quoted below.
cov1_r = Holder()
#> nc  <- nearPD(pr, conv.tol = 1e-7, keepDiag = TRUE, doDykstra =FALSE, corr=TRUE)
#> cat_items(nc, prefix="cov1_r.")
# ``mat`` holds only a placeholder string here; it is assigned again further
# down with an array.
cov1_r.mat = '''<S4 object of class structure("dpoMatrix", package = "Matrix")>'''
cov1_r.eigenvalues = np.array([
     4.197315628646795, 0.7540460243978023, 0.5077608149667492,
     0.3801267599652769, 0.1607508970775889, 4.197315628646795e-08
    ])
# R logicals are stored as the strings 'TRUE' / 'FALSE', not as Python bools
cov1_r.corr = '''TRUE'''
cov1_r.normF = 0.0743805226512533
cov1_r.iterations = 11
cov1_r.rel_tol = 8.288594638441735e-08
cov1_r.converged = '''TRUE'''
#> mkarray2(as.matrix(nc$mat), name="cov1_r.mat")
cov1_r.mat = np.array([
     1, 0.487968018215892, 0.642651880010906, 0.4906386709070835,
Ejemplo n.º 24
0
def test_mv_mean():
    """Check multivariate mean tests and confidence intervals.

    Covers ``smmv.test_mvmean`` for a one-sample and a paired-difference
    case against stored reference values (presumably Stata output, going by
    the ``res_stata`` name and comments), and compares
    ``confint_mvmean_fromstats`` / ``smmv.confint_mvmean`` with the
    intervals from ``DescrStatsW.tconfint_mean``.
    """
    # names = ['id', 'mpg1', 'mpg2', 'add']
    x = np.asarray([[1.0, 24.0, 23.5, 1.0], [2.0, 25.0, 24.5, 1.0],
                    [3.0, 21.0, 20.5, 1.0], [4.0, 22.0, 20.5, 1.0],
                    [5.0, 23.0, 22.5, 1.0], [6.0, 18.0, 16.5, 1.0],
                    [7.0, 17.0, 16.5, 1.0], [8.0, 28.0, 27.5, 1.0],
                    [9.0, 24.0, 23.5, 1.0], [10.0, 27.0, 25.5, 1.0],
                    [11.0, 21.0, 20.5, 1.0], [12.0, 23.0, 22.5, 1.0],
                    [1.0, 20.0, 19.0, 0.0], [2.0, 23.0, 22.0, 0.0],
                    [3.0, 21.0, 20.0, 0.0], [4.0, 25.0, 24.0, 0.0],
                    [5.0, 18.0, 17.0, 0.0], [6.0, 17.0, 16.0, 0.0],
                    [7.0, 18.0, 17.0, 0.0], [8.0, 24.0, 23.0, 0.0],
                    [9.0, 20.0, 19.0, 0.0], [10.0, 24.0, 22.0, 0.0],
                    [11.0, 23.0, 22.0, 0.0], [12.0, 19.0, 18.0, 0.0]])

    # one-sample test of columns 1 and 2 against the value 21 for both
    res = smmv.test_mvmean(x[:, 1:3], [21, 21])

    res_stata = Holder(p_F=1.25062334808e-09,
                       df_r=22,
                       df_m=2,
                       F=59.91609589041116,
                       T2=125.2791095890415)

    assert_allclose(res.statistic, res_stata.F, rtol=1e-10)
    assert_allclose(res.pvalue, res_stata.p_F, rtol=1e-10)
    assert_allclose(res.t2, res_stata.T2, rtol=1e-10)
    assert_equal(res.df, [res_stata.df_m, res_stata.df_r])

    # diff of paired sample
    # rows are split by the last column (1 versus 0)
    mask = x[:, -1] == 1
    x1 = x[mask, 1:3]
    x0 = x[~mask, 1:3]
    res_p = smmv.test_mvmean(x1 - x0, [0, 0])

    # result Stata hotelling
    res_stata = Holder(
        T2=9.698067632850247,
        df=10,
        k=2,
        N=12,
        F=4.4082126,  # not in return List
        p_F=0.0424)  # not in return List

    # F and p_F are only available with few digits, hence the absolute
    # tolerances in the next two checks
    res = res_p
    assert_allclose(res.statistic, res_stata.F, atol=5e-7)
    assert_allclose(res.pvalue, res_stata.p_F, atol=5e-4)
    assert_allclose(res.t2, res_stata.T2, rtol=1e-10)
    assert_equal(res.df, [res_stata.k, res_stata.df])

    # mvtest means diff1 diff2, zero
    res_stata = Holder(p_F=.0423949782937231,
                       df_r=10,
                       df_m=2,
                       F=4.408212560386478,
                       T2=9.69806763285025)

    assert_allclose(res.statistic, res_stata.F, rtol=1e-12)
    assert_allclose(res.pvalue, res_stata.p_F, rtol=1e-12)
    assert_allclose(res.t2, res_stata.T2, rtol=1e-12)
    assert_equal(res.df, [res_stata.df_m, res_stata.df_r])

    # confidence intervals: with an identity transformation the multivariate
    # intervals are compared with the per-column t intervals
    dw = weightstats.DescrStatsW(x)
    ci0 = dw.tconfint_mean(alpha=0.05)

    nobs = len(x[:, 1:])
    ci1 = confint_mvmean_fromstats(dw.mean,
                                   np.diag(dw.var),
                                   nobs,
                                   lin_transf=np.eye(4),
                                   alpha=0.05)
    ci2 = confint_mvmean_fromstats(dw.mean,
                                   dw.cov,
                                   nobs,
                                   lin_transf=np.eye(4),
                                   alpha=0.05)

    # only the first two returned elements are compared with ``ci0``
    assert_allclose(ci1[:2], ci0, rtol=1e-13)
    assert_allclose(ci2[:2], ci0, rtol=1e-13)

    # test from data
    res = smmv.confint_mvmean(x, lin_transf=np.eye(4), alpha=0.05)
    assert_allclose(res, ci2, rtol=1e-13)
Ejemplo n.º 25
0
    def setup(self):
        """Store the count data and the reference results on the instance.

        ``n_success`` and ``nobs`` are the inputs; the remaining attributes
        are hard-coded reference values. The ``res_prop_test*`` holders
        carry, per the inline comments, output of R's ``prop.test``.
        """
        self.n_success = np.array([ 73,  90, 114,  75])
        self.nobs = np.array([ 86,  93, 136,  82])

        # six raw p-values and their Holm-adjusted counterparts
        # (presumably one per pair of the four groups)
        self.res_ppt_pvals_raw = np.array([
                 0.00533824886503131, 0.8327574849753566, 0.1880573726722516,
                 0.002026764254350234, 0.1309487516334318, 0.1076118730631731
                ])
        self.res_ppt_pvals_holm = np.array([
                 0.02669124432515654, 0.8327574849753566, 0.4304474922526926,
                 0.0121605855261014, 0.4304474922526926, 0.4304474922526926
                ])

        res_prop_test = Holder()
        res_prop_test.statistic = 11.11938768628861
        res_prop_test.parameter = 3
        res_prop_test.p_value = 0.011097511366581344
        res_prop_test.estimate = np.array([
             0.848837209302326, 0.967741935483871, 0.838235294117647,
             0.9146341463414634
            ]).reshape(4,1, order='F')
        # R's NULL is stored as the string 'NULL'
        res_prop_test.null_value = '''NULL'''
        res_prop_test.conf_int = '''NULL'''
        res_prop_test.alternative = 'two.sided'
        res_prop_test.method = '4-sample test for equality of proportions ' + \
                               'without continuity correction'
        res_prop_test.data_name = 'smokers2 out of patients'
        self.res_prop_test = res_prop_test

        #> pt = prop.test(smokers2, patients, p=rep(c(0.9), 4), correct=FALSE)
        #> cat_items(pt, "res_prop_test_val.")
        res_prop_test_val = Holder()
        res_prop_test_val.statistic = np.array([
             13.20305530710751
            ]).reshape(1,1, order='F')
        res_prop_test_val.parameter = np.array([
             4
            ]).reshape(1,1, order='F')
        res_prop_test_val.p_value = 0.010325090041836
        res_prop_test_val.estimate = np.array([
             0.848837209302326, 0.967741935483871, 0.838235294117647,
             0.9146341463414634
            ]).reshape(4,1, order='F')
        res_prop_test_val.null_value = np.array([
             0.9, 0.9, 0.9, 0.9
            ]).reshape(4,1, order='F')
        res_prop_test_val.conf_int = '''NULL'''
        res_prop_test_val.alternative = 'two.sided'
        res_prop_test_val.method = '4-sample test for given proportions without continuity correction'
        res_prop_test_val.data_name = 'smokers2 out of patients, null probabilities rep(c(0.9), 4)'
        self.res_prop_test_val = res_prop_test_val

        #> pt = prop.test(smokers2[1], patients[1], p=0.9, correct=FALSE)
        #> cat_items(pt, "res_prop_test_1.")
        res_prop_test_1 = Holder()
        res_prop_test_1.statistic = 2.501291989664086
        res_prop_test_1.parameter = 1
        res_prop_test_1.p_value = 0.113752943640092
        res_prop_test_1.estimate = 0.848837209302326
        res_prop_test_1.null_value = 0.9
        res_prop_test_1.conf_int = np.array([0.758364348004061,
                                             0.9094787701686766])
        res_prop_test_1.alternative = 'two.sided'
        res_prop_test_1.method = '1-sample proportions test without continuity correction'
        res_prop_test_1.data_name = 'smokers2[1] out of patients[1], null probability 0.9'
        self.res_prop_test_1 = res_prop_test_1
Ejemplo n.º 26
0
'''Generated Random Processes for tests

autogenerated by savervs.py

'''

from numpy import array

from statsmodels.tools.testing import Holder


rvsdata = Holder()
rvsdata.comment = 'generated data, divide by 1000, see savervs'
rvsdata.xarma32 = array([
    -1271, -1222, -840, -169, -1016, -980, -1272, -926, 445, 833,
    -91, -1974, -2231, -549, 424, 238, -1665, -1815, 685, 3361,
    1912, -1931, -3555, -1817, 387, 730, -1154, -702, 973, 1340,
    -161, 276, 200, 1785, 834, -1469, -1593, -134, 555, -422,
    -2314, -1326, -2268, -3579, -3049, -930, 1155, 962, -644, -217,
    -561, 224, 810, 2445, 2710, 2152, 502, 21, 164, -499,
    -1093, -492, 531, -605, -1535, -2081, -3816, -2257, 487, 2134,
    1785, 1495, 1259, 1895, 1339, 617, 1143, 385, -1220, -738,
    1171, 1047, -234, -107, -1458, -1244, -2737, 33, 2373, 2749,
    2725, 3331, 1054, 418, 1231, -1171, -1446, -1187, 863, 1386,
    757, 734, 283, -735, 550, 417, -236, 324, 318, -102,
    2126, 3246, 2358, 2156, 726, -983, -803, -242, -500, -13,
    49, 308, -227, 243, -612, -2329, -2476, -3441, -5435, -4693,
    -2538, -2159, -2656, -906, -211, -288, 1777, 1363, 564, -2035,
    -1134, -609, -1112, 560, 658, 1533, 796, 523, 456, 76,
    -1164, -749, -1084, -3218, -2107, -310, -686, -1625, 2008, 4155,
    1650, -1086, -673, 1634, 1999, 449, -1077, -648, -155, -327,
Ejemplo n.º 27
0
def simulate_power_equivalence_oneway(means, nobs, equiv_margin, vars_=None,
                                      k_mc=1000, trim_frac=0,
                                      options_var=None, margin_type="f2"
                                      ):  # , anova_options=None):  #TODO
    """Simulate Power for oneway equivalence test (Wellek's Anova)

    This function is experimental and written to evaluate asymptotic power
    function. This function will change without backwards compatibility
    constraints. The only part that is stable is `pvalue` attribute in results.

    Parameters
    ----------
    means : array_like
        Mean of the normal distribution that each group is sampled from.
        Any number of groups is supported.
    nobs : array_like of int
        Number of observations drawn for each group.
    equiv_margin : float
        Equivalence margin passed to ``equivalence_oneway_generic``; its
        scale is selected by ``margin_type``.
    vars_ : array_like, optional
        Variance of each group. If None, all groups have unit variance.
    k_mc : int
        Number of Monte Carlo replications.
    trim_frac : float
        Trimming fraction passed to ``anova_oneway``.
    options_var : list of str, optional
        ``use_var`` options of ``anova_oneway`` that are evaluated on each
        simulated data set. Default is ``["unequal", "equal", "bf"]``.
    margin_type : str
        Passed through to ``equivalence_oneway_generic``.

    Returns
    -------
    res : Holder
        Simulation results with one row per replication:

        - ``f_stat`` : effect size ``f_stat * (n_groups - 1) / mean(nobs)``,
          one column per entry in ``options_var``
        - ``other`` : ``crit_f``, ``crit_es`` and ``power_zero`` of the
          equivalence test, three consecutive columns per entry in
          ``options_var``
        - ``pvalue`` : p-value of the equivalence test, one column per
          entry in ``options_var``
        - ``reject`` : rejection decision of the equivalence test, which
          is computed with ``alpha=0.05``

    Notes
    -----
    Samples are drawn with ``np.random.randn``, i.e. from numpy's global
    random state.
    """
    if options_var is None:
        options_var = ["unequal", "equal", "bf"]
    if vars_ is not None:
        stds = np.sqrt(vars_)
    else:
        stds = np.ones(len(means))

    # accept lists and tuples, not only ndarrays
    nobs = np.asarray(nobs)
    nobs_mean = nobs.mean()
    nobs_total = nobs.sum()
    n_groups = len(nobs)
    res_mc = []
    f_mc = []
    reject_mc = []
    other_mc = []
    for _ in range(k_mc):
        # one normal sample per group, the number of groups is not fixed
        samples = [m + std * np.random.randn(n)
                   for (n, m, std) in zip(nobs, means, stds)]

        res_i = []
        f_i = []
        reject_i = []
        other_i = []
        for uv in options_var:
            # for welch in options_welch:
            # res1 = sma.anova_generic(means, vars_, nobs, use_var=uv,
            #                          welch_correction=welch)
            res0 = anova_oneway(samples, use_var=uv, trim_frac=trim_frac)
            f_stat = res0.statistic
            res1 = equivalence_oneway_generic(f_stat, n_groups, nobs_total,
                                              equiv_margin, res0.df,
                                              alpha=0.05,
                                              margin_type=margin_type)
            res_i.append(res1.pvalue)
            # rescale the F statistic to the effect size that is collected
            es_wellek = f_stat * (n_groups - 1) / nobs_mean
            f_i.append(es_wellek)
            reject_i.append(res1.reject)
            other_i.extend([res1.crit_f, res1.crit_es, res1.power_zero])
        res_mc.append(res_i)
        f_mc.append(f_i)
        reject_mc.append(reject_i)
        other_mc.append(other_i)

    f_mc = np.asarray(f_mc)
    other_mc = np.asarray(other_mc)
    res_mc = np.asarray(res_mc)
    reject_mc = np.asarray(reject_mc)
    res = Holder(f_stat=f_mc,
                 other=other_mc,
                 pvalue=res_mc,
                 reject=reject_mc
                 )
    return res
Ejemplo n.º 28
0
'''Generated Random Processes for tests

autogenerated by savervs.py

'''

from numpy import array

from statsmodels.tools.testing import Holder

rvsdata = Holder()
rvsdata.comment = 'generated data, divide by 1000, see savervs'
rvsdata.xarma32 = array([
    -1271, -1222, -840, -169, -1016, -980, -1272, -926, 445, 833, -91, -1974,
    -2231, -549, 424, 238, -1665, -1815, 685, 3361, 1912, -1931, -3555, -1817,
    387, 730, -1154, -702, 973, 1340, -161, 276, 200, 1785, 834, -1469, -1593,
    -134, 555, -422, -2314, -1326, -2268, -3579, -3049, -930, 1155, 962, -644,
    -217, -561, 224, 810, 2445, 2710, 2152, 502, 21, 164, -499, -1093, -492,
    531, -605, -1535, -2081, -3816, -2257, 487, 2134, 1785, 1495, 1259, 1895,
    1339, 617, 1143, 385, -1220, -738, 1171, 1047, -234, -107, -1458, -1244,
    -2737, 33, 2373, 2749, 2725, 3331, 1054, 418, 1231, -1171, -1446, -1187,
    863, 1386, 757, 734, 283, -735, 550, 417, -236, 324, 318, -102, 2126, 3246,
    2358, 2156, 726, -983, -803, -242, -500, -13, 49, 308, -227, 243, -612,
    -2329, -2476, -3441, -5435, -4693, -2538, -2159, -2656, -906, -211, -288,
    1777, 1363, 564, -2035, -1134, -609, -1112, 560, 658, 1533, 796, 523, 456,
    76, -1164, -749, -1084, -3218, -2107, -310, -686, -1625, 2008, 4155, 1650,
    -1086, -673, 1634, 1999, 449, -1077, -648, -155, -327, 228, 1295, 2036,
    542, -197, -451, -1554, -2416, -2066, -2146, -1524, -1976, -2962, -2621,
    -2313, -2052, -3314, -2363, -1522, -3305, -3445, -3206, -1501, 2029, 1963,
    1168, 2050, 2927, 2019, 84, 213, 1783, 617, -767, -425, 739, 281, 506,
    -749, -938, -284, -147, 51, 1296, 3033, 2263, 1409, -1702, -819, -1295,
Ejemplo n.º 29
0
    def setup(self):
        """Store the count data and the R reference results on the instance.

        The reference numbers were produced in R; the generating commands
        are kept as ``#>`` comments where they are known.
        """
        def column(values):
            # (n, 1) column in Fortran order, the layout of the R dump
            flat = np.array(values)
            return flat.reshape(flat.size, 1, order='F')

        # successes and number of trials in each of the four samples
        self.n_success = np.array([73, 90, 114, 75])
        self.nobs = np.array([86, 93, 136, 82])

        # p-values of the six pairwise comparisons, raw and Holm adjusted
        self.res_ppt_pvals_raw = np.array([
            0.00533824886503131, 0.8327574849753566, 0.1880573726722516,
            0.002026764254350234, 0.1309487516334318, 0.1076118730631731,
        ])
        self.res_ppt_pvals_holm = np.array([
            0.02669124432515654, 0.8327574849753566, 0.4304474922526926,
            0.0121605855261014, 0.4304474922526926, 0.4304474922526926,
        ])

        # test for equality of the four proportions
        self.res_prop_test = Holder(
            statistic=11.11938768628861,
            parameter=3,
            p_value=0.011097511366581344,
            estimate=column([
                0.848837209302326, 0.967741935483871, 0.838235294117647,
                0.9146341463414634,
            ]),
            null_value='NULL',
            conf_int='NULL',
            alternative='two.sided',
            method=('4-sample test for equality of proportions '
                    'without continuity correction'),
            data_name='smokers2 out of patients',
        )

        # test of the four proportions against the given value 0.9
        #> pt = prop.test(smokers2, patients, p=rep(c(0.9), 4), correct=FALSE)
        #> cat_items(pt, "res_prop_test_val.")
        self.res_prop_test_val = Holder(
            statistic=column([13.20305530710751]),
            parameter=column([4]),
            p_value=0.010325090041836,
            estimate=column([
                0.848837209302326, 0.967741935483871, 0.838235294117647,
                0.9146341463414634,
            ]),
            null_value=column([0.9, 0.9, 0.9, 0.9]),
            conf_int='NULL',
            alternative='two.sided',
            method='4-sample test for given proportions without continuity correction',
            data_name='smokers2 out of patients, null probabilities rep(c(0.9), 4)',
        )

        # one sample test using only the first sample
        #> pt = prop.test(smokers2[1], patients[1], p=0.9, correct=FALSE)
        #> cat_items(pt, "res_prop_test_1.")
        self.res_prop_test_1 = Holder(
            statistic=2.501291989664086,
            parameter=1,
            p_value=0.113752943640092,
            estimate=0.848837209302326,
            null_value=0.9,
            conf_int=np.array([0.758364348004061, 0.9094787701686766]),
            alternative='two.sided',
            method='1-sample proportions test without continuity correction',
            data_name='smokers2[1] out of patients[1], null probability 0.9',
        )
Ejemplo n.º 30
0
def test_rank_compare_2indep1():
    """Check ``rank_compare_2indep`` against the lawstat Brunner-Munzel test.

    The normal based results (``use_t=False``) are compared in the reversed
    direction, the t based results (``use_t=True``) are computed with
    swapped samples so that they match the direction of the reference.
    Both parts also verify that confidence interval, hypothesis test and
    TOST are consistent with each other.
    """
    # Example from Munzel and Hauschke 2003
    # the data is given as counts per level, expand it to observations
    levels = [-2, -1, 0, 1, 2]
    counts_new = [24, 37, 21, 19, 6]
    counts_active = [11, 51, 22, 21, 7]
    sample_new = np.repeat(levels, counts_new)
    sample_active = np.repeat(levels, counts_active)

    # using lawstat
    # > brunner.munzel.test(xn, xa) #brunnermunzel.test(x, y)
    expected = Holder(statistic=1.1757561456582,
                      df=204.2984239868,
                      pvalue=0.2410606649547,
                      ci=[0.4700629827705593, 0.6183882855872511],
                      prob=0.5442256341789052)
    # reference confidence interval expressed for the reversed direction
    ci_reversed = 1 - np.array(expected.ci)[::-1]

    def check_test_confint_tost(result, confint, rtol_test, rtol_tost):
        # test consistency of test and confint:
        # the p-value at either confint limit has to equal alpha
        at_lower = result.test_prob_superior(value=confint[0])
        assert_allclose(at_lower[1], 0.05, rtol=rtol_test)
        at_upper = result.test_prob_superior(value=confint[1])
        assert_allclose(at_upper[1], 0.05, rtol=rtol_test)

        # test consistency of tost and confint
        # lower margin is binding, alternative larger
        tost = result.tost_prob_superior(confint[0], confint[1] * 1.05)
        assert_allclose(tost.results_larger.pvalue, 0.025, rtol=rtol_tost)
        assert_allclose(tost.pvalue, 0.025, rtol=rtol_tost)

        # upper margin is binding, alternative smaller
        tost = result.tost_prob_superior(confint[0] * 0.85, confint[1])
        assert_allclose(tost.results_smaller.pvalue, 0.025, rtol=rtol_tost)
        assert_allclose(tost.pvalue, 0.025, rtol=rtol_tost)

    # normal distribution, direction is reversed relative to the reference
    res_normal = rank_compare_2indep(sample_new, sample_active, use_t=False)
    assert_allclose(res_normal.statistic, -expected.statistic, rtol=1e-13)
    assert_allclose(res_normal.prob1, 1 - expected.prob, rtol=1e-13)
    assert_allclose(res_normal.prob2, expected.prob, rtol=1e-13)
    test_normal = res_normal.test_prob_superior()
    # TODO: return HolderTuple
    # assert_allclose(tt.statistic, res2_t.statistic)
    # TODO: check sign/direction in lawstat
    assert_allclose(test_normal[0], -expected.statistic, rtol=1e-13)

    ci_normal = res_normal.conf_int(alpha=0.05)
    # we compare normal confint with t confint, lower rtol
    assert_allclose(ci_normal, ci_reversed, rtol=0.005)
    check_test_confint_tost(res_normal, ci_normal,
                            rtol_test=1e-13, rtol_tost=1e-13)

    # use t-distribution
    # our ranking is defined as reversed from lawstat, and BM article
    # reverse the direction by swapping the samples to match our definition
    res_t = rank_compare_2indep(sample_active, sample_new, use_t=True)
    assert_allclose(res_t.statistic, expected.statistic, rtol=1e-13)
    test_t = res_t.test_prob_superior()
    # TODO: return HolderTuple
    # assert_allclose(tt.statistic, res2_t.statistic)
    # TODO: check sign/direction in lawstat, reversed from ours
    assert_allclose(test_t[0], expected.statistic, rtol=1e-13)
    assert_allclose(test_t[1], expected.pvalue, rtol=1e-13)
    assert_allclose(res_t.pvalue, expected.pvalue, rtol=1e-13)
    assert_allclose(res_t.df, expected.df, rtol=1e-13)

    ci_t = res_t.conf_int(alpha=0.05)
    assert_allclose(ci_t, expected.ci, rtol=1e-11)
    check_test_confint_tost(res_t, ci_t, rtol_test=1e-11, rtol_tost=1e-10)

    # extras
    # cohen's d
    effect_size = res_t.effectsize_normal()
    prob_normal = prob_larger_continuous(stats.norm(loc=effect_size),
                                         stats.norm)
    # round trip
    assert_allclose(prob_normal, res_t.prob1, rtol=1e-13)

    # round trip with cohen's d
    prob_cohen = cohensd2problarger(effect_size)
    assert_allclose(prob_cohen, res_t.prob1, rtol=1e-13)

    ci_transformed = res_t.confint_lintransf(1, -1)
    assert_allclose(ci_transformed, ci_reversed, rtol=0.005)
Ejemplo n.º 31
0
# ## fit ordered cloglog model
# r_cloglog <- polr(apply ~ pared + public + gpa,
#          data = ologit_ucla,
#          method = 'cloglog',
#          Hess=TRUE)
#
# ## with r = r_logit or r_probit or r_cloglog
# ## we add p-values
# (ctable <- coef(summary(r)))
# p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
# (ctable <- cbind(ctable, "p value" = p))
# ## show 7 first predictions
# head(predict(r, subset(ologit_ucla,
#                        select=c("pared", "public","gpa")), type='prob'),7)

data_store = Holder()
cur_dir = os.path.dirname(os.path.abspath(__file__))
df = pd.read_csv(os.path.join(cur_dir, "ologit_ucla.csv"))

# NOTE: ``set_categories(..., inplace=True)`` was deprecated in pandas 1.3
# and removed in pandas 2.0, so the returned Series is assigned back instead.

# df_unordered['apply'] is pd.Categorical with ordered = False
df_unordered = df.copy()
df_unordered['apply'] = pd.Categorical(df['apply'], ordered=False)
# but categories are set in order
df_unordered['apply'] = df_unordered['apply'].cat.set_categories(
    ['unlikely', 'somewhat likely', 'very likely'])

# df['apply'] is pd.Categorical with ordered = True
df['apply'] = pd.Categorical(df['apply'], ordered=True)
df['apply'] = df['apply'].cat.set_categories(
    ['unlikely', 'somewhat likely', 'very likely'])
Ejemplo n.º 32
0
 def setup_class(cls):
     """Set the reference results and the TOST p-value with pooled variance.

     The two samples are column 3 of ``clinic``, split after row 15.
     """
     cls.res2 = tost_clinic_indep_2_pooled
     sample1 = clinic[:15, 3]
     sample2 = clinic[15:, 3]
     cls.res1 = Holder()
     tost = smws.ttost_ind(sample1, sample2, -0.6, 0.6, usevar='pooled')
     # only the first element of the result, the p-value, is compared
     cls.res1.pvalue = tost[0]
Ejemplo n.º 33
0
from statsmodels.stats.correlation_tools import (
    corr_nearest, corr_clipped, cov_nearest, _project_correlation_factors,
    corr_nearest_factor, _spg_optim, corr_thresholded,
    cov_nearest_factor_homog, FactoredPSDMatrix)
from statsmodels.tools.testing import Holder


def norm_f(x, y):
    '''Frobenius norm of the difference between two arrays

    This is the square root of the sum of squared elementwise differences.
    '''
    squared_diff = (x - y)**2
    return np.sqrt(squared_diff.sum())


# R library Matrix results
#> nc  <- nearPD(pr, conv.tol = 1e-7, keepDiag = TRUE, doDykstra =FALSE, corr=TRUE)
#> cat_items(nc, prefix="cov1_r.")
cov1_r = Holder(
    # printed placeholder of the R S4 object, not numeric data
    mat='''<S4 object of class structure("dpoMatrix", package = "Matrix")>''',
    eigenvalues=np.array([
        4.197315628646795, 0.7540460243978023, 0.5077608149667492,
        0.3801267599652769, 0.1607508970775889, 4.197315628646795e-08
    ]),
    # R logicals are stored as the strings printed by R
    corr='''TRUE''',
    normF=0.0743805226512533,
    iterations=11,
    rel_tol=8.288594638441735e-08,
    converged='''TRUE''',
)
#> mkarray2(as.matrix(nc$mat), name="cov1_r.mat")
cov1_r.mat = np.array([
    1, 0.487968018215892, 0.642651880010906, 0.4906386709070835,
Ejemplo n.º 34
0
def test_cohens_kappa_irr():
    """Compare ``cohens_kappa`` with ``kappa2`` results from R package irr.

    The reference results are for raters 2 and 3 of the ``anxiety`` data.
    Each case pairs one reference result with a ``weights`` and ``wt``
    option of ``cohens_kappa`` that has to reproduce it.
    """
    # Reference results for raters 1 and 2 exist but are not checked by
    # this test. They are recorded here so that they are not mistaken for
    # tested values:
    #> r = kappa2(anxiety[,1:2], c(0,0,0,1,1,1))
    #  value = 0.1891892, statistic = 0.5079002, p_value = 0.6115233
    #> r = kappa2(anxiety[,1:2], c(0,0,1,1,2,2))
    #  value = 0.2820513, statistic = 1.257410, p_value = 0.2086053

    #> r = kappa2(anxiety[,2:3])
    #> cat_items(r, pref="ck_w1.")
    ck_w1 = Holder(
        method="Cohen's Kappa for 2 Raters (Weights: unweighted)",
        irr_name='Kappa',
        value=-0.006289308,
        stat_name='z',
        statistic=-0.0604067,
        p_value=0.9518317)

    #> r = kappa2(anxiety[,2:3], "equal")
    #> cat_items(r, pref="ck_w2.")
    ck_w2 = Holder(
        method="Cohen's Kappa for 2 Raters (Weights: equal)",
        irr_name='Kappa',
        value=0.1459075,
        stat_name='z',
        statistic=1.282472,
        p_value=0.1996772)

    #> r = kappa2(anxiety[,2:3], "squared")
    #> cat_items(r, pref="ck_w3.")
    ck_w3 = Holder(
        method="Cohen's Kappa for 2 Raters (Weights: squared)",
        irr_name='Kappa',
        value=0.2520325,
        stat_name='z',
        statistic=1.437451,
        p_value=0.1505898)

    #> r = kappa2(anxiety[,2:3], c(0,0,1,1,2))
    #> cat_items(r, pref="ck_w4.")
    ck_w4 = Holder(
        method="Cohen's Kappa for 2 Raters (Weights: 0,0,1,1,2)",
        irr_name='Kappa',
        value=0.2391304,
        stat_name='z',
        statistic=1.223734,
        p_value=0.2210526)

    # (reference result, weights, wt) for each call of cohens_kappa
    all_cases = [(ck_w1, None, None),
                 (ck_w2, None, 'linear'),
                 (ck_w2, np.arange(5), None),
                 (ck_w2, np.arange(5), 'toeplitz'),
                 (ck_w3, None, 'quadratic'),
                 (ck_w3, np.arange(5)**2, 'toeplitz'),
                 (ck_w3, 4*np.arange(5)**2, 'toeplitz'),
                 (ck_w4, [0, 0, 1, 1, 2], 'toeplitz')]

    # Note R:irr drops the missing category level 4 and uses the reduced matrix
    # the bin edges merge levels 4 and 5 into one bin to obtain that table
    bin_edges = [1, 2, 3, 4, 6, 7]
    table = np.histogramdd(anxiety[:, 1:], (bin_edges, bin_edges))[0]

    for res2, w, wt in all_cases:
        msg = repr(w) + repr(wt)
        res1 = cohens_kappa(table, weights=w, wt=wt)
        assert_almost_equal(res1.kappa, res2.value, decimal=6, err_msg=msg)
        assert_almost_equal(res1.z_value, res2.statistic, decimal=5,
                            err_msg=msg)
        assert_almost_equal(res1.pvalue_two_sided, res2.p_value, decimal=6,
                            err_msg=msg)
                                        np.asarray([20., 20]))
    # TODO: check this is this difference expected?, see test_proportion
    assert_allclose(res1[1], res2[1], rtol=0.03)

    res1a = CompareMeans(d1, d2).ztest_ind()
    assert_allclose(res1a[1], res2[1], rtol=0.03)
    assert_almost_equal(res1a, res1, decimal=12)


# test for ztest and z confidence interval against R BSDA z.test
# Note: I needed to calculate the pooled standard deviation for R
#       std = np.std(np.concatenate((x-x.mean(),y-y.mean())), ddof=2)

# > zt = z.test(x, sigma.x=0.57676142668828667, y, sigma.y=0.57676142668828667)
# > cat_items(zt, "ztest.")
# two-sided alternative
ztest_ = Holder(
    statistic=6.55109865675183,
    p_value=5.711530850508982e-11,
    conf_int=np.array([1.230415246535603, 2.280948389828034]),
    estimate=np.array([7.01818181818182, 5.2625]),
    null_value=0,
    alternative='two.sided',
    method='Two-sample z-Test',
    data_name='x and y',
)
# > zt = z.test(x, sigma.x=0.57676142668828667, y,
#               sigma.y=0.57676142668828667, alternative="less")
# > cat_items(zt, "ztest_smaller.")
# one-sided alternative, the confidence interval has no lower limit (nan)
ztest_smaller = Holder(
    statistic=6.55109865675183,
    p_value=0.999999999971442,
    conf_int=np.array([np.nan, 2.196499421109045]),
)
Ejemplo n.º 36
0
from numpy import array

from statsmodels.tools.testing import Holder


data = Holder()
data.comment = 'generated data, divide by 1000'
data.name = 'data'
data.xo = array([
    [-419, -731, -1306, -1294],
    [6, 529, -200, -437],
    [-27, -833, -6, -564],
    [-304, -273, -502, -739],
    [1377, -912, 927, 280],
    [-375, -517, -514, 49],
    [247, -504, 123, -259],
    [712, 534, -773, 286],
    [195, -1080, 3256, -178],
    [-854, 75, -706, -1084],
    [-1219, -612, -15, -203],
    [550, -628, -483, -2686],
    [-365, 1376, -1266, 317],
    [-489, 544, -195, 431],
    [-656, 854, 840, -723],
    [16, -1385, -880, -460],
    [258, -2252, 96, 54],
    [2049, -750, -1115, 381],
    [-65, 280, -777, 416],
    [755, 82, -806, 1027],
    [-39, -170, -2134, 743],
    [-859, 780, 746, -133],
Ejemplo n.º 37
0
 def setup_class(cls):
     """Set the reference results and the TOST p-value with unequal variance.

     The two samples are column 2 of ``clinic``, split after row 15.
     """
     cls.res2 = tost_clinic_indep_1
     sample1 = clinic[:15, 2]
     sample2 = clinic[15:, 2]
     cls.res1 = Holder()
     tost = smws.ttost_ind(sample1, sample2, -0.6, 0.6, usevar='unequal')
     # only the first element of the result, the p-value, is compared
     cls.res1.pvalue = tost[0]
Ejemplo n.º 38
0
from numpy import array

from statsmodels.tools.testing import Holder

# Reference values for partial autocorrelation tests, stored as attributes
# of a Holder. The call that generated them is recorded in ``comment``.
# NOTE(review): the 100 / 1000 suffix presumably refers to the sample size
# of the underlying series - confirm against the script that created this.
mlpacf = Holder()
mlpacf.comment = 'mlab.parcorr(x, [], 2, nout=3)'
mlpacf.name = 'mlpacf'
# lags 0 to 20 as a column vector
mlpacf.lags1000 = array([[0.], [1.], [2.], [3.], [4.], [5.], [6.], [7.], [8.],
                         [9.], [10.], [11.], [12.], [13.], [14.], [15.], [16.],
                         [17.], [18.], [19.], [20.]])
# upper and lower bound, symmetric around zero
mlpacf.bounds1000 = array([[0.06334064], [-0.06334064]])
# lags 0 to 20 as a column vector
mlpacf.lags100 = array([[0.], [1.], [2.], [3.], [4.], [5.], [6.], [7.], [8.],
                        [9.], [10.], [11.], [12.], [13.], [14.], [15.], [16.],
                        [17.], [18.], [19.], [20.]])
# partial autocorrelations, one row per lag, the value at lag 0 is 1
mlpacf.pacf100 = array([[1.], [0.47253777], [-0.49466966], [-0.02689319],
                        [-0.00122204],
                        [0.08419183], [0.03220774], [0.10404012], [0.05304617],
                        [-0.04129564], [-0.04049451], [0.11727754],
                        [0.11804158], [-0.05864957], [-0.15681802],
                        [0.11828684], [0.05156002], [0.00694629], [0.01668964],
                        [0.02236851], [-0.0909443]])
# partial autocorrelations, one row per lag, the value at lag 0 is 1
mlpacf.pacf1000 = array([[1.00000000e+00], [5.29288262e-01], [-5.31849027e-01],
                         [1.17440051e-02], [-5.37941905e-02
                                            ], [-4.11119348e-02],
                         [-2.40367432e-02], [2.24289891e-02], [3.33007235e-02],
                         [4.59658302e-02], [6.65850553e-03], [-3.76714278e-02],
                         [5.27229738e-02], [2.50796558e-02], [-4.42597301e-02],
                         [-1.95819186e-02],
                         [4.70451394e-02], [-1.70963705e-03], [3.04262524e-04],
                         [-6.22001614e-03], [-1.16694989e-02]])
# upper and lower bound, symmetric around zero
mlpacf.bounds100 = array([[0.20306923], [-0.20306923]])
Ejemplo n.º 39
0
from numpy import array, rec

from statsmodels.tools.testing import Holder

var_results = Holder()
var_results.comment = 'VAR test data converted from vars_results.npz'
var_results.causality = array([
    (9.317172089406967e-08,),
    (0.5183914225971917,),
    (4.8960835385969403e-14,)],
    dtype=[('causedby', 'float')])
var_results.name = 'var_results'
var_results.orthirf = array({
    'realgdp': rec.array([
        (0.007557357219752236, 0.003948403413668315, 0.02972434157321242),
        (0.0015408726821578582, 0.0010664916255201816, 0.00923575489996933),
        (0.0015874964105555918, 0.0010551760558416706, 0.006102514196485799),
        (0.0007262051539604352, 0.0005562787500837443, 0.003199064883156089),
        (0.0005537000868358786, 0.0003520396722562061, 0.0024372344590635623),
        (0.0003079984190444812, 0.00021674897409108682, .0013369479853037147)],
        dtype=[('realgdp', 'float'),
               ('realcons', 'float'),
               ('realinv', 'float')]),
    'realinv': rec.array([
        (.0, 0.0, 0.020741992721114832),
        (.0006890376065674764, .0005338724781743238, 0.004676882806534488),
        (.00017134455810606506, .000682084896451223, -0.0005205835547221123),
        (.0005217378718553543, .00030179909990059973, 0.0026650577026759623),
        (.00034979575853114173, .00022249591743758265, 0.0015804716569096742),
        (.00017738402507880077, .00013384975583249413, 0.0007585745605878197)],
        dtype=[('realgdp', 'float'),
Ejemplo n.º 40
0
    def get_results(cls):
        """Attach the R reference results for this case to the class."""
        cls.k = 1
        # results from R WRS2
        cls.res_basic = np.array([
            342.705882352941, 92.3342348150314, 380.157894736842,
            92.9416968861829, 129679.029239766,
        ])

        # results from R PairedData
        ytt1 = Holder(
            statistic=3.71157981694944,
            parameter=16,
            p_value=0.00189544440273015,
            conf_int=np.array([146.966048669017, 538.445716036866]),
            estimate=342.705882352941,
            null_value=0,
            alternative='two.sided',
            method='One sample Yuen test, trim=0.0526315789473684',
            data_name='x',
        )
        cls.ytt1 = ytt1