Example #1
0
from numpy import array

from statsmodels.tools.testing import Holder


data = Holder()
data.comment = 'generated data, divide by 1000'
data.name = 'data'
data.xo = array([
    [-419, -731, -1306, -1294],
    [6, 529, -200, -437],
    [-27, -833, -6, -564],
    [-304, -273, -502, -739],
    [1377, -912, 927, 280],
    [-375, -517, -514, 49],
    [247, -504, 123, -259],
    [712, 534, -773, 286],
    [195, -1080, 3256, -178],
    [-854, 75, -706, -1084],
    [-1219, -612, -15, -203],
    [550, -628, -483, -2686],
    [-365, 1376, -1266, 317],
    [-489, 544, -195, 431],
    [-656, 854, 840, -723],
    [16, -1385, -880, -460],
    [258, -2252, 96, 54],
    [2049, -750, -1115, 381],
    [-65, 280, -777, 416],
    [755, 82, -806, 1027],
    [-39, -170, -2134, 743],
    [-859, 780, 746, -133],
Example #2
0
from numpy import array

from statsmodels.tools.testing import Holder

# Reference values attached to a plain ``Holder`` container.
armarep = Holder()
# The two adjacent string literals are joined by Python without any
# separator, so the stored text runs both recorded commands together.
# NOTE(review): confirm whether a separator between them was intended.
armarep.comment = ('mlab.garchma(-res_armarep.ar[1:], res_armarep.ma[1:], 20)'
                   'mlab.garchar(-res_armarep.ar[1:], res_armarep.ma[1:], 20)')
# Column of 20 values; presumably the output of the ``garchma`` command
# recorded in ``comment`` -- TODO confirm.
armarep.marep = array([[-0.1], [-0.77], [-0.305], [0.4635], [0.47575],
                       [-0.132925], [-0.4470625], [-0.11719125], [0.299054375],
                       [0.2432801875], [-0.11760340625], [-0.253425853125],
                       [-0.0326302015625], [0.18642558171875],
                       [0.11931695210938], [-0.08948198932031],
                       [-0.14019455634766], [0.00148831328242],
                       [0.11289980171934], [0.05525925023373]])
# Lag coefficients referenced by the commands in ``comment``; both arrays
# start with 1.
armarep.ar = array([1., -0.5, 0.8])
armarep.ma = array([1., -0.6, 0.08])
armarep.name = 'armarep'
# Column of 20 values; presumably the output of the ``garchar`` command
# recorded in ``comment`` -- TODO confirm.
armarep.arrep = array([[-1.00000000000000e-01], [-7.80000000000000e-01],
                       [-4.60000000000000e-01], [-2.13600000000000e-01],
                       [-9.13600000000000e-02], [-3.77280000000000e-02],
                       [-1.53280000000000e-02], [-6.17856000000000e-03],
                       [-2.48089600000000e-03], [-9.94252799999999e-04],
                       [-3.98080000000000e-04], [-1.59307776000000e-04],
                       [-6.37382655999999e-05], [-2.54983372800000e-05],
                       [-1.01999411200000e-05], [-4.08009768959999e-06],
                       [-1.63206332416000e-06], [-6.52830179327999e-07],
                       [-2.61133041663999e-07], [-1.04453410652160e-07]])
Created on Mon May  4 21:21:09 2020

Author: Josef Perktold
License: BSD-3

"""

import numpy as np
from statsmodels.tools.testing import Holder

NA = np.nan

# > pe = poisson.exact(c(60, 30), c(51477.5, 54308.7), tsmethod="minlike",
#                      midp=FALSE)
# > cat_items(pe, prefix="res.")
# Values printed by the R call quoted above; ``res`` and ``res_pexact_cond``
# are two names for the same container.
res = Holder(
    statistic=60,
    parameter=43.7956463130352,
    p_value=0.000675182658686321,
    conf_int=np.array([1.34983090611567, 3.27764509862914]),
    estimate=2.10999757175465,
    null_value=1,
    alternative='two.sided',
    method=('Exact two-sided Poisson test (sum of minimum likelihood'
            ' method)'),
    data_name='c(60, 30) time base: c(51477.5, 54308.7)',
)
res_pexact_cond = res


# > pe = poisson.exact(c(60, 30), c(51477.5, 54308.7), tsmethod="minlike",
                                        np.asarray([20., 20]))
    # TODO: check whether this difference is expected, see test_proportion
    assert_allclose(res1[1], res2[1], rtol=0.03)

    res1a = CompareMeans(d1, d2).ztest_ind()
    assert_allclose(res1a[1], res2[1], rtol=0.03)
    assert_almost_equal(res1a, res1, decimal=12)


# test for ztest and z confidence interval against R BSDA z.test
# Note: I needed to calculate the pooled standard deviation for R
#       std = np.std(np.concatenate((x-x.mean(),y-y.mean())), ddof=2)

# > zt = z.test(x, sigma.x=0.57676142668828667, y, sigma.y=0.57676142668828667)
# > cat_items(zt, "ztest.")
# Two-sided result of the R ``z.test`` call quoted in the comment above.
ztest_ = Holder(
    statistic=6.55109865675183,
    p_value=5.711530850508982e-11,
    conf_int=np.array([1.230415246535603, 2.280948389828034]),
    estimate=np.array([7.01818181818182, 5.2625]),
    null_value=0,
    alternative='two.sided',
    method='Two-sample z-Test',
    data_name='x and y',
)
# > zt = z.test(x, sigma.x=0.57676142668828667, y,
#               sigma.y=0.57676142668828667, alternative="less")
# > cat_items(zt, "ztest_smaller.")
ztest_smaller = Holder()
ztest_smaller.statistic = 6.55109865675183
ztest_smaller.p_value = 0.999999999971442
ztest_smaller.conf_int = np.array([np.nan, 2.196499421109045])
Example #5
0
19    2 3.46 3.60 2.97 1.80 1.74
20    2 4.01 3.48 4.42 3.06 2.76
21    2 3.04 2.87 2.87 2.71 2.87
22    2 3.47 3.24 3.47 3.26 3.14
23    2 4.06 3.92 3.18 3.06 1.74
24    2 2.91 3.99 3.06 2.02 3.18
25    2 3.59 4.21 4.02 3.26 2.85
26    2 4.51 4.21 3.78 2.63 1.92
27    2 3.16 3.31 3.28 3.25 3.52
28    2 3.86 3.61 3.28 3.19 3.09
29    2 3.31 2.97 3.76 3.18 2.60
30    2 3.02 2.73 3.87 3.50 2.93'''.split()
clinic = np.array(raw_clinic, float).reshape(-1, 7)

#t = tost(-clinic$var2[16:30] + clinic$var2[1:15], eps=0.6)
# Reference result of the R ``tost`` call quoted in the comment above.
tost_clinic_paired = Holder(
    sample='paired',
    mean_diff=0.5626666666666665,
    se_diff=0.2478276410785118,
    alpha=0.05,
    ci_diff=(0.1261653305099018, 0.999168002823431),
    df=14,
    epsilon=0.6,
    result='not rejected',
    p_value=0.4412034046017588,
    check_me=(0.525333333333333, 0.6),
)

#> t = tost(-clinic$var1[16:30] + clinic$var1[1:15], eps=0.6)
#> cat_items(t, prefix="tost_clinic_paired_1.")
tost_clinic_paired_1 = Holder()
tost_clinic_paired_1.mean_diff = 0.1646666666666667
Example #6
0
 def setup_class(cls):
     """Attach the reference result and the computed p-value to the class.

     ``cls.res2`` is the ``tost_clinic_indep`` reference container.
     ``cls.res1`` is a fresh ``Holder`` whose ``pvalue`` is the first element
     returned by ``smws.ttost_ind`` for column 3 of ``clinic``, split into
     rows 0-14 and rows 15 onward, called with -0.6, 0.6 and
     ``usevar='unequal'``.
     """
     cls.res2 = tost_clinic_indep
     first_rows = clinic[:15, 3]
     later_rows = clinic[15:, 3]
     computed = cls.res1 = Holder()
     tost_result = smws.ttost_ind(first_rows, later_rows, -0.6, 0.6,
                                  usevar='unequal')
     computed.pvalue = tost_result[0]
Example #7
0
from numpy import array

from statsmodels.tools.testing import Holder

# Reference values attached to a plain ``Holder`` container; ``comment``
# records the command that was used to produce them.
mlpacf = Holder()
mlpacf.comment = 'mlab.parcorr(x, [], 2, nout=3)'
mlpacf.name = 'mlpacf'
# Lag index column 0..20 (21 rows), same content as ``lags100`` below.
mlpacf.lags1000 = array([[0.], [1.], [2.], [3.], [4.], [5.], [6.], [7.], [8.],
                         [9.], [10.], [11.], [12.], [13.], [14.], [15.], [16.],
                         [17.], [18.], [19.], [20.]])
# Symmetric pair (+b, -b); presumably the bounds returned by ``parcorr`` for
# the series of length 1000 -- TODO confirm.
mlpacf.bounds1000 = array([[0.06334064], [-0.06334064]])
mlpacf.lags100 = array([[0.], [1.], [2.], [3.], [4.], [5.], [6.], [7.], [8.],
                        [9.], [10.], [11.], [12.], [13.], [14.], [15.], [16.],
                        [17.], [18.], [19.], [20.]])
# 21 rows, one per entry of ``lags100``; the first value is 1.
mlpacf.pacf100 = array([[1.], [0.47253777], [-0.49466966], [-0.02689319],
                        [-0.00122204],
                        [0.08419183], [0.03220774], [0.10404012], [0.05304617],
                        [-0.04129564], [-0.04049451], [0.11727754],
                        [0.11804158], [-0.05864957], [-0.15681802],
                        [0.11828684], [0.05156002], [0.00694629], [0.01668964],
                        [0.02236851], [-0.0909443]])
# 21 rows, one per entry of ``lags1000``; the first value is 1.
mlpacf.pacf1000 = array([[1.00000000e+00], [5.29288262e-01], [-5.31849027e-01],
                         [1.17440051e-02], [-5.37941905e-02
                                            ], [-4.11119348e-02],
                         [-2.40367432e-02], [2.24289891e-02], [3.33007235e-02],
                         [4.59658302e-02], [6.65850553e-03], [-3.76714278e-02],
                         [5.27229738e-02], [2.50796558e-02], [-4.42597301e-02],
                         [-1.95819186e-02],
                         [4.70451394e-02], [-1.70963705e-03], [3.04262524e-04],
                         [-6.22001614e-03], [-1.16694989e-02]])
# Symmetric pair (+b, -b), the counterpart of ``bounds1000`` for ``pacf100``.
mlpacf.bounds100 = array([[0.20306923], [-0.20306923]])
# ## fit ordered cloglog model
# r_cloglog <- polr(apply ~ pared + public + gpa,
#          data = ologit_ucla,
#          method = 'cloglog',
#          Hess=TRUE)
#
# ## with r = r_logit or r_probit or r_cloglog
# ## we add p-values
# (ctable <- coef(summary(r)))
# p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
# (ctable <- cbind(ctable, "p value" = p))
# ## show 7 first predictions
# head(predict(r, subset(ologit_ucla,
#                        select=c("pared", "public","gpa")), type='prob'),7)

# Container for the reference data; the CSV file is read from the directory
# that contains this module.
data_store = Holder()
cur_dir = os.path.dirname(os.path.abspath(__file__))
df = pd.read_csv(os.path.join(cur_dir, "ologit_ucla.csv"))

# df_unordered['apply'] is pd.Categorical with ordered = False
df_unordered = df.copy()
df_unordered['apply'] = pd.Categorical(df['apply'], ordered=False)
# but categories are set in order
# ``set_categories`` returns a new Series, so the result is assigned back.
# The ``inplace`` keyword was deprecated in pandas 1.3 and removed in
# pandas 2.0.
df_unordered['apply'] = df_unordered['apply'].cat.set_categories(
    ['unlikely', 'somewhat likely', 'very likely'])

# df['apply'] is pd.Categorical with ordered = True
df['apply'] = pd.Categorical(df['apply'], ordered=True)
# ``set_categories`` keeps the existing ``ordered`` flag when none is given.
df['apply'] = df['apply'].cat.set_categories(
    ['unlikely', 'somewhat likely', 'very likely'])
Example #9
0
def test_holder():
    """An attribute assigned to a ``Holder`` instance can be read back."""
    container = Holder()
    container.new_attr = 1
    assert hasattr(container, 'new_attr')
    assert container.new_attr == 1
Example #10
0
"""

Created on Sun Jun 30 20:25:22 2013

Author: Josef Perktold
"""

import pytest
import numpy as np
from numpy.testing import assert_allclose

from statsmodels.tools.tools import add_constant
from statsmodels.tools.testing import Holder
from statsmodels.miscmodels.tmodel import TLinearModel

mm = Holder()
mm.date_label = [
    "Apr.1982", "Apr.1983", "Apr.1984", "Apr.1985", "Apr.1986", "Aug.1982",
    "Aug.1983", "Aug.1984", "Aug.1985", "Aug.1986", "Dec.1982", "Dec.1983",
    "Dec.1984", "Dec.1985", "Dec.1986", "Feb.1284", "Feb.1982", "Feb.1983",
    "Feb.1985", "Feb.1986", "Jan.1982", "Jan.1983", "Jan.1984", "Jan.1985",
    "Jan.1986", "Jul.1982", "July1983", "July1984", "July1985", "July1986",
    "June1982", "June1983", "June1984", "June1985", "June1986", "Mar.1982",
    "Mar.1983", "Mar.1984", "Mar.1985", "Mar.1986", "May1982", "May1983",
    "May1984", "May1985", "May1986", "Nov.1982", "Nov.1983", "Nov.1984",
    "Nov.1985", "Nov.1986", "Oct.1982", "Oct.1983", "Oct.1984", "Oct.1985",
    "Oct.1986", "Sept.1982", "Sept.1983", "Sept.1984", "Sept.1985", "Sept.1986"
]

mm.m_marietta = np.array([
    -0.1365, -0.0769, -0.0575, 0.0526, -0.0449, -0.0859, -0.0742, 0.6879,
Example #11
0
# -*- coding: utf-8 -*-
"""

Created on Sun Jun 30 23:14:36 2013

Author: Josef Perktold
"""

import numpy as np

from statsmodels.tools.testing import Holder

# > tfit3 <- tlm(m.marietta~CRSP, data=mm, start=list(dof = 3), estDof=TRUE)
# > cat_items(tfit3, "res_t_dfest.")
# Values printed by the R commands above, collected in one container.
res_t_dfest = Holder(random=np.array([
    0.6242843, 1.349205, 1.224172, 1.272655, 1.323455, 1.091313, 1.227218,
    0.0316284, 0.7202973, 1.038392, 1.091907, 0.7966355, 0.784222, 0.5042926,
    0.1964543, 1.172123, 1.017338, 0.8799186, 0.7849335, 0.790158, 0.8121724,
    1.286998, 0.7286052, 1.330104, 1.054037, 1.299656, 1.285306, 1.271166,
    1.106877, 1.303909, 0.4250416, 1.277096, 1.160106, 0.1871806, 1.074168,
    1.197795, 1.046638, 1.104423, 1.301670, 1.333217, 0.8156778, 1.309934,
    1.142454, 1.347481, 0.6605017, 1.035725, 1.172666, 1.281746, 0.8796436,
    0.9597098, 0.6221453, 1.149490, 1.291864, 1.207619, 1.239625, 1.351065,
    1.248711, 0.3532520, 0.6067273, 0.8180234
]), dof=2.837183, dofse=1.175296, iter=7, logLik=71.81292, endTime=0.01)
Example #12
0
import numpy as np
import os
import pandas as pd
from statsmodels.tools.testing import Holder

cur_dir = os.path.dirname(os.path.abspath(__file__))

results_meth = Holder()
results_meth.type = 'ML'
results_meth.method = 'BFGS'
results_meth.scoring = 3
results_meth.start = np.array([
    1.44771372395646, 0.0615237727637243, 0.604926837329731, 0.98389051740736,
    6.25859738441389, 0
])
results_meth.n = 36
results_meth.nobs = 36
results_meth.df_null = 34
results_meth.df_residual = 30
results_meth.loglik = 104.148028405343
results_meth.vcov = np.array([
    0.00115682165449043, -0.000665413980696048, -0.000924081767589657,
    -0.000924126199147583, 0.000941505276523348, -1.44829373972985e-05,
    -0.000665413980696048, 0.00190019966824938, 4.45163588328844e-06,
    6.23668249663711e-06, -0.00216418558500309, 4.18754929463506e-05,
    -0.000924081767589657, 4.45163588328844e-06, 0.0023369966334575,
    0.000924223263225116, 0.000168988804218447, 1.14762434349836e-07,
    -0.000924126199147583, 6.23668249663711e-06, 0.000924223263225116,
    0.00282071714820361, 0.000331499252772628, 1.93773358431975e-07,
    0.000941505276523348, -0.00216418558500309, 0.000168988804218447,
    0.000331499252772628, 3.20761137509433, -0.0581708456538647,
Example #13
0
"""

import numpy as np
from statsmodels.tools.testing import Holder
"""
example from Kacker 2004, computed with R metafor

> y = c(61.0, 61.4 , 62.21, 62.3 , 62.34, 62.6 , 62.7 , 62.84, 65.9)
> v = c(0.2025, 1.2100, 0.0900, 0.2025, 0.3844, 0.5625, 0.0676, 0.0225, 1.8225)
> res = rma(y, v, data=dat, method="PM", control=list(tol=1e-9))
> convert_items(res, prefix="exk1_metafor.")

"""

# Values printed by the R ``rma`` call quoted in the string above.
exk1_metafor = Holder(
    b=62.4076199113286,
    beta=62.4076199113286,
    se=0.338030602684471,
    zval=184.621213037276,
    pval=0,
    ci_lb=61.7450921043947,
    ci_ub=63.0701477182625,
    vb=0.114264688351227,
    tau2=0.705395309224248,
    se_tau2=0.51419109758052,
    tau2_f=0.705395309224248,
    k=9,
    k_f=9,
    k_eff=9,
    k_all=9,
)
Example #14
0
def simulate_power_equivalence_oneway(means, nobs, equiv_margin, vars_=None,
                                      k_mc=1000, trim_frac=0,
                                      options_var=None, margin_type="f2"
                                      ):  # , anova_options=None):  #TODO
    """Simulate Power for oneway equivalence test (Wellek's Anova)

    This function is experimental and written to evaluate asymptotic power
    function. This function will change without backwards compatibility
    constraints. The only part that is stable is `pvalue` attribute in results.

    Parameters
    ----------
    means : array_like
        Mean of each group. One normally distributed sample is drawn per
        group in every replication; any number of groups is supported.
    nobs : array_like of int
        Number of observations in each group.
    equiv_margin : float
        Equivalence margin, passed on to ``equivalence_oneway_generic``.
    vars_ : array_like, optional
        Variance of each group. If None, every group has standard
        deviation 1.
    k_mc : int
        Number of Monte Carlo replications.
    trim_frac : float
        Passed on to ``anova_oneway``.
    options_var : list of str, optional
        ``use_var`` options for ``anova_oneway``; each replication is
        evaluated once per option. Default is ``["unequal", "equal", "bf"]``.
    margin_type : str
        Passed on to ``equivalence_oneway_generic``.

    Returns
    -------
    res : Holder
        Arrays with one row per replication:

        - ``f_stat`` : effect size ``f_stat * (n_groups - 1) / nobs_mean``,
          one column per entry of ``options_var``
        - ``other`` : ``crit_f``, ``crit_es`` and ``power_zero`` of the
          equivalence test, three columns per entry of ``options_var``
        - ``pvalue`` : p-value of the equivalence test, one column per entry
          of ``options_var``
        - ``reject`` : rejection decision, one column per entry of
          ``options_var``

    Notes
    -----
    Samples are drawn with ``np.random.randn``, i.e. from numpy's global
    random state.
    """
    if options_var is None:
        options_var = ["unequal", "equal", "bf"]
    # accept lists and tuples as well as arrays for the group sizes
    nobs = np.asarray(nobs)
    if vars_ is not None:
        stds = np.sqrt(vars_)
    else:
        stds = np.ones(len(means))

    # these do not depend on the replication
    nobs_mean = nobs.mean()
    nobs_total = nobs.sum()
    n_groups = len(nobs)

    res_mc = []
    f_mc = []
    reject_mc = []
    other_mc = []
    for _ in range(k_mc):
        # one sample per group, the number of groups is not hard-coded
        samples = [m + std * np.random.randn(n)
                   for (n, m, std) in zip(nobs, means, stds)]

        res_i = []
        f_i = []
        reject_i = []
        other_i = []
        for uv in options_var:
            res0 = anova_oneway(samples, use_var=uv, trim_frac=trim_frac)
            f_stat = res0.statistic
            res1 = equivalence_oneway_generic(f_stat, n_groups, nobs_total,
                                              equiv_margin, res0.df,
                                              alpha=0.05,
                                              margin_type=margin_type)
            res_i.append(res1.pvalue)
            es_wellek = f_stat * (n_groups - 1) / nobs_mean
            f_i.append(es_wellek)
            reject_i.append(res1.reject)
            other_i.extend([res1.crit_f, res1.crit_es, res1.power_zero])
        res_mc.append(res_i)
        f_mc.append(f_i)
        reject_mc.append(reject_i)
        other_mc.append(other_i)

    res = Holder(f_stat=np.asarray(f_mc),
                 other=np.asarray(other_mc),
                 pvalue=np.asarray(res_mc),
                 reject=np.asarray(reject_mc)
                 )
    return res
Example #15
0
'''Generated Random Processes for tests

autogenerated by savervs.py

'''

from numpy import array

from statsmodels.tools.testing import Holder

rvsdata = Holder()
rvsdata.comment = 'generated data, divide by 1000, see savervs'
rvsdata.xarma32 = array([
    -1271, -1222, -840, -169, -1016, -980, -1272, -926, 445, 833, -91, -1974,
    -2231, -549, 424, 238, -1665, -1815, 685, 3361, 1912, -1931, -3555, -1817,
    387, 730, -1154, -702, 973, 1340, -161, 276, 200, 1785, 834, -1469, -1593,
    -134, 555, -422, -2314, -1326, -2268, -3579, -3049, -930, 1155, 962, -644,
    -217, -561, 224, 810, 2445, 2710, 2152, 502, 21, 164, -499, -1093, -492,
    531, -605, -1535, -2081, -3816, -2257, 487, 2134, 1785, 1495, 1259, 1895,
    1339, 617, 1143, 385, -1220, -738, 1171, 1047, -234, -107, -1458, -1244,
    -2737, 33, 2373, 2749, 2725, 3331, 1054, 418, 1231, -1171, -1446, -1187,
    863, 1386, 757, 734, 283, -735, 550, 417, -236, 324, 318, -102, 2126, 3246,
    2358, 2156, 726, -983, -803, -242, -500, -13, 49, 308, -227, 243, -612,
    -2329, -2476, -3441, -5435, -4693, -2538, -2159, -2656, -906, -211, -288,
    1777, 1363, 564, -2035, -1134, -609, -1112, 560, 658, 1533, 796, 523, 456,
    76, -1164, -749, -1084, -3218, -2107, -310, -686, -1625, 2008, 4155, 1650,
    -1086, -673, 1634, 1999, 449, -1077, -648, -155, -327, 228, 1295, 2036,
    542, -197, -451, -1554, -2416, -2066, -2146, -1524, -1976, -2962, -2621,
    -2313, -2052, -3314, -2363, -1522, -3305, -3445, -3206, -1501, 2029, 1963,
    1168, 2050, 2927, 2019, 84, 213, 1783, 617, -767, -425, 739, 281, 506,
    -749, -938, -284, -147, 51, 1296, 3033, 2263, 1409, -1702, -819, -1295,
Example #16
0
    def setup(self):
        """Store the count data and the reference results on the instance.

        Sets ``n_success`` and ``nobs`` (four groups), two arrays of six
        p-values (``res_ppt_pvals_raw`` and ``res_ppt_pvals_holm``) and three
        ``Holder`` containers with values printed from R ``prop.test`` calls.
        """
        # NOTE(review): pytest 8 no longer calls nose-style ``setup`` methods;
        # confirm that the test runner in use still invokes this hook.
        self.n_success = np.array([73, 90, 114, 75])
        self.nobs = np.array([86, 93, 136, 82])

        self.res_ppt_pvals_raw = np.array([
            0.00533824886503131, 0.8327574849753566, 0.1880573726722516,
            0.002026764254350234, 0.1309487516334318, 0.1076118730631731,
        ])
        self.res_ppt_pvals_holm = np.array([
            0.02669124432515654, 0.8327574849753566, 0.4304474922526926,
            0.0121605855261014, 0.4304474922526926, 0.4304474922526926,
        ])

        # 4-sample test; R reports no null value and no confidence interval,
        # both are stored as the string 'NULL'
        self.res_prop_test = Holder(
            statistic=11.11938768628861,
            parameter=3,
            p_value=0.011097511366581344,
            estimate=np.array([
                0.848837209302326, 0.967741935483871, 0.838235294117647,
                0.9146341463414634,
            ]).reshape(4, 1, order='F'),
            null_value='NULL',
            conf_int='NULL',
            alternative='two.sided',
            method=('4-sample test for equality of proportions '
                    'without continuity correction'),
            data_name='smokers2 out of patients',
        )

        #> pt = prop.test(smokers2, patients, p=rep(c(0.9), 4), correct=FALSE)
        #> cat_items(pt, "res_prop_test_val.")
        self.res_prop_test_val = Holder(
            statistic=np.array([13.20305530710751]).reshape(1, 1, order='F'),
            parameter=np.array([4]).reshape(1, 1, order='F'),
            p_value=0.010325090041836,
            estimate=np.array([
                0.848837209302326, 0.967741935483871, 0.838235294117647,
                0.9146341463414634,
            ]).reshape(4, 1, order='F'),
            null_value=np.array([0.9, 0.9, 0.9, 0.9]).reshape(4, 1, order='F'),
            conf_int='NULL',
            alternative='two.sided',
            method=('4-sample test for given proportions '
                    'without continuity correction'),
            data_name=('smokers2 out of patients, '
                       'null probabilities rep(c(0.9), 4)'),
        )

        #> pt = prop.test(smokers2[1], patients[1], p=0.9, correct=FALSE)
        #> cat_items(pt, "res_prop_test_1.")
        self.res_prop_test_1 = Holder(
            statistic=2.501291989664086,
            parameter=1,
            p_value=0.113752943640092,
            estimate=0.848837209302326,
            null_value=0.9,
            conf_int=np.array([0.758364348004061, 0.9094787701686766]),
            alternative='two.sided',
            method=('1-sample proportions test '
                    'without continuity correction'),
            data_name=('smokers2[1] out of patients[1], '
                       'null probability 0.9'),
        )
Example #17
0
def test_mv_mean():
    """Check ``smmv.test_mvmean`` and the mean confidence intervals.

    The test statistics are compared with three sets of Stata reference
    values; ``res`` and ``res_stata`` are rebound between the assertion
    groups, so the statement order matters. The confidence intervals from
    ``confint_mvmean_fromstats`` and ``smmv.confint_mvmean`` are compared
    with ``DescrStatsW.tconfint_mean``.
    """
    # names = ['id', 'mpg1', 'mpg2', 'add']
    x = np.asarray([[1.0, 24.0, 23.5, 1.0], [2.0, 25.0, 24.5, 1.0],
                    [3.0, 21.0, 20.5, 1.0], [4.0, 22.0, 20.5, 1.0],
                    [5.0, 23.0, 22.5, 1.0], [6.0, 18.0, 16.5, 1.0],
                    [7.0, 17.0, 16.5, 1.0], [8.0, 28.0, 27.5, 1.0],
                    [9.0, 24.0, 23.5, 1.0], [10.0, 27.0, 25.5, 1.0],
                    [11.0, 21.0, 20.5, 1.0], [12.0, 23.0, 22.5, 1.0],
                    [1.0, 20.0, 19.0, 0.0], [2.0, 23.0, 22.0, 0.0],
                    [3.0, 21.0, 20.0, 0.0], [4.0, 25.0, 24.0, 0.0],
                    [5.0, 18.0, 17.0, 0.0], [6.0, 17.0, 16.0, 0.0],
                    [7.0, 18.0, 17.0, 0.0], [8.0, 24.0, 23.0, 0.0],
                    [9.0, 20.0, 19.0, 0.0], [10.0, 24.0, 22.0, 0.0],
                    [11.0, 23.0, 22.0, 0.0], [12.0, 19.0, 18.0, 0.0]])

    # all 24 rows, columns mpg1 and mpg2, null value 21 for both means
    res = smmv.test_mvmean(x[:, 1:3], [21, 21])

    res_stata = Holder(p_F=1.25062334808e-09,
                       df_r=22,
                       df_m=2,
                       F=59.91609589041116,
                       T2=125.2791095890415)

    assert_allclose(res.statistic, res_stata.F, rtol=1e-10)
    assert_allclose(res.pvalue, res_stata.p_F, rtol=1e-10)
    assert_allclose(res.t2, res_stata.T2, rtol=1e-10)
    assert_equal(res.df, [res_stata.df_m, res_stata.df_r])

    # diff of paired sample
    # rows are split by the last column (1 versus 0) and differenced
    mask = x[:, -1] == 1
    x1 = x[mask, 1:3]
    x0 = x[~mask, 1:3]
    res_p = smmv.test_mvmean(x1 - x0, [0, 0])

    # result Stata hotelling
    # F and p_F are given with few digits here, hence the absolute tolerances
    res_stata = Holder(
        T2=9.698067632850247,
        df=10,
        k=2,
        N=12,
        F=4.4082126,  # not in return List
        p_F=0.0424)  # not in return List

    res = res_p
    assert_allclose(res.statistic, res_stata.F, atol=5e-7)
    assert_allclose(res.pvalue, res_stata.p_F, atol=5e-4)
    assert_allclose(res.t2, res_stata.T2, rtol=1e-10)
    assert_equal(res.df, [res_stata.k, res_stata.df])

    # mvtest means diff1 diff2, zero
    # same paired result ``res`` as above, compared at full precision
    res_stata = Holder(p_F=.0423949782937231,
                       df_r=10,
                       df_m=2,
                       F=4.408212560386478,
                       T2=9.69806763285025)

    assert_allclose(res.statistic, res_stata.F, rtol=1e-12)
    assert_allclose(res.pvalue, res_stata.p_F, rtol=1e-12)
    assert_allclose(res.t2, res_stata.T2, rtol=1e-12)
    assert_equal(res.df, [res_stata.df_m, res_stata.df_r])

    # confidence intervals use all four columns of x
    dw = weightstats.DescrStatsW(x)
    ci0 = dw.tconfint_mean(alpha=0.05)

    # slicing columns does not change the row count, so this equals len(x)
    nobs = len(x[:, 1:])
    # ci1 uses only the variances (diagonal covariance), ci2 the full
    # covariance; with the identity transformation both are compared to ci0
    ci1 = confint_mvmean_fromstats(dw.mean,
                                   np.diag(dw.var),
                                   nobs,
                                   lin_transf=np.eye(4),
                                   alpha=0.05)
    ci2 = confint_mvmean_fromstats(dw.mean,
                                   dw.cov,
                                   nobs,
                                   lin_transf=np.eye(4),
                                   alpha=0.05)

    assert_allclose(ci1[:2], ci0, rtol=1e-13)
    assert_allclose(ci2[:2], ci0, rtol=1e-13)

    # test from data
    res = smmv.confint_mvmean(x, lin_transf=np.eye(4), alpha=0.05)
    assert_allclose(res, ci2, rtol=1e-13)
Example #18
0
from statsmodels.stats.correlation_tools import (
    corr_nearest, corr_clipped, cov_nearest, _project_correlation_factors,
    corr_nearest_factor, _spg_optim, corr_thresholded,
    cov_nearest_factor_homog, FactoredPSDMatrix)
from statsmodels.tools.testing import Holder


def norm_f(x, y):
    """Frobenius norm of the difference between two arrays.

    Returns the square root of the sum of squared elementwise differences
    ``x - y``.
    """
    diff = x - y
    squared = diff ** 2
    total = squared.sum()
    return np.sqrt(total)


# R library Matrix results
cov1_r = Holder()
#> nc  <- nearPD(pr, conv.tol = 1e-7, keepDiag = TRUE, doDykstra =FALSE, corr=TRUE)
#> cat_items(nc, prefix="cov1_r.")
cov1_r.mat = '''<S4 object of class structure("dpoMatrix", package = "Matrix")>'''
cov1_r.eigenvalues = np.array([
    4.197315628646795, 0.7540460243978023, 0.5077608149667492,
    0.3801267599652769, 0.1607508970775889, 4.197315628646795e-08
])
cov1_r.corr = '''TRUE'''
cov1_r.normF = 0.0743805226512533
cov1_r.iterations = 11
cov1_r.rel_tol = 8.288594638441735e-08
cov1_r.converged = '''TRUE'''
#> mkarray2(as.matrix(nc$mat), name="cov1_r.mat")
cov1_r.mat = np.array([
    1, 0.487968018215892, 0.642651880010906, 0.4906386709070835,
Example #19
0
def test_cohens_kappa_irr():
    """Compare ``cohens_kappa`` with reference values from R ``kappa2``.

    The contingency table is built from ``anxiety[:, 1:]`` and every case in
    ``all_cases`` is checked for kappa, the z statistic and the two-sided
    p-value.
    """
    # Reference values for ``anxiety[,1:2]`` with custom weight vectors.
    # They are kept under their own names so that they are not shadowed by
    # the ``anxiety[,2:3]`` results below.
    # TODO: these two results are not compared with ``cohens_kappa`` yet.

    #>r = kappa2(anxiety[,1:2], c(0,0,0,1,1,1))
    #> cat_items(r, pref="ck_w3.")
    ck_r12_w3 = Holder()
    ck_r12_w3.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,0,1,1,1)"
    ck_r12_w3.irr_name = 'Kappa'
    ck_r12_w3.value = 0.1891892
    ck_r12_w3.stat_name = 'z'
    ck_r12_w3.statistic = 0.5079002
    ck_r12_w3.p_value = 0.6115233

    #> r = kappa2(anxiety[,1:2], c(0,0,1,1,2,2))
    #> cat_items(r, pref="ck_w4.")
    ck_r12_w4 = Holder()
    ck_r12_w4.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,1,1,2,2)"
    ck_r12_w4.irr_name = 'Kappa'
    ck_r12_w4.value = 0.2820513
    ck_r12_w4.stat_name = 'z'
    ck_r12_w4.statistic = 1.257410
    ck_r12_w4.p_value = 0.2086053

    # Reference values for ``anxiety[,2:3]``; these are the ones compared
    # in the loop at the end.
    ck_w1 = Holder()
    ck_w2 = Holder()
    ck_w3 = Holder()
    ck_w4 = Holder()
    #> r = kappa2(anxiety[,2:3])
    #> cat_items(r, pref="ck_w1.")
    ck_w1.method = "Cohen's Kappa for 2 Raters (Weights: unweighted)"
    ck_w1.irr_name = 'Kappa'
    ck_w1.value = -0.006289308
    ck_w1.stat_name = 'z'
    ck_w1.statistic = -0.0604067
    ck_w1.p_value = 0.9518317

    #> r = kappa2(anxiety[,2:3], "equal")
    #> cat_items(r, pref="ck_w2.")
    ck_w2.method = "Cohen's Kappa for 2 Raters (Weights: equal)"
    ck_w2.irr_name = 'Kappa'
    ck_w2.value = 0.1459075
    ck_w2.stat_name = 'z'
    ck_w2.statistic = 1.282472
    ck_w2.p_value = 0.1996772

    #> r = kappa2(anxiety[,2:3], "squared")
    #> cat_items(r, pref="ck_w3.")
    ck_w3.method = "Cohen's Kappa for 2 Raters (Weights: squared)"
    ck_w3.irr_name = 'Kappa'
    ck_w3.value = 0.2520325
    ck_w3.stat_name = 'z'
    ck_w3.statistic = 1.437451
    ck_w3.p_value = 0.1505898

    #> r = kappa2(anxiety[,2:3], c(0,0,1,1,2))
    #> cat_items(r, pref="ck_w4.")
    ck_w4.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,1,1,2)"
    ck_w4.irr_name = 'Kappa'
    ck_w4.value = 0.2391304
    ck_w4.stat_name = 'z'
    ck_w4.statistic = 1.223734
    ck_w4.p_value = 0.2210526

    # (reference result, ``weights`` argument, ``wt`` argument); several
    # argument combinations are expected to reproduce the same R result
    all_cases = [(ck_w1, None, None),
                 (ck_w2, None, 'linear'),
                 (ck_w2, np.arange(5), None),
                 (ck_w2, np.arange(5), 'toeplitz'),
                 (ck_w3, None, 'quadratic'),
                 (ck_w3, np.arange(5)**2, 'toeplitz'),
                 (ck_w3, 4*np.arange(5)**2, 'toeplitz'),
                 (ck_w4, [0, 0, 1, 1, 2], 'toeplitz')]

    # Note R:irr drops the missing category level 4 and uses the reduced
    # matrix; the bin edges below skip from 4 to 6 accordingly
    bin_edges = [1, 2, 3, 4, 6, 7]
    r = np.histogramdd(anxiety[:, 1:], (bin_edges, bin_edges))

    for res2, w, wt in all_cases:
        msg = repr(w) + repr(wt)
        res1 = cohens_kappa(r[0], weights=w, wt=wt)
        assert_almost_equal(res1.kappa, res2.value, decimal=6, err_msg=msg)
        assert_almost_equal(res1.z_value, res2.statistic, decimal=5,
                            err_msg=msg)
        assert_almost_equal(res1.pvalue_two_sided, res2.p_value, decimal=6,
                            err_msg=msg)
Example #20
0
 def setup_class(cls):
     """Attach the reference result and the computed p-value to the class.

     ``cls.res2`` is the ``tost_clinic_paired`` reference container.
     ``cls.res1`` is a fresh ``Holder`` whose ``pvalue`` is the first element
     returned by ``smws.ttost_paired`` for column 3 of ``clinic``, split into
     rows 0-14 and rows 15 onward, called with -0.6, 0.6 and
     ``transform=None``.
     """
     cls.res2 = tost_clinic_paired
     first_rows = clinic[:15, 3]
     later_rows = clinic[15:, 3]
     computed = cls.res1 = Holder()
     tost_result = smws.ttost_paired(first_rows, later_rows, -0.6, 0.6,
                                     transform=None)
     computed.pvalue = tost_result[0]
def test_rank_compare_2indep1():
    """Check ``rank_compare_2indep`` against lawstat's Brunner-Munzel test.

    The first half uses ``use_t=False`` with the original sample order, the
    second half swaps the samples and uses ``use_t=True``. ``res``, ``ci``
    and ``res_tost`` are rebound along the way, so the statement order
    matters.
    """
    # Example from Munzel and Hauschke 2003
    # data is given by counts, expand to observations
    levels = [-2, -1, 0, 1, 2]
    new = [24, 37, 21, 19, 6]
    active = [11, 51, 22, 21, 7]
    x1 = np.repeat(levels, new)
    x2 = np.repeat(levels, active)

    # using lawstat
    # > brunner.munzel.test(xn, xa) #brunnermunzel.test(x, y)
    res2_t = Holder(statistic=1.1757561456582,
                    df=204.2984239868,
                    pvalue=0.2410606649547,
                    ci=[0.4700629827705593, 0.6183882855872511],
                    prob=0.5442256341789052)

    # normal distribution; the sign of the statistic and the probability
    # are mirrored relative to the lawstat reference
    res = rank_compare_2indep(x1, x2, use_t=False)
    assert_allclose(res.statistic, -res2_t.statistic, rtol=1e-13)
    assert_allclose(res.prob1, 1 - res2_t.prob, rtol=1e-13)
    assert_allclose(res.prob2, res2_t.prob, rtol=1e-13)
    tt = res.test_prob_superior()
    # TODO: return HolderTuple
    # assert_allclose(tt.statistic, res2_t.statistic)
    # TODO: check sign/direction in lawstat
    assert_allclose(tt[0], -res2_t.statistic, rtol=1e-13)

    ci = res.conf_int(alpha=0.05)
    # we compare normal confint with t confint, lower rtol
    assert_allclose(ci, 1 - np.array(res2_t.ci)[::-1], rtol=0.005)
    # test consistency of test and confint
    # testing at either confidence limit has to give a p-value of alpha
    res_lb = res.test_prob_superior(value=ci[0])
    assert_allclose(res_lb[1], 0.05, rtol=1e-13)
    res_ub = res.test_prob_superior(value=ci[1])
    assert_allclose(res_ub[1], 0.05, rtol=1e-13)

    # test consistency of tost and confint
    # lower margin is binding, alternative larger
    res_tost = res.tost_prob_superior(ci[0], ci[1] * 1.05)
    assert_allclose(res_tost.results_larger.pvalue, 0.025, rtol=1e-13)
    assert_allclose(res_tost.pvalue, 0.025, rtol=1e-13)

    # upper margin is binding, alternative smaller
    res_tost = res.tost_prob_superior(ci[0] * 0.85, ci[1])
    assert_allclose(res_tost.results_smaller.pvalue, 0.025, rtol=1e-13)
    assert_allclose(res_tost.pvalue, 0.025, rtol=1e-13)

    # use t-distribution
    # our ranking is defined as reversed from lawstat, and BM article
    # reverse direction to match our definition
    x1, x2 = x2, x1
    res = rank_compare_2indep(x1, x2, use_t=True)
    assert_allclose(res.statistic, res2_t.statistic, rtol=1e-13)
    tt = res.test_prob_superior()
    # TODO: return HolderTuple
    # assert_allclose(tt.statistic, res2_t.statistic)
    # TODO: check sign/direction in lawstat, reversed from ours
    assert_allclose(tt[0], res2_t.statistic, rtol=1e-13)
    assert_allclose(tt[1], res2_t.pvalue, rtol=1e-13)
    assert_allclose(res.pvalue, res2_t.pvalue, rtol=1e-13)
    assert_allclose(res.df, res2_t.df, rtol=1e-13)

    # with the swapped samples the interval matches lawstat directly
    ci = res.conf_int(alpha=0.05)
    assert_allclose(ci, res2_t.ci, rtol=1e-11)
    # test consistency of test and confint
    res_lb = res.test_prob_superior(value=ci[0])
    assert_allclose(res_lb[1], 0.05, rtol=1e-11)
    res_ub = res.test_prob_superior(value=ci[1])
    assert_allclose(res_ub[1], 0.05, rtol=1e-11)

    # test consistency of tost and confint
    # lower margin is binding, alternative larger
    res_tost = res.tost_prob_superior(ci[0], ci[1] * 1.05)
    assert_allclose(res_tost.results_larger.pvalue, 0.025, rtol=1e-10)
    assert_allclose(res_tost.pvalue, 0.025, rtol=1e-10)

    # upper margin is binding, alternative smaller
    res_tost = res.tost_prob_superior(ci[0] * 0.85, ci[1])
    assert_allclose(res_tost.results_smaller.pvalue, 0.025, rtol=1e-10)
    assert_allclose(res_tost.pvalue, 0.025, rtol=1e-10)

    # extras
    # cohen's d
    esd = res.effectsize_normal()
    p = prob_larger_continuous(stats.norm(loc=esd), stats.norm)
    # round trip
    assert_allclose(p, res.prob1, rtol=1e-13)

    # round trip with cohen's d
    pc = cohensd2problarger(esd)
    assert_allclose(pc, res.prob1, rtol=1e-13)

    # the transformed interval is compared with the mirrored lawstat interval
    ci_tr = res.confint_lintransf(1, -1)
    assert_allclose(ci_tr, 1 - np.array(res2_t.ci)[::-1], rtol=0.005)
Example #22
0
 def setup_class(cls):
     """Attach the reference result and the computed p-value to the class.

     ``cls.res2`` is the ``tost_clinic_indep_1_pooled`` reference container.
     ``cls.res1`` is a fresh ``Holder`` whose ``pvalue`` is the first element
     returned by ``smws.ttost_ind`` for column 2 of ``clinic``, split into
     rows 0-14 and rows 15 onward, called with -0.6, 0.6 and
     ``usevar='pooled'``.
     """
     cls.res2 = tost_clinic_indep_1_pooled
     first_rows = clinic[:15, 2]
     later_rows = clinic[15:, 2]
     computed = cls.res1 = Holder()
     tost_result = smws.ttost_ind(first_rows, later_rows, -0.6, 0.6,
                                  usevar='pooled')
     computed.pvalue = tost_result[0]
Example #23
0
# -*- coding: utf-8 -*-
"""
Created on Thu Jan  6 13:55:50 2022

Author: Josef Perktold
License: BSD-3
"""
# flake8: noqa

import numpy as np

from statsmodels.tools.testing import Holder

hurdle_poisson = Holder()
# r library pscl, docvis data
# > mod = hurdle( docvis ~ aget + totchr, data=dt, zero.dist = "poisson")
hurdle_poisson.method = 'BFGS'
hurdle_poisson.n = 3629
hurdle_poisson.df_null = 3627
hurdle_poisson.df_residual = 3623
hurdle_poisson.loglik = -13612.9091771797
hurdle_poisson.aic = 27237.81835436
hurdle_poisson.bic = 27274.9986288
hurdle_poisson.vcov = np.array([
    0.000239404800324688, -4.59559682721834e-05, -4.59865258972631e-05, 0, 0,
    0, -4.59559682721834e-05, 2.54346275490526e-05, -1.20164687288645e-06, 0,
    0, 0, -4.59865258972631e-05, -1.20164687288644e-06, 2.01936456643824e-05,
    0, 0, 0, 0, 0, 0, 0.00241829560973498, -0.000548499729173446,
    -0.000636055275016966, 0, 0, 0, -0.000548499729173446,
    0.000351548196602719, -6.30088654100178e-05, 0, 0, 0,
    -0.000636055275016966, -6.30088654100178e-05, 0.000562508220544602