from numpy import array from statsmodels.tools.testing import Holder data = Holder() data.comment = 'generated data, divide by 1000' data.name = 'data' data.xo = array([ [-419, -731, -1306, -1294], [6, 529, -200, -437], [-27, -833, -6, -564], [-304, -273, -502, -739], [1377, -912, 927, 280], [-375, -517, -514, 49], [247, -504, 123, -259], [712, 534, -773, 286], [195, -1080, 3256, -178], [-854, 75, -706, -1084], [-1219, -612, -15, -203], [550, -628, -483, -2686], [-365, 1376, -1266, 317], [-489, 544, -195, 431], [-656, 854, 840, -723], [16, -1385, -880, -460], [258, -2252, 96, 54], [2049, -750, -1115, 381], [-65, 280, -777, 416], [755, 82, -806, 1027], [-39, -170, -2134, 743], [-859, 780, 746, -133],
from numpy import array

from statsmodels.tools.testing import Holder

# Reference values for an ARMA process with the lag polynomials stored in
# ``ar`` and ``ma``.  The ``comment`` attribute records the calls that
# produced ``marep`` and ``arrep`` (presumably the MA and AR representations,
# 20 terms each -- confirm against the generating script).
armarep = Holder(
    comment=('mlab.garchma(-res_armarep.ar[1:], res_armarep.ma[1:], 20)'
             'mlab.garchar(-res_armarep.ar[1:], res_armarep.ma[1:], 20)'),
    marep=array([
        [-0.1],
        [-0.77],
        [-0.305],
        [0.4635],
        [0.47575],
        [-0.132925],
        [-0.4470625],
        [-0.11719125],
        [0.299054375],
        [0.2432801875],
        [-0.11760340625],
        [-0.253425853125],
        [-0.0326302015625],
        [0.18642558171875],
        [0.11931695210938],
        [-0.08948198932031],
        [-0.14019455634766],
        [0.00148831328242],
        [0.11289980171934],
        [0.05525925023373],
    ]),
    ar=array([1., -0.5, 0.8]),
    ma=array([1., -0.6, 0.08]),
    name='armarep',
    arrep=array([
        [-1.00000000000000e-01],
        [-7.80000000000000e-01],
        [-4.60000000000000e-01],
        [-2.13600000000000e-01],
        [-9.13600000000000e-02],
        [-3.77280000000000e-02],
        [-1.53280000000000e-02],
        [-6.17856000000000e-03],
        [-2.48089600000000e-03],
        [-9.94252799999999e-04],
        [-3.98080000000000e-04],
        [-1.59307776000000e-04],
        [-6.37382655999999e-05],
        [-2.54983372800000e-05],
        [-1.01999411200000e-05],
        [-4.08009768959999e-06],
        [-1.63206332416000e-06],
        [-6.52830179327999e-07],
        [-2.61133041663999e-07],
        [-1.04453410652160e-07],
    ]),
)
Created on Mon May 4 21:21:09 2020 Author: Josef Perktold License: BSD-3 """ import numpy as np from statsmodels.tools.testing import Holder NA = np.nan # > pe = poisson.exact(c(60, 30), c(51477.5, 54308.7), tsmethod="minlike", # midp=FALSE) # > cat_items(pe, prefix="res.") res_pexact_cond = res = Holder() res.statistic = 60 res.parameter = 43.7956463130352 res.p_value = 0.000675182658686321 res.conf_int = np.array([ 1.34983090611567, 3.27764509862914 ]) res.estimate = 2.10999757175465 res.null_value = 1 res.alternative = 'two.sided' res.method = ('Exact two-sided Poisson test (sum of minimum likelihood' ' method)') res.data_name = 'c(60, 30) time base: c(51477.5, 54308.7)' # > pe = poisson.exact(c(60, 30), c(51477.5, 54308.7), tsmethod="minlike",
np.asarray([20., 20])) # TODO: check this is this difference expected?, see test_proportion assert_allclose(res1[1], res2[1], rtol=0.03) res1a = CompareMeans(d1, d2).ztest_ind() assert_allclose(res1a[1], res2[1], rtol=0.03) assert_almost_equal(res1a, res1, decimal=12) # test for ztest and z confidence interval against R BSDA z.test # Note: I needed to calculate the pooled standard deviation for R # std = np.std(np.concatenate((x-x.mean(),y-y.mean())), ddof=2) # > zt = z.test(x, sigma.x=0.57676142668828667, y, sigma.y=0.57676142668828667) # > cat_items(zt, "ztest.") ztest_ = Holder() ztest_.statistic = 6.55109865675183 ztest_.p_value = 5.711530850508982e-11 ztest_.conf_int = np.array([1.230415246535603, 2.280948389828034]) ztest_.estimate = np.array([7.01818181818182, 5.2625]) ztest_.null_value = 0 ztest_.alternative = 'two.sided' ztest_.method = 'Two-sample z-Test' ztest_.data_name = 'x and y' # > zt = z.test(x, sigma.x=0.57676142668828667, y, # sigma.y=0.57676142668828667, alternative="less") # > cat_items(zt, "ztest_smaller.") ztest_smaller = Holder() ztest_smaller.statistic = 6.55109865675183 ztest_smaller.p_value = 0.999999999971442 ztest_smaller.conf_int = np.array([np.nan, 2.196499421109045])
19 2 3.46 3.60 2.97 1.80 1.74 20 2 4.01 3.48 4.42 3.06 2.76 21 2 3.04 2.87 2.87 2.71 2.87 22 2 3.47 3.24 3.47 3.26 3.14 23 2 4.06 3.92 3.18 3.06 1.74 24 2 2.91 3.99 3.06 2.02 3.18 25 2 3.59 4.21 4.02 3.26 2.85 26 2 4.51 4.21 3.78 2.63 1.92 27 2 3.16 3.31 3.28 3.25 3.52 28 2 3.86 3.61 3.28 3.19 3.09 29 2 3.31 2.97 3.76 3.18 2.60 30 2 3.02 2.73 3.87 3.50 2.93'''.split() clinic = np.array(raw_clinic, float).reshape(-1, 7) #t = tost(-clinic$var2[16:30] + clinic$var2[1:15], eps=0.6) tost_clinic_paired = Holder() tost_clinic_paired.sample = 'paired' tost_clinic_paired.mean_diff = 0.5626666666666665 tost_clinic_paired.se_diff = 0.2478276410785118 tost_clinic_paired.alpha = 0.05 tost_clinic_paired.ci_diff = (0.1261653305099018, 0.999168002823431) tost_clinic_paired.df = 14 tost_clinic_paired.epsilon = 0.6 tost_clinic_paired.result = 'not rejected' tost_clinic_paired.p_value = 0.4412034046017588 tost_clinic_paired.check_me = (0.525333333333333, 0.6) #> t = tost(-clinic$var1[16:30] + clinic$var1[1:15], eps=0.6) #> cat_items(t, prefix="tost_clinic_paired_1.") tost_clinic_paired_1 = Holder() tost_clinic_paired_1.mean_diff = 0.1646666666666667
def setup_class(cls):
    """Attach expected and computed TOST results to the test class.

    ``res2`` is the reference holder ``tost_clinic_indep``; ``res1`` is a
    fresh ``Holder`` whose ``pvalue`` is the first element returned by
    ``smws.ttost_ind`` with ``usevar='unequal'`` and bounds ``(-0.6, 0.6)``.
    """
    cls.res2 = tost_clinic_indep
    # column 3 of ``clinic``: first 15 rows versus the remaining rows
    sample_a = clinic[:15, 3]
    sample_b = clinic[15:, 3]
    computed = Holder()
    cls.res1 = computed
    tost_out = smws.ttost_ind(sample_a, sample_b, -0.6, 0.6,
                              usevar='unequal')
    computed.pvalue = tost_out[0]
from numpy import array

from statsmodels.tools.testing import Holder

# Reference partial-autocorrelation results; the call that produced them is
# recorded in ``comment``.  The ``100`` / ``1000`` suffixes presumably refer
# to the sample sizes used -- confirm against the generating script.
mlpacf = Holder(
    comment='mlab.parcorr(x, [], 2, nout=3)',
    name='mlpacf',
    lags1000=array([
        [0.], [1.], [2.], [3.], [4.], [5.], [6.],
        [7.], [8.], [9.], [10.], [11.], [12.], [13.],
        [14.], [15.], [16.], [17.], [18.], [19.], [20.],
    ]),
    bounds1000=array([
        [0.06334064],
        [-0.06334064],
    ]),
    lags100=array([
        [0.], [1.], [2.], [3.], [4.], [5.], [6.],
        [7.], [8.], [9.], [10.], [11.], [12.], [13.],
        [14.], [15.], [16.], [17.], [18.], [19.], [20.],
    ]),
    pacf100=array([
        [1.],
        [0.47253777],
        [-0.49466966],
        [-0.02689319],
        [-0.00122204],
        [0.08419183],
        [0.03220774],
        [0.10404012],
        [0.05304617],
        [-0.04129564],
        [-0.04049451],
        [0.11727754],
        [0.11804158],
        [-0.05864957],
        [-0.15681802],
        [0.11828684],
        [0.05156002],
        [0.00694629],
        [0.01668964],
        [0.02236851],
        [-0.0909443],
    ]),
    pacf1000=array([
        [1.00000000e+00],
        [5.29288262e-01],
        [-5.31849027e-01],
        [1.17440051e-02],
        [-5.37941905e-02],
        [-4.11119348e-02],
        [-2.40367432e-02],
        [2.24289891e-02],
        [3.33007235e-02],
        [4.59658302e-02],
        [6.65850553e-03],
        [-3.76714278e-02],
        [5.27229738e-02],
        [2.50796558e-02],
        [-4.42597301e-02],
        [-1.95819186e-02],
        [4.70451394e-02],
        [-1.70963705e-03],
        [3.04262524e-04],
        [-6.22001614e-03],
        [-1.16694989e-02],
    ]),
    bounds100=array([
        [0.20306923],
        [-0.20306923],
    ]),
)
# ## fit ordered cloglog model
# r_cloglog <- polr(apply ~ pared + public + gpa,
#                   data = ologit_ucla,
#                   method = 'cloglog',
#                   Hess=TRUE)
#
# ## with r = r_logit or r_probit or r_cloglog
# ## we add p-values
# (ctable <- coef(summary(r)))
# p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
# (ctable <- cbind(ctable, "p value" = p))
# ## show 7 first predictions
# head(predict(r, subset(ologit_ucla,
#                        select=c("pared", "public","gpa")), type='prob'),7)

data_store = Holder()
# the csv file is expected to sit next to this module
cur_dir = os.path.dirname(os.path.abspath(__file__))
df = pd.read_csv(os.path.join(cur_dir, "ologit_ucla.csv"))

# NOTE: ``set_categories(..., inplace=True)`` was deprecated in pandas 1.3
# and removed in pandas 2.0.  The accessor now always returns a new Series,
# so the result is assigned back to the column instead.

# df_unordered['apply'] is pd.Categorical with ordered = False
df_unordered = df.copy()
df_unordered['apply'] = pd.Categorical(df['apply'], ordered=False)
# but categories are set in order
df_unordered['apply'] = df_unordered['apply'].cat.set_categories(
    ['unlikely', 'somewhat likely', 'very likely'])

# df['apply'] is pd.Categorical with ordered = True
# (set_categories leaves the ``ordered`` flag unchanged when not given)
df['apply'] = pd.Categorical(df['apply'], ordered=True)
df['apply'] = df['apply'].cat.set_categories(
    ['unlikely', 'somewhat likely', 'very likely'])
def test_holder():
    """An attribute assigned on a ``Holder`` instance can be read back."""
    container = Holder()
    container.new_attr = 1
    assert hasattr(container, 'new_attr')
    assert container.new_attr == 1
""" Created on Sun Jun 30 20:25:22 2013 Author: Josef Perktold """ import pytest import numpy as np from numpy.testing import assert_allclose from statsmodels.tools.tools import add_constant from statsmodels.tools.testing import Holder from statsmodels.miscmodels.tmodel import TLinearModel mm = Holder() mm.date_label = [ "Apr.1982", "Apr.1983", "Apr.1984", "Apr.1985", "Apr.1986", "Aug.1982", "Aug.1983", "Aug.1984", "Aug.1985", "Aug.1986", "Dec.1982", "Dec.1983", "Dec.1984", "Dec.1985", "Dec.1986", "Feb.1284", "Feb.1982", "Feb.1983", "Feb.1985", "Feb.1986", "Jan.1982", "Jan.1983", "Jan.1984", "Jan.1985", "Jan.1986", "Jul.1982", "July1983", "July1984", "July1985", "July1986", "June1982", "June1983", "June1984", "June1985", "June1986", "Mar.1982", "Mar.1983", "Mar.1984", "Mar.1985", "Mar.1986", "May1982", "May1983", "May1984", "May1985", "May1986", "Nov.1982", "Nov.1983", "Nov.1984", "Nov.1985", "Nov.1986", "Oct.1982", "Oct.1983", "Oct.1984", "Oct.1985", "Oct.1986", "Sept.1982", "Sept.1983", "Sept.1984", "Sept.1985", "Sept.1986" ] mm.m_marietta = np.array([ -0.1365, -0.0769, -0.0575, 0.0526, -0.0449, -0.0859, -0.0742, 0.6879,
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 30 23:14:36 2013

Author: Josef Perktold
"""

import numpy as np

from statsmodels.tools.testing import Holder

# Reference results copied from R; the generating calls were:
# > tfit3 <- tlm(m.marietta~CRSP, data=mm, start=list(dof = 3), estDof=TRUE)
# > cat_items(tfit3, "res_t_dfest.")
res_t_dfest = Holder(
    random=np.array([
        0.6242843, 1.349205, 1.224172, 1.272655, 1.323455,
        1.091313, 1.227218, 0.0316284, 0.7202973, 1.038392,
        1.091907, 0.7966355, 0.784222, 0.5042926, 0.1964543,
        1.172123, 1.017338, 0.8799186, 0.7849335, 0.790158,
        0.8121724, 1.286998, 0.7286052, 1.330104, 1.054037,
        1.299656, 1.285306, 1.271166, 1.106877, 1.303909,
        0.4250416, 1.277096, 1.160106, 0.1871806, 1.074168,
        1.197795, 1.046638, 1.104423, 1.301670, 1.333217,
        0.8156778, 1.309934, 1.142454, 1.347481, 0.6605017,
        1.035725, 1.172666, 1.281746, 0.8796436, 0.9597098,
        0.6221453, 1.149490, 1.291864, 1.207619, 1.239625,
        1.351065, 1.248711, 0.3532520, 0.6067273, 0.8180234,
    ]),
    dof=2.837183,
    dofse=1.175296,
    iter=7,
    logLik=71.81292,
    endTime=0.01,
)
import numpy as np import os import pandas as pd from statsmodels.tools.testing import Holder cur_dir = os.path.dirname(os.path.abspath(__file__)) results_meth = Holder() results_meth.type = 'ML' results_meth.method = 'BFGS' results_meth.scoring = 3 results_meth.start = np.array([ 1.44771372395646, 0.0615237727637243, 0.604926837329731, 0.98389051740736, 6.25859738441389, 0 ]) results_meth.n = 36 results_meth.nobs = 36 results_meth.df_null = 34 results_meth.df_residual = 30 results_meth.loglik = 104.148028405343 results_meth.vcov = np.array([ 0.00115682165449043, -0.000665413980696048, -0.000924081767589657, -0.000924126199147583, 0.000941505276523348, -1.44829373972985e-05, -0.000665413980696048, 0.00190019966824938, 4.45163588328844e-06, 6.23668249663711e-06, -0.00216418558500309, 4.18754929463506e-05, -0.000924081767589657, 4.45163588328844e-06, 0.0023369966334575, 0.000924223263225116, 0.000168988804218447, 1.14762434349836e-07, -0.000924126199147583, 6.23668249663711e-06, 0.000924223263225116, 0.00282071714820361, 0.000331499252772628, 1.93773358431975e-07, 0.000941505276523348, -0.00216418558500309, 0.000168988804218447, 0.000331499252772628, 3.20761137509433, -0.0581708456538647,
""" import numpy as np from statsmodels.tools.testing import Holder """ example from Kacker 2004, computed with R metafor > y = c(61.0, 61.4 , 62.21, 62.3 , 62.34, 62.6 , 62.7 , 62.84, 65.9) > v = c(0.2025, 1.2100, 0.0900, 0.2025, 0.3844, 0.5625, 0.0676, 0.0225, 1.8225) > res = rma(y, v, data=dat, method="PM", control=list(tol=1e-9)) > convert_items(res, prefix="exk1_metafor.") """ exk1_metafor = Holder() exk1_metafor.b = 62.4076199113286 exk1_metafor.beta = 62.4076199113286 exk1_metafor.se = 0.338030602684471 exk1_metafor.zval = 184.621213037276 exk1_metafor.pval = 0 exk1_metafor.ci_lb = 61.7450921043947 exk1_metafor.ci_ub = 63.0701477182625 exk1_metafor.vb = 0.114264688351227 exk1_metafor.tau2 = 0.705395309224248 exk1_metafor.se_tau2 = 0.51419109758052 exk1_metafor.tau2_f = 0.705395309224248 exk1_metafor.k = 9 exk1_metafor.k_f = 9 exk1_metafor.k_eff = 9 exk1_metafor.k_all = 9
def simulate_power_equivalence_oneway(means, nobs, equiv_margin, vars_=None,
                                      k_mc=1000, trim_frac=0,
                                      options_var=None, margin_type="f2"
                                      ):  # , anova_options=None):  #TODO
    """Simulate Power for oneway equivalence test (Wellek's Anova)

    This function is experimental and written to evaluate asymptotic power
    function. This function will change without backwards compatibility
    constraints. The only part that is stable is `pvalue` attribute in results.

    Effect size for equivalence margin

    Parameters
    ----------
    means : array_like
        Mean of each group; one normally distributed sample per group is
        drawn in every replication.
    nobs : array_like of int
        Number of observations in each group. Must have the same length as
        ``means``.
    equiv_margin : float
        Equivalence margin, passed on to ``equivalence_oneway_generic``.
    vars_ : array_like, optional
        Variance of each group. If None, unit variance is used for all
        groups.
    k_mc : int
        Number of Monte Carlo replications.
    trim_frac : float
        Passed on to ``anova_oneway``.
    options_var : list of str, optional
        ``use_var`` options of ``anova_oneway`` to evaluate in each
        replication. Defaults to ``["unequal", "equal", "bf"]``.
    margin_type : str
        Passed on to ``equivalence_oneway_generic``.

    Returns
    -------
    res : Holder
        Instance with one row per replication in each attribute:

        - ``pvalue`` : p-values, one column per entry in ``options_var``
        - ``reject`` : rejection decisions, one column per entry in
          ``options_var``
        - ``f_stat`` : ``f_stat * (n_groups - 1) / nobs.mean()``, one column
          per entry in ``options_var``
        - ``other`` : ``crit_f``, ``crit_es`` and ``power_zero`` for each
          entry in ``options_var``, concatenated along the columns

    Raises
    ------
    ValueError
        If ``means``, ``nobs`` and ``vars_`` do not have the same length.
    """
    if options_var is None:
        options_var = ["unequal", "equal", "bf"]
    if vars_ is not None:
        stds = np.sqrt(vars_)
    else:
        stds = np.ones(len(means))

    # accept lists and tuples, not only ndarrays
    nobs = np.asarray(nobs)
    n_groups = len(nobs)
    if len(means) != n_groups or len(stds) != n_groups:
        # zip below would otherwise silently drop the surplus entries
        raise ValueError("means, nobs and vars_ need to have the same "
                         "length")

    # invariants of the Monte Carlo loop
    nobs_mean = nobs.mean()
    nobs_total = nobs.sum()

    res_mc = []
    f_mc = []
    reject_mc = []
    other_mc = []
    for _ in range(k_mc):
        # one sample per group, any number of groups; the draws are made in
        # group order so the random stream is consumed as before
        samples = [m + std * np.random.randn(n)
                   for (n, m, std) in zip(nobs, means, stds)]

        res_i = []
        f_i = []
        reject_i = []
        other_i = []
        for uv in options_var:
            # for welch in options_welch:
            # res1 = sma.anova_generic(means, vars_, nobs, use_var=uv,
            #                          welch_correction=welch)
            res0 = anova_oneway(samples, use_var=uv, trim_frac=trim_frac)
            f_stat = res0.statistic
            res1 = equivalence_oneway_generic(f_stat, n_groups, nobs_total,
                                              equiv_margin, res0.df,
                                              alpha=0.05,
                                              margin_type=margin_type)
            res_i.append(res1.pvalue)
            es_wellek = f_stat * (n_groups - 1) / nobs_mean
            f_i.append(es_wellek)
            reject_i.append(res1.reject)
            other_i.extend([res1.crit_f, res1.crit_es, res1.power_zero])
        res_mc.append(res_i)
        f_mc.append(f_i)
        reject_mc.append(reject_i)
        other_mc.append(other_i)

    f_mc = np.asarray(f_mc)
    other_mc = np.asarray(other_mc)
    res_mc = np.asarray(res_mc)
    reject_mc = np.asarray(reject_mc)
    res = Holder(f_stat=f_mc,
                 other=other_mc,
                 pvalue=res_mc,
                 reject=reject_mc
                 )
    return res
'''Generated Random Processes for tests autogenerated by savervs.py ''' from numpy import array from statsmodels.tools.testing import Holder rvsdata = Holder() rvsdata.comment = 'generated data, divide by 1000, see savervs' rvsdata.xarma32 = array([ -1271, -1222, -840, -169, -1016, -980, -1272, -926, 445, 833, -91, -1974, -2231, -549, 424, 238, -1665, -1815, 685, 3361, 1912, -1931, -3555, -1817, 387, 730, -1154, -702, 973, 1340, -161, 276, 200, 1785, 834, -1469, -1593, -134, 555, -422, -2314, -1326, -2268, -3579, -3049, -930, 1155, 962, -644, -217, -561, 224, 810, 2445, 2710, 2152, 502, 21, 164, -499, -1093, -492, 531, -605, -1535, -2081, -3816, -2257, 487, 2134, 1785, 1495, 1259, 1895, 1339, 617, 1143, 385, -1220, -738, 1171, 1047, -234, -107, -1458, -1244, -2737, 33, 2373, 2749, 2725, 3331, 1054, 418, 1231, -1171, -1446, -1187, 863, 1386, 757, 734, 283, -735, 550, 417, -236, 324, 318, -102, 2126, 3246, 2358, 2156, 726, -983, -803, -242, -500, -13, 49, 308, -227, 243, -612, -2329, -2476, -3441, -5435, -4693, -2538, -2159, -2656, -906, -211, -288, 1777, 1363, 564, -2035, -1134, -609, -1112, 560, 658, 1533, 796, 523, 456, 76, -1164, -749, -1084, -3218, -2107, -310, -686, -1625, 2008, 4155, 1650, -1086, -673, 1634, 1999, 449, -1077, -648, -155, -327, 228, 1295, 2036, 542, -197, -451, -1554, -2416, -2066, -2146, -1524, -1976, -2962, -2621, -2313, -2052, -3314, -2363, -1522, -3305, -3445, -3206, -1501, 2029, 1963, 1168, 2050, 2927, 2019, 84, 213, 1783, 617, -767, -425, 739, 281, 506, -749, -938, -284, -147, 51, 1296, 3033, 2263, 1409, -1702, -819, -1295,
def setup(self):
    """Store sample counts and R reference results on the test instance.

    The holders carry output copied from R ``prop.test`` (see the R calls
    quoted in the comments below).
    """
    self.n_success = np.array([73, 90, 114, 75])
    self.nobs = np.array([86, 93, 136, 82])

    # reference p-values, unadjusted and Holm-adjusted
    # (presumably from R pairwise.prop.test -- confirm against the R script)
    self.res_ppt_pvals_raw = np.array([
        0.00533824886503131, 0.8327574849753566,
        0.1880573726722516, 0.002026764254350234,
        0.1309487516334318, 0.1076118730631731,
    ])
    self.res_ppt_pvals_holm = np.array([
        0.02669124432515654, 0.8327574849753566,
        0.4304474922526926, 0.0121605855261014,
        0.4304474922526926, 0.4304474922526926,
    ])

    self.res_prop_test = Holder(
        statistic=11.11938768628861,
        parameter=3,
        p_value=0.011097511366581344,
        estimate=np.array([
            0.848837209302326, 0.967741935483871,
            0.838235294117647, 0.9146341463414634,
        ]).reshape(4, 1, order='F'),
        null_value='NULL',
        conf_int='NULL',
        alternative='two.sided',
        method=('4-sample test for equality of proportions '
                'without continuity correction'),
        data_name='smokers2 out of patients',
    )

    #> pt = prop.test(smokers2, patients, p=rep(c(0.9), 4), correct=FALSE)
    #> cat_items(pt, "res_prop_test_val.")
    self.res_prop_test_val = Holder(
        statistic=np.array([13.20305530710751]).reshape(1, 1, order='F'),
        parameter=np.array([4]).reshape(1, 1, order='F'),
        p_value=0.010325090041836,
        estimate=np.array([
            0.848837209302326, 0.967741935483871,
            0.838235294117647, 0.9146341463414634,
        ]).reshape(4, 1, order='F'),
        null_value=np.array([0.9, 0.9, 0.9, 0.9]).reshape(4, 1, order='F'),
        conf_int='NULL',
        alternative='two.sided',
        method=('4-sample test for given proportions '
                'without continuity correction'),
        data_name=('smokers2 out of patients, '
                   'null probabilities rep(c(0.9), 4)'),
    )

    #> pt = prop.test(smokers2[1], patients[1], p=0.9, correct=FALSE)
    #> cat_items(pt, "res_prop_test_1.")
    self.res_prop_test_1 = Holder(
        statistic=2.501291989664086,
        parameter=1,
        p_value=0.113752943640092,
        estimate=0.848837209302326,
        null_value=0.9,
        conf_int=np.array([0.758364348004061, 0.9094787701686766]),
        alternative='two.sided',
        method=('1-sample proportions test '
                'without continuity correction'),
        data_name=('smokers2[1] out of patients[1], '
                   'null probability 0.9'),
    )
def test_mv_mean():
    """Check ``test_mvmean`` and the mvmean confidence intervals.

    The multivariate mean test is compared with Stata reference values for
    a one-sample hypothesis and for the difference of a paired sample.
    The confidence-interval helpers are checked for consistency with
    ``DescrStatsW.tconfint_mean`` and with each other.
    """
    # names = ['id', 'mpg1', 'mpg2', 'add']
    x = np.asarray([[1.0, 24.0, 23.5, 1.0], [2.0, 25.0, 24.5, 1.0],
                    [3.0, 21.0, 20.5, 1.0], [4.0, 22.0, 20.5, 1.0],
                    [5.0, 23.0, 22.5, 1.0], [6.0, 18.0, 16.5, 1.0],
                    [7.0, 17.0, 16.5, 1.0], [8.0, 28.0, 27.5, 1.0],
                    [9.0, 24.0, 23.5, 1.0], [10.0, 27.0, 25.5, 1.0],
                    [11.0, 21.0, 20.5, 1.0], [12.0, 23.0, 22.5, 1.0],
                    [1.0, 20.0, 19.0, 0.0], [2.0, 23.0, 22.0, 0.0],
                    [3.0, 21.0, 20.0, 0.0], [4.0, 25.0, 24.0, 0.0],
                    [5.0, 18.0, 17.0, 0.0], [6.0, 17.0, 16.0, 0.0],
                    [7.0, 18.0, 17.0, 0.0], [8.0, 24.0, 23.0, 0.0],
                    [9.0, 20.0, 19.0, 0.0], [10.0, 24.0, 22.0, 0.0],
                    [11.0, 23.0, 22.0, 0.0], [12.0, 19.0, 18.0, 0.0]])

    # one-sample test of the two middle columns against the value [21, 21]
    res = smmv.test_mvmean(x[:, 1:3], [21, 21])

    res_stata = Holder(p_F=1.25062334808e-09,
                       df_r=22,
                       df_m=2,
                       F=59.91609589041116,
                       T2=125.2791095890415)

    assert_allclose(res.statistic, res_stata.F, rtol=1e-10)
    assert_allclose(res.pvalue, res_stata.p_F, rtol=1e-10)
    assert_allclose(res.t2, res_stata.T2, rtol=1e-10)
    assert_equal(res.df, [res_stata.df_m, res_stata.df_r])

    # diff of paired sample
    # rows are split on the last column (1 versus 0)
    mask = x[:, -1] == 1
    x1 = x[mask, 1:3]
    x0 = x[~mask, 1:3]
    res_p = smmv.test_mvmean(x1 - x0, [0, 0])

    # result Stata hotelling
    # F and p_F are given with few digits only, hence the atol comparisons
    res_stata = Holder(
        T2=9.698067632850247,
        df=10,
        k=2,
        N=12,
        F=4.4082126,  # not in return List
        p_F=0.0424)  # not in return List
    res = res_p
    assert_allclose(res.statistic, res_stata.F, atol=5e-7)
    assert_allclose(res.pvalue, res_stata.p_F, atol=5e-4)
    assert_allclose(res.t2, res_stata.T2, rtol=1e-10)
    assert_equal(res.df, [res_stata.k, res_stata.df])

    # mvtest means diff1 diff2, zero
    # same paired result (``res`` is still ``res_p``), compared against
    # reference values given with more digits
    res_stata = Holder(p_F=.0423949782937231,
                       df_r=10,
                       df_m=2,
                       F=4.408212560386478,
                       T2=9.69806763285025)

    assert_allclose(res.statistic, res_stata.F, rtol=1e-12)
    assert_allclose(res.pvalue, res_stata.p_F, rtol=1e-12)
    assert_allclose(res.t2, res_stata.T2, rtol=1e-12)
    assert_equal(res.df, [res_stata.df_m, res_stata.df_r])

    # confidence intervals: with an identity transformation the first two
    # returned items have to agree with the t confidence interval of the mean
    dw = weightstats.DescrStatsW(x)
    ci0 = dw.tconfint_mean(alpha=0.05)

    nobs = len(x[:, 1:])
    # only the variances on the diagonal
    ci1 = confint_mvmean_fromstats(dw.mean, np.diag(dw.var), nobs,
                                   lin_transf=np.eye(4), alpha=0.05)
    # full covariance matrix
    ci2 = confint_mvmean_fromstats(dw.mean, dw.cov, nobs,
                                   lin_transf=np.eye(4), alpha=0.05)

    assert_allclose(ci1[:2], ci0, rtol=1e-13)
    assert_allclose(ci2[:2], ci0, rtol=1e-13)

    # test from data
    res = smmv.confint_mvmean(x, lin_transf=np.eye(4), alpha=0.05)
    assert_allclose(res, ci2, rtol=1e-13)
from statsmodels.stats.correlation_tools import ( corr_nearest, corr_clipped, cov_nearest, _project_correlation_factors, corr_nearest_factor, _spg_optim, corr_thresholded, cov_nearest_factor_homog, FactoredPSDMatrix) from statsmodels.tools.testing import Holder def norm_f(x, y): '''Frobenious norm (squared sum) of difference between two arrays ''' d = ((x - y)**2).sum() return np.sqrt(d) # R library Matrix results cov1_r = Holder() #> nc <- nearPD(pr, conv.tol = 1e-7, keepDiag = TRUE, doDykstra =FALSE, corr=TRUE) #> cat_items(nc, prefix="cov1_r.") cov1_r.mat = '''<S4 object of class structure("dpoMatrix", package = "Matrix")>''' cov1_r.eigenvalues = np.array([ 4.197315628646795, 0.7540460243978023, 0.5077608149667492, 0.3801267599652769, 0.1607508970775889, 4.197315628646795e-08 ]) cov1_r.corr = '''TRUE''' cov1_r.normF = 0.0743805226512533 cov1_r.iterations = 11 cov1_r.rel_tol = 8.288594638441735e-08 cov1_r.converged = '''TRUE''' #> mkarray2(as.matrix(nc$mat), name="cov1_r.mat") cov1_r.mat = np.array([ 1, 0.487968018215892, 0.642651880010906, 0.4906386709070835,
def test_cohens_kappa_irr():
    """Compare ``cohens_kappa`` with results of ``kappa2`` from R package irr.

    Each entry of ``all_cases`` pairs a holder of R reference values with
    the ``weights`` and ``wt`` options that are expected to reproduce them.
    """
    # NOTE: the values stored in these two holders (anxiety[,1:2]) are not
    # used by the checks below; ``ck_w3`` and ``ck_w4`` are rebound to new
    # holders before ``all_cases`` is built.
    ck_w3 = Holder()
    ck_w4 = Holder()

    #>r = kappa2(anxiety[,1:2], c(0,0,0,1,1,1))
    #> cat_items(r, pref="ck_w3.")
    ck_w3.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,0,1,1,1)"
    ck_w3.irr_name = 'Kappa'
    ck_w3.value = 0.1891892
    ck_w3.stat_name = 'z'
    ck_w3.statistic = 0.5079002
    ck_w3.p_value = 0.6115233

    #> r = kappa2(anxiety[,1:2], c(0,0,1,1,2,2))
    #> cat_items(r, pref="ck_w4.")
    ck_w4.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,1,1,2,2)"
    ck_w4.irr_name = 'Kappa'
    ck_w4.value = 0.2820513
    ck_w4.stat_name = 'z'
    ck_w4.statistic = 1.257410
    ck_w4.p_value = 0.2086053

    # reference results for anxiety[,2:3]; these are the holders that are
    # used in ``all_cases``
    ck_w1 = Holder()
    ck_w2 = Holder()
    ck_w3 = Holder()
    ck_w4 = Holder()

    #> r = kappa2(anxiety[,2:3])
    #> cat_items(r, pref="ck_w1.")
    ck_w1.method = "Cohen's Kappa for 2 Raters (Weights: unweighted)"
    ck_w1.irr_name = 'Kappa'
    ck_w1.value = -0.006289308
    ck_w1.stat_name = 'z'
    ck_w1.statistic = -0.0604067
    ck_w1.p_value = 0.9518317

    #> r = kappa2(anxiety[,2:3], "equal")
    #> cat_items(r, pref="ck_w2.")
    ck_w2.method = "Cohen's Kappa for 2 Raters (Weights: equal)"
    ck_w2.irr_name = 'Kappa'
    ck_w2.value = 0.1459075
    ck_w2.stat_name = 'z'
    ck_w2.statistic = 1.282472
    ck_w2.p_value = 0.1996772

    #> r = kappa2(anxiety[,2:3], "squared")
    #> cat_items(r, pref="ck_w3.")
    ck_w3.method = "Cohen's Kappa for 2 Raters (Weights: squared)"
    ck_w3.irr_name = 'Kappa'
    ck_w3.value = 0.2520325
    ck_w3.stat_name = 'z'
    ck_w3.statistic = 1.437451
    ck_w3.p_value = 0.1505898

    #> r = kappa2(anxiety[,2:3], c(0,0,1,1,2))
    #> cat_items(r, pref="ck_w4.")
    ck_w4.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,1,1,2)"
    ck_w4.irr_name = 'Kappa'
    ck_w4.value = 0.2391304
    ck_w4.stat_name = 'z'
    ck_w4.statistic = 1.223734
    ck_w4.p_value = 0.2210526

    # (reference holder, weights, wt); several option combinations are
    # expected to reproduce the same reference values
    all_cases = [(ck_w1, None, None),
                 (ck_w2, None, 'linear'),
                 (ck_w2, np.arange(5), None),
                 (ck_w2, np.arange(5), 'toeplitz'),
                 (ck_w3, None, 'quadratic'),
                 (ck_w3, np.arange(5)**2, 'toeplitz'),
                 (ck_w3, 4*np.arange(5)**2, 'toeplitz'),
                 (ck_w4, [0,0,1,1,2], 'toeplitz')]

    # Note: R irr drops the missing category level 4 and uses the reduced
    # matrix; the bin edges below produce the matching 5x5 table
    r = np.histogramdd(anxiety[:,1:], ([1, 2, 3, 4, 6, 7], [1, 2, 3, 4, 6, 7]))

    for res2, w, wt in all_cases:
        # the options identify the failing case in the assertion message
        msg = repr(w) + repr(wt)
        res1 = cohens_kappa(r[0], weights=w, wt=wt)
        assert_almost_equal(res1.kappa, res2.value, decimal=6, err_msg=msg)
        assert_almost_equal(res1.z_value, res2.statistic, decimal=5, err_msg=msg)
        assert_almost_equal(res1.pvalue_two_sided, res2.p_value, decimal=6, err_msg=msg)
def setup_class(cls):
    """Attach expected and computed paired TOST results to the test class.

    ``res2`` is the reference holder ``tost_clinic_paired``; ``res1`` is a
    fresh ``Holder`` whose ``pvalue`` is the first element returned by
    ``smws.ttost_paired`` with bounds ``(-0.6, 0.6)`` and no transform.
    """
    cls.res2 = tost_clinic_paired
    # column 3 of ``clinic``: first 15 rows versus the remaining rows
    sample_a = clinic[:15, 3]
    sample_b = clinic[15:, 3]
    computed = Holder()
    cls.res1 = computed
    tost_out = smws.ttost_paired(sample_a, sample_b, -0.6, 0.6,
                                 transform=None)
    computed.pvalue = tost_out[0]
def test_rank_compare_2indep1():
    """Check ``rank_compare_2indep`` against R reference values.

    The first part uses the normal distribution (``use_t=False``), the
    second part the t distribution with the samples swapped. Both parts
    also check that the tests, confidence interval and TOST are consistent
    with each other.
    """
    # Example from Munzel and Hauschke 2003
    # data is given by counts, expand to observations
    levels = [-2, -1, 0, 1, 2]
    new = [24, 37, 21, 19, 6]
    active = [11, 51, 22, 21, 7]
    x1 = np.repeat(levels, new)
    x2 = np.repeat(levels, active)

    # using lawstat
    # > brunner.munzel.test(xn, xa)
    # brunnermunzel.test(x, y)
    res2_t = Holder(statistic=1.1757561456582,
                    df=204.2984239868,
                    pvalue=0.2410606649547,
                    ci=[0.4700629827705593, 0.6183882855872511],
                    prob=0.5442256341789052)

    # normal distribution; sign and probability are reversed relative to
    # the reference values for this ordering of the samples
    res = rank_compare_2indep(x1, x2, use_t=False)
    assert_allclose(res.statistic, -res2_t.statistic, rtol=1e-13)
    assert_allclose(res.prob1, 1 - res2_t.prob, rtol=1e-13)
    assert_allclose(res.prob2, res2_t.prob, rtol=1e-13)
    tt = res.test_prob_superior()
    # TODO: return HolderTuple
    # assert_allclose(tt.statistic, res2_t.statistic)
    # TODO: check sign/direction in lawstat
    assert_allclose(tt[0], -res2_t.statistic, rtol=1e-13)

    ci = res.conf_int(alpha=0.05)
    # we compare normal confint with t confint, lower rtol
    assert_allclose(ci, 1 - np.array(res2_t.ci)[::-1], rtol=0.005)

    # test consistency of test and confint
    res_lb = res.test_prob_superior(value=ci[0])
    assert_allclose(res_lb[1], 0.05, rtol=1e-13)
    res_ub = res.test_prob_superior(value=ci[1])
    assert_allclose(res_ub[1], 0.05, rtol=1e-13)

    # test consistency of tost and confint
    # lower margin is binding, alternative larger
    res_tost = res.tost_prob_superior(ci[0], ci[1] * 1.05)
    assert_allclose(res_tost.results_larger.pvalue, 0.025, rtol=1e-13)
    assert_allclose(res_tost.pvalue, 0.025, rtol=1e-13)

    # upper margin is binding, alternative smaller
    res_tost = res.tost_prob_superior(ci[0] * 0.85, ci[1])
    assert_allclose(res_tost.results_smaller.pvalue, 0.025, rtol=1e-13)
    assert_allclose(res_tost.pvalue, 0.025, rtol=1e-13)

    # use t-distribution
    # our ranking is defined as reversed from lawstat, and BM article
    # reverse direction to match our definition
    x1, x2 = x2, x1
    res = rank_compare_2indep(x1, x2, use_t=True)
    assert_allclose(res.statistic, res2_t.statistic, rtol=1e-13)
    tt = res.test_prob_superior()
    # TODO: return HolderTuple
    # assert_allclose(tt.statistic, res2_t.statistic)
    # TODO: check sign/direction in lawstat, reversed from ours
    assert_allclose(tt[0], res2_t.statistic, rtol=1e-13)
    assert_allclose(tt[1], res2_t.pvalue, rtol=1e-13)
    assert_allclose(res.pvalue, res2_t.pvalue, rtol=1e-13)
    assert_allclose(res.df, res2_t.df, rtol=1e-13)

    ci = res.conf_int(alpha=0.05)
    assert_allclose(ci, res2_t.ci, rtol=1e-11)

    # test consistency of test and confint
    res_lb = res.test_prob_superior(value=ci[0])
    assert_allclose(res_lb[1], 0.05, rtol=1e-11)
    res_ub = res.test_prob_superior(value=ci[1])
    assert_allclose(res_ub[1], 0.05, rtol=1e-11)

    # test consistency of tost and confint
    # lower margin is binding, alternative larger
    res_tost = res.tost_prob_superior(ci[0], ci[1] * 1.05)
    assert_allclose(res_tost.results_larger.pvalue, 0.025, rtol=1e-10)
    assert_allclose(res_tost.pvalue, 0.025, rtol=1e-10)

    # upper margin is binding, alternative smaller
    res_tost = res.tost_prob_superior(ci[0] * 0.85, ci[1])
    assert_allclose(res_tost.results_smaller.pvalue, 0.025, rtol=1e-10)
    assert_allclose(res_tost.pvalue, 0.025, rtol=1e-10)

    # extras
    # cohen's d
    esd = res.effectsize_normal()
    p = prob_larger_continuous(stats.norm(loc=esd), stats.norm)
    # round trip
    assert_allclose(p, res.prob1, rtol=1e-13)

    # round trip with cohen's d
    pc = cohensd2problarger(esd)
    assert_allclose(pc, res.prob1, rtol=1e-13)

    # confidence interval of 1 - prob; compared with the reversed
    # reference interval
    ci_tr = res.confint_lintransf(1, -1)
    assert_allclose(ci_tr, 1 - np.array(res2_t.ci)[::-1], rtol=0.005)
def setup_class(cls):
    """Attach expected and computed pooled TOST results to the test class.

    ``res2`` is the reference holder ``tost_clinic_indep_1_pooled``;
    ``res1`` is a fresh ``Holder`` whose ``pvalue`` is the first element
    returned by ``smws.ttost_ind`` with ``usevar='pooled'`` and bounds
    ``(-0.6, 0.6)``.
    """
    cls.res2 = tost_clinic_indep_1_pooled
    # column 2 of ``clinic``: first 15 rows versus the remaining rows
    sample_a = clinic[:15, 2]
    sample_b = clinic[15:, 2]
    computed = Holder()
    cls.res1 = computed
    tost_out = smws.ttost_ind(sample_a, sample_b, -0.6, 0.6,
                              usevar='pooled')
    computed.pvalue = tost_out[0]
# -*- coding: utf-8 -*- """ Created on Thu Jan 6 13:55:50 2022 Author: Josef Perktod License: BSD-3 """ # flake8: noqa import numpy as np from statsmodels.tools.testing import Holder hurdle_poisson = Holder() # r library pscl, docvis data # > mod = hurdle( docvis ~ aget + totchr, data=dt, zero.dist = "poisson") hurdle_poisson.method = 'BFGS' hurdle_poisson.n = 3629 hurdle_poisson.df_null = 3627 hurdle_poisson.df_residual = 3623 hurdle_poisson.loglik = -13612.9091771797 hurdle_poisson.aic = 27237.81835436 hurdle_poisson.bic = 27274.9986288 hurdle_poisson.vcov = np.array([ 0.000239404800324688, -4.59559682721834e-05, -4.59865258972631e-05, 0, 0, 0, -4.59559682721834e-05, 2.54346275490526e-05, -1.20164687288645e-06, 0, 0, 0, -4.59865258972631e-05, -1.20164687288644e-06, 2.01936456643824e-05, 0, 0, 0, 0, 0, 0, 0.00241829560973498, -0.000548499729173446, -0.000636055275016966, 0, 0, 0, -0.000548499729173446, 0.000351548196602719, -6.30088654100178e-05, 0, 0, 0, -0.000636055275016966, -6.30088654100178e-05, 0.000562508220544602