def test_nested_linear(self):
    """Nested covariance structure, checked against stored GEE output."""

    fam = Gaussian()
    endog, exog, group = load_data("gee_nested_linear_1.csv")

    # Build nested sub-group labels: within each block of 10
    # observations, the first 5 form one sub-group, the last 5 another.
    sub = []
    for _ in range(endog.shape[0] // 10):
        sub.extend(5 * [0])
        sub.extend(5 * [1])
    sub = np.array(sub)[:, None]

    # Fit under independence first; it supplies starting values below.
    model_ind = GEE(endog, exog, group, None, fam, Independence())
    result_ind = model_ind.fit()

    # From statsmodels.GEE (not an independent test)
    params_ind = np.r_[-0.1671073, 1.00467426, -2.01723004, 0.97297106]
    se_ind = np.r_[0.08629606, 0.04058653, 0.04067038, 0.03777989]
    assert_almost_equal(result_ind.params, params_ind, decimal=6)
    assert_almost_equal(result_ind.standard_errors(), se_ind, decimal=6)

    model_nest = GEE(endog, exog, group, None, fam, Nested(),
                     dep_data=sub)
    result_nest = model_nest.fit(start_params=result_ind.params)

    # From statsmodels.GEE (not an independent test)
    params_nest = np.r_[-0.16655319, 1.02183688, -2.00858719, 1.00101969]
    se_nest = np.r_[0.08632616, 0.02913582, 0.03114428, 0.02893991]
    assert_almost_equal(result_nest.params, params_nest, decimal=6)
    assert_almost_equal(result_nest.standard_errors(), se_nest, decimal=6)
def test_nominal(self):
    """Nominal GEE via setup_nominal, under two dependence structures."""

    fam = Multinomial(3)
    endog, exog, groups = load_data("gee_nominal_1.csv", icept=False)

    # Independence working correlation.
    model = GEE(endog, exog, groups, None, fam, Independence())
    model.setup_nominal()
    result_ind = model.fit()

    # From statsmodels.GEE (not an independent test)
    exp_params = np.r_[0.44944752, 0.45569985, -0.92007064, -0.46766728]
    exp_se = np.r_[0.09801821, 0.07718842, 0.13229421, 0.08544553]
    assert_almost_equal(result_ind.params, exp_params, decimal=5)
    assert_almost_equal(result_ind.standard_errors(), exp_se, decimal=5)

    # Global odds ratio dependence, warm-started at the independence fit.
    model = GEE(endog, exog, groups, None, fam, GlobalOddsRatio("nominal"))
    model.setup_nominal()
    result_gor = model.fit(start_params=result_ind.params)

    # From statsmodels.GEE (not an independent test)
    exp_params = np.r_[0.45397549, 0.42278345, -0.91997131, -0.50115943]
    exp_se = np.r_[0.09646057, 0.07405713, 0.1324629, 0.09025019]
    assert_almost_equal(result_gor.params, exp_params, decimal=5)
    assert_almost_equal(result_gor.standard_errors(), exp_se, decimal=5)
def setup_class(cls):
    """Build a formula-based Poisson GEE shared by this class's tests."""
    endog, exog, group_n = load_data("gee_poisson_1.csv")

    # Assemble a DataFrame with named columns for the formula interface;
    # exog column 0 is the intercept and is dropped (the formula adds one).
    cols = np.concatenate((endog[:, None], group_n[:, None], exog[:, 1:]),
                          axis=1)
    frame = pd.DataFrame(cols)
    names = ["X%d" % (j + 1) for j in range(exog.shape[1] - 1)]
    frame.columns = ["Y", "Id"] + names

    cls.mod = GEE.from_formula("Y ~ X1 + X2 + X3 + X4 + X5", "Id", frame,
                               family=Poisson(),
                               cov_struct=Independence())

    # Known-good starting values, to keep the fits fast.
    cls.start_params = np.array([-0.03644504, -0.05432094, 0.01566427,
                                 0.57628591, -0.0046566, -0.47709315])
def test_poisson_epil(self):
    """Poisson GEE with independence should match GLM on the epil data."""
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    data = pd.read_csv(os.path.join(cur_dir, "results", "epil.csv"))

    mod1 = GEE.from_formula("y ~ age + trt + base", data["subject"],
                            data, cov_struct=Independence(),
                            family=Poisson())
    rslt1 = mod1.fit()

    # Coefficients should agree with GLM
    from statsmodels.genmod.generalized_linear_model import GLM
    from statsmodels.genmod import families

    mod2 = GLM.from_formula("y ~ age + trt + base", data,
                            family=families.Poisson())
    rslt2 = mod2.fit(scale="X2")

    # don't use wrapper, asserts_xxx don't work
    rslt1 = rslt1._results
    rslt2 = rslt2._results

    assert_almost_equal(rslt1.params, rslt2.params, decimal=6)
    assert_almost_equal(rslt1.scale, rslt2.scale, decimal=6)
def test_poisson_epil(self):
    """Poisson GEE (independence) should reproduce GLM on the epil data."""
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    data = pd.read_csv(os.path.join(cur_dir, "results", "epil.csv"))

    md1 = GEE.from_formula("y ~ age + trt + base", data,
                           groups=data["subject"],
                           cov_struct=Independence(), family=Poisson())
    mdf1 = md1.fit()

    # Coefficients should agree with GLM
    from statsmodels.genmod.generalized_linear_model import GLM
    from statsmodels.genmod import families

    md2 = GLM.from_formula("y ~ age + trt + base", data,
                           family=families.Poisson())
    mdf2 = md2.fit(scale="X2")

    assert_almost_equal(mdf1.params, mdf2.params, decimal=6)
    assert_almost_equal(mdf1.scale, mdf2.scale, decimal=6)
def test_compare_poisson(self):
    """GEE-Poisson/independence equals the ordinary Poisson MLE."""
    cov = Independence()
    fam = Poisson()

    # Draw data in the same order as before (shared global RNG).
    n = 100
    Y = np.ceil(-np.log(np.random.uniform(size=n)))
    X1 = np.random.normal(size=n)
    X2 = np.random.normal(size=n)
    X3 = np.random.normal(size=n)
    groups = np.random.randint(0, 4, size=n)

    frame = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

    gee_fit = GEE.from_formula("Y ~ X1 + X2 + X3", groups, frame,
                               family=fam, cov_struct=cov).fit()
    ml_fit = sm.poisson("Y ~ X1 + X2 + X3", data=frame).fit(disp=False)

    assert_almost_equal(gee_fit.params.values, ml_fit.params.values,
                        decimal=10)
def test_compare_OLS(self):
    """
    Gaussian GEE with independence correlation should agree exactly
    with OLS for parameter estimates and standard errors derived
    from the naive covariance estimate.
    """
    n = 100
    Y = np.random.normal(size=n)
    X1 = np.random.normal(size=n)
    X2 = np.random.normal(size=n)
    X3 = np.random.normal(size=n)
    # 20 clusters of 5 observations each.
    groups = np.kron(range(20), np.ones(5))

    frame = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

    gee_rslt = GEE.from_formula("Y ~ X1 + X2 + X3", frame, None,
                                groups=groups, family=Gaussian(),
                                covstruct=Independence()).fit()
    ols_rslt = sm.ols("Y ~ X1 + X2 + X3", data=frame).fit()

    assert_almost_equal(ols_rslt.params.values, gee_rslt.params,
                        decimal=10)

    # t-ratios built from the naive (model-based) covariance must
    # also match OLS.
    naive_t = gee_rslt.params / \
        np.sqrt(np.diag(gee_rslt.naive_covariance))
    assert_almost_equal(naive_t, ols_rslt.tvalues, decimal=10)
def test_nominal(self):
    """NominalGEE regression values and results-wrapper type checks."""

    fam = Multinomial(3)
    endog, exog, groups = load_data("gee_nominal_1.csv", icept=False)

    # Independence working correlation.
    rslt1 = NominalGEE(endog, exog, groups, None, fam,
                       Independence()).fit()

    # Regression test
    cf1 = np.r_[0.44944752, 0.45569985, -0.92007064, -0.46766728]
    se1 = np.r_[0.09801821, 0.07718842, 0.13229421, 0.08544553]
    assert_almost_equal(rslt1.params, cf1, decimal=5)
    assert_almost_equal(rslt1.standard_errors(), se1, decimal=5)

    # Global odds ratio dependence, warm-started from the fit above.
    mod2 = NominalGEE(endog, exog, groups, None, fam,
                      GlobalOddsRatio("nominal"))
    rslt2 = mod2.fit(start_params=rslt1.params)

    # Regression test
    cf2 = np.r_[0.45448248, 0.41945568, -0.92008924, -0.50485758]
    se2 = np.r_[0.09632274, 0.07433944, 0.13264646, 0.0911768]
    assert_almost_equal(rslt2.params, cf2, decimal=5)
    assert_almost_equal(rslt2.standard_errors(), se2, decimal=5)

    # Make sure we get the correct results type
    assert_equal(type(rslt1), NominalGEEResultsWrapper)
    assert_equal(type(rslt1._results), NominalGEEResults)
def test_nominal(self):
    """NominalGEE under independence and global-odds-ratio dependence."""

    fam = Multinomial(3)
    endog, exog, groups = load_data("gee_nominal_1.csv", icept=False)

    # Independence working correlation.
    result_ind = NominalGEE(endog, exog, groups, None, fam,
                            Independence()).fit()

    # From statsmodels.GEE (not an independent test)
    exp_params = np.r_[0.44944752, 0.45569985, -0.92007064, -0.46766728]
    exp_se = np.r_[0.09801821, 0.07718842, 0.13229421, 0.08544553]
    assert_almost_equal(result_ind.params, exp_params, decimal=5)
    assert_almost_equal(result_ind.standard_errors(), exp_se, decimal=5)

    # Global odds ratio dependence, warm-started at the independence fit.
    model_gor = NominalGEE(endog, exog, groups, None, fam,
                           GlobalOddsRatio("nominal"))
    result_gor = model_gor.fit(start_params=result_ind.params)

    # From statsmodels.GEE (not an independent test)
    exp_params = np.r_[0.45448248, 0.41945568, -0.92008924, -0.50485758]
    exp_se = np.r_[0.09632274, 0.07433944, 0.13264646, 0.0911768]
    assert_almost_equal(result_gor.params, exp_params, decimal=5)
    assert_almost_equal(result_gor.standard_errors(), exp_se, decimal=5)
def gendat_overdispersed():
    """Simulate overdispersed data; return (simulator, cov_struct)."""
    sim = Overdispersed_simulator()
    sim.params = np.r_[2.0, 0.2, 0.2, -0.1, -0.2]
    sim.ngroups = 200
    sim.scale_inv = 2.0
    sim.dparams = []
    sim.simulate()
    return sim, Independence()
def setup_class(cls):
    """Prepare a nominal GEE model plus starting values for the tests."""
    endog, exog, groups = load_data("gee_nominal_1.csv", icept=False)

    # Test with independence correlation
    cls.mod = NominalGEE(endog, exog, groups, None, Multinomial(3),
                         Independence())
    cls.start_params = np.array(
        [0.44944752, 0.45569985, -0.92007064, -0.46766728])
def test_wrapper(self):
    """NominalGEE fit on pandas inputs produces a working wrapper."""
    endog, exog, groups = load_data("gee_nominal_1.csv", icept=False)
    endog = pd.Series(endog, name='yendog')
    exog = pd.DataFrame(exog)
    groups = pd.Series(groups, name='the_group')

    model = NominalGEE(endog, exog, groups, None, Multinomial(3),
                       Independence())
    check_wrapper(model.fit())
def setup_class(cls):
    """Poisson GEE on the shared test data, with stored start values."""
    endog, exog, group_n = load_data("gee_poisson_1.csv")

    cls.mod = GEE(endog, exog, group_n, None, Poisson(), Independence())
    cls.start_params = np.array([-0.03644504, -0.05432094, 0.01566427,
                                 0.57628591, -0.0046566, -0.47709315])
def test_wrapper(self):
    """Poisson GEE fit on pandas inputs produces a working wrapper."""
    endog, exog, group_n = load_data("gee_poisson_1.csv", icept=False)
    endog = pd.Series(endog)
    exog = pd.DataFrame(exog)
    group_n = pd.Series(group_n)

    model = GEE(endog, exog, group_n, None, Poisson(), Independence())
    check_wrapper(model.fit())
def setup(self):
    # Re-fit for every test, because individual tests mutate the results.
    x = self.exog
    np.random.seed(987689)
    y_count = np.random.poisson(np.exp(x.sum(1) - x.mean()))
    groups = np.random.randint(0, 4, size=x.shape[0])

    # start_params speeds the fit; difficult convergence is not under test.
    start_params = np.array([0., 1., 1., 1.])

    from statsmodels.genmod.dependence_structures import Independence
    model = sm.GEE(y_count, x, groups,
                   family=sm.families.Poisson(),
                   cov_struct=Independence())
    self.results = model.fit(start_params=start_params)
def setup(self):
    # Re-fit for every test, because individual tests mutate the results.
    x = self.exog
    np.random.seed(987689)
    # Linear predictor centered by its own mean before exponentiating.
    y_count = np.random.poisson(np.exp(x.sum(1) - x.sum(1).mean(0)))
    groups = np.random.randint(0, 4, size=x.shape[0])

    # start_params speeds the fit; difficult convergence is not under test.
    start_params = np.array([0., 1., 1., 1.])

    from statsmodels.genmod.dependence_structures import Independence
    model = sm.GEE(y_count, x, groups,
                   family=sm.families.Poisson(),
                   cov_struct=Independence())
    # Bias-reduced covariance is the point of this fixture.
    self.results = model.fit(start_params=start_params,
                             cov_type='bias_reduced')
def test_linear_constrained(self):
    """A linear constraint pinning the last coefficient to zero holds."""
    fam = Gaussian()

    exog = np.random.normal(size=(300, 4))
    exog[:, 0] = 1  # intercept column
    endog = np.dot(exog, np.r_[1, 1, 0, 0.2]) + \
        np.random.normal(size=300)
    group = np.kron(np.arange(100), np.r_[1, 1, 1])

    # Constraint L @ params = R forces coefficient 3 to zero.
    L = np.r_[[[0, 0, 0, 1]]]
    R = np.r_[0, ]

    for cov in (Independence(), Exchangeable()):
        rslt = GEE(endog, exog, group, None, fam, cov,
                   constraint=(L, R)).fit()
        assert_almost_equal(rslt.params[3], 0, decimal=10)
def test_compare_OLS(self):
    """
    Gaussian GEE with independence correlation should agree exactly
    with OLS for parameter estimates and standard errors derived
    from the naive covariance estimate.
    """
    n = 100
    Y = np.random.normal(size=n)
    X1 = np.random.normal(size=n)
    X2 = np.random.normal(size=n)
    X3 = np.random.normal(size=n)
    # 20 clusters of 5 observations each.
    groups = np.kron(lrange(20), np.ones(5))

    frame = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

    gee_rslt = GEE.from_formula("Y ~ X1 + X2 + X3", groups, frame,
                                family=Gaussian(),
                                cov_struct=Independence()).fit()

    # don't use wrapper, asserts_xxx don't work
    ols_rslt = sm.ols("Y ~ X1 + X2 + X3", data=frame).fit()._results

    assert_almost_equal(ols_rslt.params, gee_rslt.params, decimal=10)

    se = gee_rslt.standard_errors(cov_type="naive")
    assert_almost_equal(ols_rslt.bse, se, decimal=10)

    # t-ratios from the naive covariance must also match OLS.
    naive_t = gee_rslt.params / \
        np.sqrt(np.diag(gee_rslt.cov_naive))
    assert_almost_equal(naive_t, ols_rslt.tvalues, decimal=10)
def test_compare_poisson(self):
    """GEE-Poisson/independence equals the ordinary Poisson MLE."""
    n = 100
    Y = np.ceil(-np.log(np.random.uniform(size=n)))
    X1 = np.random.normal(size=n)
    X2 = np.random.normal(size=n)
    X3 = np.random.normal(size=n)
    groups = np.random.randint(0, 4, size=n)

    frame = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

    gee_rslt = GEE.from_formula("Y ~ X1 + X2 + X3", frame, None,
                                groups=groups, family=Poisson(),
                                covstruct=Independence()).fit()
    ml_rslt = sm.poisson("Y ~ X1 + X2 + X3", data=frame).fit()

    assert_almost_equal(ml_rslt.params.values, gee_rslt.params,
                        decimal=10)
def test_compare_logit(self):
    """GEE-binomial/independence equals ordinary logistic regression."""
    n = 100
    Y = 1 * (np.random.normal(size=n) < 0)
    X1 = np.random.normal(size=n)
    X2 = np.random.normal(size=n)
    X3 = np.random.normal(size=n)
    groups = np.random.randint(0, 4, size=n)

    frame = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

    gee_rslt = GEE.from_formula("Y ~ X1 + X2 + X3", frame, None,
                                groups=groups, family=Binomial(),
                                cov_struct=Independence()).fit()
    ml_rslt = sm.logit("Y ~ X1 + X2 + X3", data=frame).fit(disp=False)

    assert_almost_equal(ml_rslt.params.values, gee_rslt.params,
                        decimal=10)
def test_compare_logit(self):
    """GEE-binomial/independence equals ordinary logistic regression."""
    n = 100
    Y = 1 * (np.random.normal(size=n) < 0)
    X1 = np.random.normal(size=n)
    X2 = np.random.normal(size=n)
    X3 = np.random.normal(size=n)
    groups = np.random.randint(0, 4, size=n)

    frame = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

    gee_rslt = GEE.from_formula("Y ~ X1 + X2 + X3", groups, frame,
                                family=Binomial(),
                                cov_struct=Independence()).fit()
    ml_rslt = sm.logit("Y ~ X1 + X2 + X3", data=frame).fit()

    assert_almost_equal(gee_rslt.params, ml_rslt.params, decimal=10)
# Loop over data generating models for gendat in gendats: pvalues = [] params = [] std_errors = [] dparams = [] for j in range(nrep): da, va = gendat() ga = Poisson() # Poisson seems to be more sensitive to starting values, # so we run the independence model first. md = GEE(da.endog, da.exog, da.group, da.time, ga, Independence()) mdf = md.fit() md = GEE(da.endog, da.exog, da.group, da.time, ga, va) mdf = md.fit(start_params=mdf.params) if mdf is None or (not mdf.converged): print("Failed to converge") continue scale_inv = 1. / md.estimate_scale() dparams.append(np.r_[va.dparams, scale_inv]) params.append(np.asarray(mdf.params)) std_errors.append(np.asarray(mdf.standard_errors)) da, va = gendat() ga = Poisson()
def test_logistic(self):
    """
    Logistic GEE under independence, exchangeable, and autoregressive
    working correlation, checked against R's gee package.

    R code for comparing results:

    library(gee)
    Z = read.csv("results/gee_logistic_1.csv", header=FALSE)
    Y = Z[,2]
    Id = Z[,1]
    X1 = Z[,3]
    X2 = Z[,4]
    X3 = Z[,5]

    mi = gee(Y ~ X1 + X2 + X3, id=Id, family=binomial,
             corstr="independence")
    smi = summary(mi)
    u = coefficients(smi)
    cfi = paste(u[,1], collapse=",")
    sei = paste(u[,4], collapse=",")

    me = gee(Y ~ X1 + X2 + X3, id=Id, family=binomial,
             corstr="exchangeable")
    sme = summary(me)
    u = coefficients(sme)
    cfe = paste(u[,1], collapse=",")
    see = paste(u[,4], collapse=",")

    ma = gee(Y ~ X1 + X2 + X3, id=Id, family=binomial,
             corstr="AR-M")
    sma = summary(ma)
    u = coefficients(sma)
    cfa = paste(u[,1], collapse=",")
    sea = paste(u[,4], collapse=",")

    sprintf("cf = [[%s],[%s],[%s]]", cfi, cfe, cfa)
    sprintf("se = [[%s],[%s],[%s]]", sei, see, sea)
    """

    endog, exog, group = load_data("gee_logistic_1.csv")

    # Time values for the autoregressive model: 0, 1, 2, ... within
    # each group.
    T = np.zeros(len(endog))
    idx = set(group)
    for ii in idx:
        jj = np.flatnonzero(group == ii)
        T[jj] = range(len(jj))

    family = Binomial()
    ve = Exchangeable()
    vi = Independence()
    va = Autoregressive()

    # From R gee
    cf = [[0.0167272965285882, 1.13038654425893,
           -1.86896345082962, 1.09397608331333],
          [0.0178982283915449, 1.13118798191788,
           -1.86133518416017, 1.08944256230299],
          [0.0109621937947958, 1.13226505028438,
           -1.88278757333046, 1.09954623769449]]
    se = [[0.127291720283049, 0.166725808326067,
           0.192430061340865, 0.173141068839597],
          [0.127045031730155, 0.165470678232842,
           0.192052750030501, 0.173174779369249],
          [0.127240302296444, 0.170554083928117,
           0.191045527104503, 0.169776150974586]]

    for j, v in enumerate((vi, ve, va)):
        md = GEE(endog, exog, group, T, family, v)
        mdf = md.fit()
        # The AR fit is only checked for running without error.
        if id(v) != id(va):
            assert_almost_equal(mdf.params, cf[j], decimal=6)
            assert_almost_equal(mdf.standard_errors(), se[j],
                                decimal=6)

    # Test with formulas
    D = np.concatenate((endog[:, None], group[:, None], exog[:, 1:]),
                       axis=1)
    D = pd.DataFrame(D)
    D.columns = ["Y", "Id", ] + ["X%d" % (k + 1)
                                 for k in range(exog.shape[1] - 1)]
    for j, v in enumerate((vi, ve)):
        md = GEE.from_formula("Y ~ X1 + X2 + X3", D, None,
                              groups=D.loc[:, "Id"],
                              family=family, covstruct=v)
        mdf = md.fit()
        assert_almost_equal(mdf.params, cf[j], decimal=6)
        assert_almost_equal(mdf.standard_errors(), se[j], decimal=6)

    # Check for run-time exceptions in summary
    # (was `print mdf.summary()` — Python 2 print-statement syntax,
    # a SyntaxError under Python 3; only the call is needed here)
    mdf.summary()
def test_poisson(self):
    """
    Poisson GEE under independence and exchangeable working
    correlation, checked against R's gee package.

    R code for reproducing the reference values:

    library(gee)
    Z = read.csv("results/gee_poisson_1.csv", header=FALSE)
    Y = Z[,2]
    Id = Z[,1]
    X1 = Z[,3]
    X2 = Z[,4]
    X3 = Z[,5]
    X4 = Z[,6]
    X5 = Z[,7]

    mi = gee(Y ~ X1 + X2 + X3 + X4 + X5, id=Id, family=poisson,
             corstr="independence", scale.fix=TRUE)
    smi = summary(mi)
    u = coefficients(smi)
    cfi = paste(u[,1], collapse=",")
    sei = paste(u[,4], collapse=",")

    me = gee(Y ~ X1 + X2 + X3 + X4 + X5, id=Id, family=poisson,
             corstr="exchangeable", scale.fix=TRUE)
    sme = summary(me)
    u = coefficients(sme)
    cfe = paste(u[,1], collapse=",")
    see = paste(u[,4], collapse=",")

    sprintf("cf = [[%s],[%s]]", cfi, cfe)
    sprintf("se = [[%s],[%s]]", sei, see)
    """

    family = Poisson()
    endog, exog, group_n = load_data("gee_poisson_1.csv")
    vi = Independence()
    ve = Exchangeable()

    # From R gee: rows are (independence, exchangeable).
    cf = [[-0.0364450410793481, -0.0543209391301178,
           0.0156642711741052, 0.57628591338724,
           -0.00465659951186211, -0.477093153099256],
          [-0.0315615554826533, -0.0562589480840004,
           0.0178419412298561, 0.571512795340481,
           -0.00363255566297332, -0.475971696727736]]
    se = [[0.0611309237214186, 0.0390680524493108,
           0.0334234174505518, 0.0366860768962715,
           0.0304758505008105, 0.0316348058881079],
          [0.0610840153582275, 0.0376887268649102,
           0.0325168379415177, 0.0369786751362213,
           0.0296141014225009, 0.0306115470200955]]

    # Array interface: one fit per working correlation structure.
    for j, v in enumerate((vi, ve)):
        md = GEE(endog, exog, group_n, None, family, v)
        mdf = md.fit()
        assert_almost_equal(mdf.params, cf[j], decimal=5)
        assert_almost_equal(mdf.standard_errors(), se[j], decimal=6)

    # Test with formulas
    D = np.concatenate((endog[:, None], group_n[:, None],
                        exog[:, 1:]), axis=1)
    D = pd.DataFrame(D)
    D.columns = ["Y", "Id", ] + ["X%d" % (k + 1)
                                 for k in range(exog.shape[1] - 1)]

    for j, v in enumerate((vi, ve)):
        md = GEE.from_formula("Y ~ X1 + X2 + X3 + X4 + X5", D, None,
                              groups=D.loc[:, "Id"],
                              family=family, covstruct=v)
        mdf = md.fit()
        assert_almost_equal(mdf.params, cf[j], decimal=5)
        assert_almost_equal(mdf.standard_errors(), se[j], decimal=6)
def test_linear(self):
    """
    Gaussian GEE under independence and exchangeable working
    correlation, checked against R's gee package.

    R code for reproducing the reference values:

    library(gee)

    Z = read.csv("results/gee_linear_1.csv", header=FALSE)
    Y = Z[,2]
    Id = Z[,1]
    X1 = Z[,3]
    X2 = Z[,4]
    X3 = Z[,5]
    mi = gee(Y ~ X1 + X2 + X3, id=Id, family=gaussian,
             corstr="independence", tol=1e-8, maxit=100)
    smi = summary(mi)
    u = coefficients(smi)

    cfi = paste(u[,1], collapse=",")
    sei = paste(u[,4], collapse=",")

    me = gee(Y ~ X1 + X2 + X3, id=Id, family=gaussian,
             corstr="exchangeable", tol=1e-8, maxit=100)
    sme = summary(me)
    u = coefficients(sme)

    cfe = paste(u[,1], collapse=",")
    see = paste(u[,4], collapse=",")

    sprintf("cf = [[%s],[%s]]", cfi, cfe)
    sprintf("se = [[%s],[%s]]", sei, see)
    """

    family = Gaussian()
    endog, exog, group = load_data("gee_linear_1.csv")
    vi = Independence()
    ve = Exchangeable()

    # From R gee: rows are (independence, exchangeable).
    cf = [[-0.01850226507491, 0.81436304278962,
           -1.56167635393184, 0.794239361055003],
          [-0.0182920577154767, 0.814898414022467,
           -1.56194040106201, 0.793499517527478]]
    se = [[0.0440733554189401, 0.0479993639119261,
           0.0496045952071308, 0.0479467597161284],
          [0.0440369906460754, 0.0480069787567662,
           0.049519758758187, 0.0479760443027526]]

    # Array interface: one fit per working correlation structure.
    for j, v in enumerate((vi, ve)):
        md = GEE(endog, exog, group, None, family, v)
        mdf = md.fit()
        assert_almost_equal(mdf.params, cf[j], decimal=10)
        assert_almost_equal(mdf.standard_errors(), se[j], decimal=10)

    # Test with formulas
    D = np.concatenate((endog[:, None], group[:, None],
                        exog[:, 1:]), axis=1)
    D = pd.DataFrame(D)
    D.columns = ["Y", "Id", ] + ["X%d" % (k + 1)
                                 for k in range(exog.shape[1] - 1)]

    for j, v in enumerate((vi, ve)):
        md = GEE.from_formula("Y ~ X1 + X2 + X3", D, None,
                              groups=D.loc[:, "Id"],
                              family=family, covstruct=v)
        mdf = md.fit()
        assert_almost_equal(mdf.params, cf[j], decimal=10)
        assert_almost_equal(mdf.standard_errors(), se[j], decimal=10)
def test_scoretest(self):
    """Score tests under the null and alternative; compare to Wald."""
    # Regression tests
    np.random.seed(6432)
    n = 200  # Must be divisible by 4
    exog = np.random.normal(size=(n, 4))
    endog = exog[:, 0] + exog[:, 1] + exog[:, 2]
    endog += 3 * np.random.normal(size=n)
    group = np.kron(np.arange(n / 4), np.ones(4))

    # Test under the null.
    L = np.array([[1., -1, 0, 0]])
    R = np.array([0., ])
    mod1 = GEE(endog, exog, group, family=Gaussian(),
               cov_struct=Independence(), constraint=(L, R))
    mod1.fit()
    assert_almost_equal(mod1.score_test_results["statistic"],
                        1.08126334)
    assert_almost_equal(mod1.score_test_results["p-value"],
                        0.2984151086)

    # Test under the alternative.
    L = np.array([[1., -1, 0, 0]])
    R = np.array([1.0, ])
    mod2 = GEE(endog, exog, group, family=Gaussian(),
               cov_struct=Independence(), constraint=(L, R))
    mod2.fit()
    assert_almost_equal(mod2.score_test_results["statistic"],
                        3.491110965)
    assert_almost_equal(mod2.score_test_results["p-value"],
                        0.0616991659)

    # Compare to Wald tests
    exog = np.random.normal(size=(n, 2))
    L = np.array([[1, -1]])
    R = np.array([0.])
    f = np.r_[1, -1]
    for i in range(10):
        endog = exog[:, 0] + (0.5 + i / 10.) * exog[:, 1] + \
            np.random.normal(size=n)

        mod0 = GEE(endog, exog, group, family=Gaussian(),
                   cov_struct=Independence())
        rslt0 = mod0.fit()

        mod1 = GEE(endog, exog, group, family=Gaussian(),
                   cov_struct=Independence(), constraint=(L, R))
        mod1.fit()

        se = np.sqrt(np.dot(f, np.dot(rslt0.cov_params(), f)))
        wald_z = np.dot(f, rslt0.params) / se
        wald_p = 2 * norm.cdf(-np.abs(wald_z))

        score_p = mod1.score_test_results["p-value"]
        # The two tests should agree approximately.
        assert (np.abs(wald_p - score_p) < 0.02)