def test_default_time(self):
    """
    Check that a GEE fit without `time` matches a fit where `time` is
    given explicitly as each observation's position within its group.
    """
    endog, exog, group = load_data("gee_logistic_1.csv")

    # Explicit time values for the autoregressive model: 0, 1, 2, ...
    # restarting in every group.
    time_values = np.zeros(len(endog))
    for label in set(group):
        members = np.flatnonzero(group == label)
        time_values[members] = lrange(len(members))

    family = Binomial()
    # Both models receive the same Autoregressive instance.
    cov = Autoregressive()

    # Model relying on the default time values.
    fit_default = GEE(endog, exog, group, family=family,
                      cov_struct=cov).fit()

    # Model with the time values passed explicitly.
    fit_explicit = GEE(endog, exog, group, time=time_values, family=family,
                       cov_struct=cov).fit()

    assert_almost_equal(fit_default.params, fit_explicit.params, decimal=6)
    assert_almost_equal(fit_default.standard_errors(),
                        fit_explicit.standard_errors(), decimal=6)
def test_formulas(self):
    """
    Check the formula interface: `groups` and `time` may each be given
    either as a column name of the data frame or as an array, and every
    combination must reproduce the parameters of the array-based model.
    """
    n = 100

    # Draw order (Y, X1, Time) determines the random stream; keep it.
    Y = np.random.normal(size=n)
    X1 = np.random.normal(size=n)
    intercept = np.ones((n, 1))
    mat = np.concatenate((intercept, X1[:, None]), axis=1)
    Time = np.random.uniform(size=n)
    # 20 groups of 5 consecutive observations each.
    groups = np.kron(lrange(20), np.ones(5))

    data = pd.DataFrame({"Y": Y, "X1": X1, "Time": Time, "groups": groups})

    # The same covariance structure and family objects are shared by
    # every model below.
    va = Autoregressive()
    family = Gaussian()

    # Reference model built directly from arrays.
    reference = GEE(Y, mat, groups, time=Time, family=family,
                    cov_struct=va).fit()

    # (groups argument, time argument) for each formula-based model.
    variants = (
        (groups, Time),
        (groups, "Time"),
        ("groups", Time),
        ("groups", "Time"),
    )

    # Fit every variant first, then compare, so all models are fitted
    # before any assertion can fail.
    formula_results = []
    for groups_arg, time_arg in variants:
        model = GEE.from_formula("Y ~ X1", groups_arg, data, time=time_arg,
                                 family=family, cov_struct=va)
        formula_results.append(model.fit())

    for result in formula_results:
        assert_almost_equal(reference.params, result.params, decimal=8)

    # Wrapper check on the first formula-based result (array groups/time).
    check_wrapper(formula_results[0])
def gendat_ar1():
    """
    Configure and run an AR_simulator, returning it together with a
    fresh Autoregressive() instance.

    NOTE(review): `ar` is not defined in this block; it is presumably
    supplied by an enclosing or module-level scope -- confirm.
    """
    simulator = AR_simulator()
    simulator.ngroups = 200
    simulator.params = np.r_[0, -0.8, 1.2, 0, 0.5]
    simulator.error_sd = 2
    simulator.dparams = [ar]
    simulator.simulate()

    covariance = Autoregressive()
    return simulator, covariance
def gendat_ar0(msg=False):
    """
    Configure and run an AR_simulator, returning it together with a
    fresh Autoregressive() instance.

    `msg` is accepted but not used by this function.

    NOTE(review): `ar` is not defined in this block; it is presumably
    supplied by an enclosing or module-level scope -- confirm.
    """
    simulator = AR_simulator()
    simulator.ngroups = 200
    simulator.params = np.r_[0, -1, 1, 0, 0.5]
    simulator.error_sd = 2
    simulator.dparams = [ar]
    simulator.simulate()

    covariance = Autoregressive()
    return simulator, covariance
def test_autoregressive(self):
    """
    Fit Gaussian GEE models with an Autoregressive covariance structure
    to seeded simulated data for group sizes 1, 2 and 3, and compare the
    estimated dependence parameter and coefficients with stored values.
    """
    # Stored expectations, one entry per group size (1, 2, 3).
    expected_dep_params = [0, 0.589208623896, 0.559823804948]
    expected_params = [
        [1.08043787, 1.12709319, 0.90133927],
        [0.9613677, 1.05826987, 0.90832055],
        [1.05370439, 0.96084864, 0.93923374],
    ]

    np.random.seed(342837482)

    num_group = 100
    ar_param = 0.5
    k = 3

    ga = Gaussian()

    for case, gsize in enumerate((1, 2, 3)):
        # Within-group correlation ar_param ** |i - j| and its Cholesky
        # factor, used to give the errors that correlation.
        positions = np.arange(gsize)
        lags = np.abs(positions[:, None] - positions[None, :])
        chol = np.linalg.cholesky(ar_param ** lags)

        endog_parts = []
        exog_parts = []
        group_parts = []
        for g in range(num_group):
            # Random draws must stay in this order: covariates, then noise.
            covariates = np.random.normal(size=(gsize, k))
            exog_parts.append(covariates)
            mean = covariates.sum(1)
            noise = np.dot(chol, np.random.normal(size=gsize))
            endog_parts.append(mean + noise)
            group_parts.append(g * np.ones(gsize))

        endog = np.concatenate(endog_parts)
        groups = np.concatenate(group_parts)
        exog = np.concatenate(exog_parts, axis=0)

        cov = Autoregressive()
        result = GEE(endog, exog, groups, family=ga, cov_struct=cov).fit()

        assert_almost_equal(cov.dep_params, expected_dep_params[case])
        assert_almost_equal(result.params, expected_params[case])
def test_logistic(self):
    """
    Compare binomial GEE fits with reference values produced by R's gee
    package, using both the array interface and the formula interface.

    R code for comparing results:

    library(gee)
    Z = read.csv("results/gee_logistic_1.csv", header=FALSE)
    Y = Z[,2]
    Id = Z[,1]
    X1 = Z[,3]
    X2 = Z[,4]
    X3 = Z[,5]

    mi = gee(Y ~ X1 + X2 + X3, id=Id, family=binomial,
             corstr="independence")
    smi = summary(mi)
    u = coefficients(smi)
    cfi = paste(u[,1], collapse=",")
    sei = paste(u[,4], collapse=",")

    me = gee(Y ~ X1 + X2 + X3, id=Id, family=binomial,
             corstr="exchangeable")
    sme = summary(me)
    u = coefficients(sme)
    cfe = paste(u[,1], collapse=",")
    see = paste(u[,4], collapse=",")

    ma = gee(Y ~ X1 + X2 + X3, id=Id, family=binomial,
             corstr="AR-M")
    sma = summary(ma)
    u = coefficients(sma)
    cfa = paste(u[,1], collapse=",")
    sea = paste(u[,4], collapse=",")

    sprintf("cf = [[%s],[%s],[%s]]", cfi, cfe, cfa)
    sprintf("se = [[%s],[%s],[%s]]", sei, see, sea)
    """
    endog, exog, group = load_data("gee_logistic_1.csv")

    # Time values for the autoregressive model: position within group.
    T = np.zeros(len(endog))
    for ii in set(group):
        jj = np.flatnonzero(group == ii)
        T[jj] = range(len(jj))

    family = Binomial()
    ve = Exchangeable()
    vi = Independence()
    va = Autoregressive()

    # From R gee; rows are independence, exchangeable, AR-M.
    cf = [
        [0.0167272965285882, 1.13038654425893,
         -1.86896345082962, 1.09397608331333],
        [0.0178982283915449, 1.13118798191788,
         -1.86133518416017, 1.08944256230299],
        [0.0109621937947958, 1.13226505028438,
         -1.88278757333046, 1.09954623769449],
    ]
    se = [
        [0.127291720283049, 0.166725808326067,
         0.192430061340865, 0.173141068839597],
        [0.127045031730155, 0.165470678232842,
         0.192052750030501, 0.173174779369249],
        [0.127240302296444, 0.170554083928117,
         0.191045527104503, 0.169776150974586],
    ]

    for j, v in enumerate((vi, ve, va)):
        md = GEE(endog, exog, group, T, family, v)
        mdf = md.fit()
        # The autoregressive model is fitted but its estimates are not
        # compared with the R values.
        if v is not va:
            assert_almost_equal(mdf.params, cf[j], decimal=6)
            assert_almost_equal(mdf.standard_errors(), se[j], decimal=6)

    # Test with formulas.  The first exog column is left out of the data
    # frame (presumably the intercept, which the formula supplies --
    # TODO confirm).
    D = np.concatenate((endog[:, None], group[:, None], exog[:, 1:]),
                       axis=1)
    D = pd.DataFrame(D)
    D.columns = ["Y", "Id"] + [
        "X%d" % (k + 1) for k in range(exog.shape[1] - 1)
    ]

    for j, v in enumerate((vi, ve)):
        # Same calling convention as the other formula tests in this
        # file: (formula, groups, data, ...) with the `cov_struct` keyword.
        md = GEE.from_formula("Y ~ X1 + X2 + X3", "Id", D,
                              family=family, cov_struct=v)
        mdf = md.fit()
        assert_almost_equal(mdf.params, cf[j], decimal=6)
        assert_almost_equal(mdf.standard_errors(), se[j], decimal=6)

    # Check for run-time exceptions in summary
    print(mdf.summary())