def test_equivalence(self):
    """
    Check that an Equivalence covariance structure can reproduce an
    Exchangeable covariance structure.

    An Equivalence structure is built with two pair classes per group
    (class 0 for the diagonal positions, class 1 for the strictly
    lower-triangular positions) and its GEE fit is compared with the
    fit obtained from ``Exchangeable`` on the same data.
    """

    np.random.seed(3424)
    endog = np.random.normal(size=20)
    exog = np.random.normal(size=(20, 2))
    exog[:, 0] = 1
    groups = np.kron(np.arange(5), np.ones(4))
    groups[12:] = 3  # Create unequal size groups

    # Set up an Equivalence covariance structure to mimic an
    # Exchangeable covariance structure.  The first three groups hold
    # four observations each, the last one holds the remaining eight.
    start = [0, 4, 8, 12]
    sizes = [4, 4, 4, 8]
    pairs = {}
    for k, (offset, size) in enumerate(zip(start, sizes)):
        within = np.arange(size)
        low_i, low_j = np.tril_indices(size, -1)
        pairs[k] = {
            # Diagonal values (variance parameters).  Two distinct
            # arrays are created on purpose, as in the original.
            0: (offset + within, offset + within),
            # Off-diagonal pairs (covariance parameters)
            1: (offset + low_i, offset + low_j),
        }

    ex = sm.cov_struct.Exchangeable()
    model1 = sm.GEE(endog, exog, groups, cov_struct=ex)
    result1 = model1.fit()

    for return_cov in False, True:

        ec = sm.cov_struct.Equivalence(pairs, return_cov=return_cov)
        model2 = sm.GEE(endog, exog, groups, cov_struct=ec)
        result2 = model2.fit()

        # Use large atol/rtol for the correlation case since there
        # are some small differences in the results due to degree
        # of freedom differences.
        tol = 1e-6 if return_cov else 1e-3

        assert_allclose(result1.params, result2.params, atol=tol, rtol=tol)
        assert_allclose(result1.bse, result2.bse, atol=tol, rtol=tol)
        assert_allclose(result1.scale, result2.scale, atol=tol, rtol=tol)
def dosim(hyp, cov_struct=None, mcrep=500):
    """
    Run a Monte Carlo simulation of the GEE score test.

    Each replication draws responses through ``negbinom``, fits a null
    model (design ``x0``) and an alternative model (design ``x``) as
    Poisson GEEs with ``scale='X2'``, and score-tests the alternative
    against the null.  The names ``n``, ``m``, ``r``, ``mu``, ``scale``,
    ``x0``, ``x``, ``grp``, ``negbinom`` and ``norm`` are taken from the
    enclosing scope.

    Parameters
    ----------
    hyp
        Key used to select the mean structure, as ``mu[hyp]``.
    cov_struct
        Covariance structure handed to both GEE models.
    mcrep : int
        Number of Monte Carlo replications.

    Returns
    -------
    rslt : list
        ``[mean p-value, proportion of p-values below 0.1]``.
    scales : list of two lists
        Estimated scale per replication, for the null model (index 0)
        and the alternative model (index 1).
    """

    null_scales = []
    alt_scales = []
    pvalues = []

    for _ in range(mcrep):

        # Build "probability points" that are uniformly distributed
        # and correlated within clusters: mix observation-level noise
        # with a cluster-level draw repeated m times, then map through
        # the normal CDF.
        noise = np.random.normal(size=n)
        shared = np.kron(np.random.normal(size=n // m), np.ones(m))
        latent = r * noise + np.sqrt(1 - r**2) * shared
        prob_points = norm.cdf(latent)

        # Observed responses
        y = negbinom(prob_points, mu=mu[hyp], scale=scale)

        # Null model
        null_model = sm.GEE(y, x0, groups=grp, cov_struct=cov_struct,
                            family=sm.families.Poisson())
        null_fit = null_model.fit(scale='X2')
        null_scales.append(null_fit.scale)

        # Alternative model
        alt_model = sm.GEE(y, x, groups=grp, cov_struct=cov_struct,
                           family=sm.families.Poisson())
        alt_fit = alt_model.fit(scale='X2')
        alt_scales.append(alt_fit.scale)

        # Score test of the alternative against the null
        score = alt_model.compare_score_test(null_fit)
        pvalues.append(score["p-value"])

    pvalues = np.asarray(pvalues)
    summary = [np.mean(pvalues), np.mean(pvalues < 0.1)]

    return summary, [null_scales, alt_scales]
def vcfassoc(formula, covariate_df, groups=None):
    """
    Fit an association model for ``formula`` and report the genotype term.

    The model is chosen from ``groups``:

    * ``None``: a binomial GLM (rows with missing values dropped);
    * a ``pd.DataFrame`` or ``np.ndarray``: a GLS on the logit-transformed
      response, using ``groups`` (indexed by the design-matrix index) as
      ``sigma``;
    * anything else: treated as a key into ``covariate_df`` that gives the
      group labels for a binomial GEE with an Exchangeable structure.

    Returns
    -------
    dict
        Keys ``'OR'``, ``'pvalue'``, ``'z'`` and ``'OR_CI'`` for the
        genotype column, plus ``'df_resid'`` when the fitted result
        exposes that attribute.
    """

    endog, design = patsy.dmatrices(str(formula), covariate_df,
                                    return_type='dataframe')
    # Locate the design-matrix column that holds the genotype.
    geno = get_genotype_ix(design)
    binomial = sm.families.Binomial
    logit = sm.families.links.Logit()

    if groups is None:
        model = sm.GLM(endog, design, missing='drop', family=binomial())
    elif isinstance(groups, (pd.DataFrame, np.ndarray)):
        # NOTE(review): `.ix` was deprecated in pandas 0.20 and removed in
        # 1.0, and a NumPy array has no `.ix` accessor at all, so the
        # ndarray case admitted by the isinstance check cannot work here.
        # Pick `.loc`/`.iloc` once the indexing intent is confirmed.
        model = sm.GLS(logit(endog), design,
                       sigma=groups.ix[design.index, design.index])
    else:
        model = sm.GEE(endog, design, cov_struct=Exchangeable(),
                       groups=covariate_df[groups], family=binomial())

    fitted = model.fit(maxiter=1000)

    res = {}
    res['OR'] = np.exp(fitted.params[geno])
    res['pvalue'] = fitted.pvalues[geno]
    res['z'] = fitted.tvalues[geno]
    # NOTE(review): same `.ix` caveat as above; whether `geno` is a label
    # or a position depends on get_genotype_ix, which is not shown here.
    res['OR_CI'] = tuple(np.exp(fitted.conf_int().ix[geno, :]))

    # Not every result class provides df_resid; report it when present.
    try:
        res['df_resid'] = fitted.df_resid
    except AttributeError:
        pass
    return res
def test_margins_gaussian(self):
    """
    Check marginal effects for a Gaussian GEE fit.  Marginal
    effects and ordinary effects should be equal.
    """

    n = 40
    np.random.seed(34234)
    exog = np.random.normal(size=(n, 3))
    exog[:, 0] = 1

    # Ten clusters of four consecutive observations each.  Integer
    # division keeps the argument to arange (and the labels) integral.
    groups = np.kron(np.arange(n // 4), np.r_[1, 1, 1, 1])

    # The response depends linearly on the second design column only.
    endog = exog[:, 1] + np.random.normal(size=n)

    model = sm.GEE(endog, exog, groups)
    result = model.fit(
        start_params=[-4.88085602e-04, 1.18501903, 4.78820100e-02])

    marg = result.get_margeff()

    # The intercept is excluded from the comparison.
    assert_allclose(marg.margeff, result.params[1:])
    assert_allclose(marg.margeff_se, result.bse[1:])
def test_equivalence_from_pairs(self):
    """
    Check the pairs that an Equivalence structure derives from labels.

    For every group the pair classes must together hold m*(m+1)/2
    index pairs (m being the group size) and no index pair may occur
    twice.  A short fit is run afterwards as a smoke test.
    """

    np.random.seed(3424)
    endog = np.random.normal(size=50)
    exog = np.random.normal(size=(50, 2))
    exog[:, 0] = 1
    groups = np.kron(np.arange(5), np.ones(10))
    groups[30:] = 3  # Create unequal size groups

    # Five label values, ten of each, in random order.
    labels = np.kron(np.arange(5), np.ones(10)).astype(np.int32)
    shuffle = np.random.permutation(len(labels))
    labels = labels[shuffle]

    eq = sm.cov_struct.Equivalence(labels=labels, return_cov=True)
    model1 = sm.GEE(endog, exog, groups, cov_struct=eq)

    # Build the pairs by hand rather than through init so that they
    # are inspected before reindexing.
    eq._pairs_from_labels()

    # Every group must provide room for each element exactly once.
    for g in model1.group_labels:
        n_pairs = sum(len(v[0]) for v in eq.pairs[g].values())
        m = (groups == g).sum()
        assert_allclose(n_pairs, m * (m + 1) / 2)

    # No index pair may show up more than once.
    seen = set()
    for g in model1.group_labels:
        for v in eq.pairs[g].values():
            for key in zip(v[0], v[1]):
                assert key not in seen
                seen.add(key)

    # Smoke test
    eq = sm.cov_struct.Equivalence(labels=labels, return_cov=True)
    model1 = sm.GEE(endog, exog, groups, cov_struct=eq)
    model1.fit(maxiter=2)
def setup(self):
    """
    Fit a Poisson GEE with an Independence covariance structure on
    simulated counts and store the fit in ``self.results``.
    """
    # Refit for every test because the tests modify the results.
    design = self.exog
    np.random.seed(987689)
    rate = np.exp(design.sum(1) - design.mean())
    y_count = np.random.poisson(rate)
    n_obs = design.shape[0]
    groups = np.random.randint(0, 4, size=n_obs)

    # Starting values speed the test up; difficult convergence is not
    # what is being tested here.
    init = np.array([0., 1., 1., 1.])

    indep = sm.cov_struct.Independence()
    poisson = sm.families.Poisson()
    model = sm.GEE(y_count, self.exog, groups, family=poisson,
                   cov_struct=indep)
    self.results = model.fit(start_params=init)
def setup(self):
    """
    Fit a Poisson GEE with an Independence covariance structure and
    ``cov_type='bias_reduced'`` on simulated counts; the fit is stored
    in ``self.results``.
    """
    # Refit for every test because the tests modify the results.
    design = self.exog
    np.random.seed(987689)
    # Centre the row sums by their own mean (not by the overall mean
    # of the design matrix) before exponentiating.
    row_sums = design.sum(1)
    y_count = np.random.poisson(np.exp(row_sums - row_sums.mean(0)))
    groups = np.random.randint(0, 4, size=design.shape[0])

    # Starting values speed the test up; difficult convergence is not
    # what is being tested here.
    # Reference values noted in the original source:
    # params_est = [-0.0063238, 0.99463752, 1.02790201, 0.98080081]
    init = np.array([0., 1., 1., 1.])

    indep = sm.cov_struct.Independence()
    poisson = sm.families.Poisson()
    model = sm.GEE(y_count, self.exog, groups, family=poisson,
                   cov_struct=indep)
    self.results = model.fit(start_params=init, cov_type='bias_reduced')
def setup(self):
    """
    Fit a Poisson GEE with an Independence covariance structure and
    ``cov_type='naive'`` on simulated counts; the fit is stored in
    ``self.results``.
    """
    # fit for each test, because results will be changed by test
    x = self.exog
    np.random.seed(987689)
    # Centre the row sums by their own mean before exponentiating.
    y_count = np.random.poisson(np.exp(x.sum(1) - x.sum(1).mean(0)))
    groups = np.random.randint(0, 4, size=x.shape[0])
    # use start_params to speed up test, difficult convergence not tested
    start_params = np.array([0., 1., 1., 1.])

    # Use the covariance structure exposed through the statsmodels API,
    # like the other fixtures, instead of importing it from the
    # `dependence_structures` module path.
    vi = sm.cov_struct.Independence()
    family = sm.families.Poisson()
    model = sm.GEE(y_count, self.exog, groups, family=family,
                   cov_struct=vi)
    self.results = model.fit(start_params=start_params, cov_type='naive')
def test_margins_poisson(self):
    """
    Check marginal effects for a Poisson GEE fit.
    """

    np.random.seed(34234)
    endog = np.array([10, 15, 12, 13, 20, 18, 26, 29])
    # Intercept column plus a 0/1 indicator (four zeros, four ones).
    indicator = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    exog = np.column_stack([np.ones(8), indicator])

    # Every observation forms its own group.
    groups = np.arange(8)

    poisson = sm.families.Poisson()
    fit = sm.GEE(endog, exog, groups, family=poisson).fit(
        cov_type='naive', start_params=[2.52572864, 0.62057649])

    marg = fit.get_margeff()

    assert_allclose(marg.margeff, np.r_[11.0928], rtol=1e-6)
    assert_allclose(marg.margeff_se, np.r_[3.269015], rtol=1e-6)
def test_plots():
    """
    Smoke-test the diagnostic plot methods of a GEE fit.

    A GEE model is fitted to random data (100 observations in 50
    groups of two) and each plotting method is called for the
    regressor at index 1; the figures are closed right away.
    """

    np.random.seed(378)
    # The one-dimensional array is the response and the two-column
    # array is the design matrix.  The draws are made in the same
    # order as before, so the data passed to GEE are unchanged.
    endog = np.random.normal(size=100)
    exog = np.random.normal(size=(100, 2))
    groups = np.kron(np.arange(50), np.r_[1, 1])

    model = sm.GEE(endog, exog, groups)
    result = model.fit()

    import matplotlib.pyplot as plt

    # Smoke tests
    fig = result.plot_added_variable(1)
    plt.close(fig)
    fig = result.plot_partial_residuals(1)
    plt.close(fig)
    fig = result.plot_ceres_residuals(1)
    plt.close(fig)
def test_margins_logistic(self):
    """
    Check marginal effects for a binomial GEE fit.  Comparison
    comes from Stata.
    """

    np.random.seed(34234)
    endog = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    # Intercept column plus a single covariate.
    covariate = np.array([1, 2, 1, 1, 2, 1, 2, 2])
    exog = np.column_stack([np.ones(8), covariate])

    # Every observation forms its own group.
    groups = np.arange(8)

    binomial = sm.families.Binomial()
    fit = sm.GEE(endog, exog, groups, family=binomial).fit(
        cov_type='naive', start_params=[-3.29583687, 2.19722458])

    marg = fit.get_margeff()

    assert_allclose(marg.margeff, np.r_[0.4119796])
    assert_allclose(marg.margeff_se, np.r_[0.1379962], rtol=1e-6)
def test_multinomial(self):
    """
    Check the 2-class multinomial (nominal) GEE fit against
    logistic regression.
    """

    np.random.seed(34234)
    endog = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    # Intercept column plus a single covariate.
    covariate = np.array([1, 2, 1, 1, 2, 1, 2, 2])
    exog = np.column_stack([np.ones(8), covariate])

    # Every observation forms its own group.
    groups = np.arange(8)

    nominal_fit = sm.NominalGEE(endog, exog, groups).fit(
        cov_type='naive', start_params=[3.295837, -2.197225])

    binomial = sm.families.Binomial()
    logit_fit = sm.GEE(endog, exog, groups, family=binomial).fit(
        cov_type='naive')

    # The nominal parameters are compared with the negated logistic
    # ones; the standard errors are compared directly.
    assert_allclose(nominal_fit.params, -logit_fit.params, rtol=1e-5)
    assert_allclose(nominal_fit.bse, logit_fit.bse, rtol=1e-5)
# Example script: fit a binomial GEE with an autoregressive covariance
# structure to the Spector dataset and print the fit summary.
import statsmodels.api as sm
# `smf` is not used in the visible part of this script.
import statsmodels.formula.api as smf

# Load the Spector dataset through the statsmodels datasets package.
spector_data = sm.datasets.spector.load()

# Binomial family and autoregressive covariance structure for the model.
family = sm.families.Binomial()
va = sm.cov_struct.Autoregressive()

# NOTE(review): the group labels are read from `spector_data.group`;
# confirm that the loaded dataset object actually provides this attribute.
model = sm.GEE(spector_data.endog, spector_data.exog, spector_data.group,
               family=family, cov_struct=va)
result = model.fit()
print(result.summary())