def test_mixedlm_balances(self):
    """Compare `mixedlm` p-values and coefficients with reference values.

    The model is built from ``self.table`` and ``self.metadata`` with the
    formula ``"x1 + x2"`` and ``groups="groups"``.  Observed and expected
    frames are both sorted along rows and columns before being compared,
    so the check is independent of the order in which they are laid out.
    """
    balances = ['y1', 'y2']
    terms = ['Intercept', 'Group Var', 'x1', 'x2']

    def canonical(frame):
        # Rows first, then columns: gives an order-independent layout.
        return frame.sort_index(axis=0).sort_index(axis=1)

    model = mixedlm("x1 + x2", self.table, self.metadata, groups="groups")
    model.fit()

    # Reference values are written one row per balance and then transposed
    # so that the model terms index the rows.
    pvalue_rows = [
        [0.0994110906314, 4.4193804e-05, 3.972325e-35, 3.568599e-30],
        [4.82688604e-236, 4.4193804e-05, 3.972325e-35, 3.568599e-30],
    ]
    expected_pvalues = pd.DataFrame(
        pvalue_rows, index=balances, columns=terms).T
    pdt.assert_frame_equal(canonical(model.pvalues),
                           canonical(expected_pvalues),
                           check_less_precise=True)

    coefficient_rows = [
        [0.211451, 0.0935786, 1.022008, 0.924873],
        [4.211451, 0.0935786, 1.022008, 0.924873],
    ]
    expected_coefficients = pd.DataFrame(
        coefficient_rows, index=balances, columns=terms).T
    pdt.assert_frame_equal(canonical(model.coefficients()),
                           canonical(expected_coefficients),
                           check_less_precise=True)
def test_mixedlm_balances(self):
    """Compare tree-based `mixedlm` results with reference values.

    The model is built from ``self.table``, ``self.metadata`` and
    ``self.tree`` with the formula ``"x1 + x2"`` and ``groups="groups"``.
    The expected frames have one row per balance (``Y1``, ``Y2``) and one
    column per model term, and are compared without any re-ordering.
    """
    balances = ['Y1', 'Y2']
    terms = ['Intercept', 'Intercept RE', 'x1', 'x2']

    model = mixedlm("x1 + x2", self.table, self.metadata, self.tree,
                    groups="groups")
    model.fit()

    expected_pvalues = pd.DataFrame(
        [[4.923122e-236, 3.180390e-40, 3.972325e-35, 3.568599e-30],
         [9.953418e-02, 3.180390e-40, 3.972325e-35, 3.568599e-30]],
        index=balances, columns=terms)
    pdt.assert_frame_equal(model.pvalues, expected_pvalues,
                           check_less_precise=True)

    expected_coefficients = pd.DataFrame(
        [[4.211451, -0.305906, 1.022008, 0.924873],
         [0.211451, -0.305906, 1.022008, 0.924873]],
        index=balances, columns=terms)
    pdt.assert_frame_equal(model.coefficients(), expected_coefficients,
                           check_less_precise=True)
def test_percent_explained(self):
    """Check that ``percent_explained`` yields 0.5 for both ``y1`` and ``y2``.

    The model is built from ``self.table`` and ``self.metadata`` with the
    formula ``"x1 + x2"`` and ``groups="groups"``.
    """
    fitted = mixedlm("x1 + x2", self.table, self.metadata, groups="groups")
    fitted.fit()

    observed = fitted.percent_explained()
    expected = pd.Series([0.5, 0.5], index=['y1', 'y2'])
    pdt.assert_series_equal(observed, expected, check_less_precise=True)
def test_summary_head(self):
    """Compare the text of ``summary(ndim=1)`` with a stored reference file.

    The reference text is read from ``exp_lme_results2.txt`` (located via
    ``get_data_path``) and must match the stringified summary exactly.
    """
    fitted = mixedlm("x1 + x2", self.table, self.metadata, self.tree,
                     groups="groups")
    fitted.fit()
    observed_text = str(fitted.summary(ndim=1))

    reference_path = get_data_path('exp_lme_results2.txt')
    with open(reference_path, 'r') as handle:
        expected_text = handle.read()

    self.assertEqual(observed_text, expected_text)
def test_visualization(self):
    """Check the CSV files written by `lme_summary` against references.

    A model built from ``self.table`` and ``self.metadata`` is fitted and
    passed to ``lme_summary`` together with ``self.results`` and
    ``self.tree``.  The files ``pvalues.csv``, ``coefficients.csv``,
    ``predicted.csv`` and ``residuals.csv`` are then read back from
    ``self.results`` and compared with expected values.

    ``npt.assert_allclose`` compares values by position and ignores
    labels, so the p-value and coefficient frames are sorted along both
    axes on both sides before comparison.
    """
    model = mixedlm("x1 + x2", self.table, self.metadata, groups="groups")
    model.fit()
    lme_summary(self.results, model, self.tree)

    def read_result(filename):
        # Every output file uses its first column as the index.
        return pd.read_csv(os.path.join(self.results, filename),
                           index_col=0)

    def canonical(frame):
        # Sort rows, then columns, so positional comparison lines up.
        return frame.sort_index(axis=0).sort_index(axis=1)

    pvals = read_result('pvalues.csv')
    coefs = read_result('coefficients.csv')
    pred = read_result('predicted.csv')
    resid = read_result('residuals.csv')

    exp_pvals = pd.DataFrame({
        'Intercept': {'Y1': 4.8268860492262526e-236,
                      'Y2': 0.099411090631406948},
        'Group Var': {'Y1': 4.4193804668281966e-05,
                      'Y2': 4.4193804668280984e-05},
        'x1': {'Y1': 3.9704936434633392e-35,
               'Y2': 3.9704936434628853e-35},
        'x2': {'Y1': 3.56912071867573e-30,
               'Y2': 3.56912071867573e-30}})
    npt.assert_allclose(canonical(pvals), canonical(exp_pvals), rtol=1e-5)

    exp_coefs = pd.DataFrame({
        'Intercept': {'Y1': 4.2115280233151946,
                      'Y2': 0.211528023315187},
        'Group Var': {'Y1': 0.093578639287859755,
                      'Y2': 0.093578639287860019},
        'x1': {'Y1': 1.0220072967452645,
               'Y2': 1.0220072967452651},
        'x2': {'Y1': 0.92487193877761575,
               'Y2': 0.92487193877761564}})
    # Previously only the rows of `coefs` were sorted while the expected
    # columns were sorted; sort both axes on both sides, as for p-values.
    npt.assert_allclose(canonical(coefs), canonical(exp_coefs),
                        rtol=1e-2, atol=1e-2)

    # The stored residual/prediction references are transposed relative to
    # the files written by lme_summary.
    exp_resid = pd.read_csv(get_data_path('exp_resid.csv'), index_col=0)
    npt.assert_allclose(resid, exp_resid.T, rtol=1e-2, atol=1e-2)

    exp_pred = pd.read_csv(get_data_path('exp_pred.csv'), index_col=0)
    npt.assert_allclose(pred, exp_pred.T, rtol=1e-2, atol=1e-2)
def test_mixedlm_zero_error(self):
    """Expect ``ValueError`` from building or fitting on an all-zero table.

    The table holds only zeros for six samples (``s1``..``s6``) and three
    features (``a``, ``b``, ``c``); the metadata covers ``s1``..``s5``.
    Both the ``mixedlm`` call and ``fit`` sit inside the ``assertRaises``
    block, so either may be the one that raises.
    """
    sample_ids = ['s1', 's2', 's3', 's4', 's5', 's6']
    zero_counts = {sample: [0, 0, 0] for sample in sample_ids}
    # Transpose so that samples are rows and features are columns.
    table = pd.DataFrame(zero_counts, index=['a', 'b', 'c']).T

    tree = TreeNode.read(['((c,d),(b,a)Y2)Y1;'])

    metadata = pd.DataFrame(
        {'lame': [1, 1, 1, 2, 2],
         'real': [1, 2, 3, 4, 5]},
        index=sample_ids[:5])

    with self.assertRaises(ValueError):
        res = mixedlm('real + lame', table, metadata, tree, groups='lame')
        res.fit()
def test_visualization(self):
    """Check the CSV files written by `lme_summary` against references.

    A model built from ``self.table`` and ``self.metadata`` is fitted and
    passed to ``lme_summary`` together with ``self.results`` and
    ``self.tree``.  The four CSV outputs are read back from
    ``self.results`` and compared positionally (``npt.assert_allclose``)
    with expected values, without any re-ordering.
    """
    fitted = mixedlm("x1 + x2", self.table, self.metadata, groups="groups")
    fitted.fit()
    lme_summary(self.results, fitted, self.tree)

    def read_result(filename):
        # Every output file uses its first column as the index.
        return pd.read_csv(os.path.join(self.results, filename),
                           index_col=0)

    observed_pvals = read_result('pvalues.csv')
    observed_coefs = read_result('coefficients.csv')
    observed_pred = read_result('predicted.csv')
    observed_resid = read_result('residuals.csv')

    expected_pvals = pd.DataFrame({
        'Intercept': {'Y1': 4.8268860492262526e-236,
                      'Y2': 0.099411090631406948},
        'groups RE': {'Y1': 4.4193804668281966e-05,
                      'Y2': 4.4193804668280984e-05},
        'x1': {'Y1': 3.9704936434633392e-35,
               'Y2': 3.9704936434628853e-35},
        'x2': {'Y1': 3.56912071867573e-30,
               'Y2': 3.56912071867573e-30}})
    npt.assert_allclose(observed_pvals, expected_pvals, rtol=1e-5)

    expected_coefs = pd.DataFrame({
        'Intercept': {'Y1': 4.2115280233151946,
                      'Y2': 0.211528023315187},
        'groups RE': {'Y1': 0.093578639287859755,
                      'Y2': 0.093578639287860019},
        'x1': {'Y1': 1.0220072967452645,
               'Y2': 1.0220072967452651},
        'x2': {'Y1': 0.92487193877761575,
               'Y2': 0.92487193877761564}})
    npt.assert_allclose(observed_coefs, expected_coefs,
                        rtol=1e-2, atol=1e-2)

    # The stored residual/prediction references are transposed relative to
    # the files written by lme_summary.
    expected_resid = pd.read_csv(get_data_path('exp_resid.csv'),
                                 index_col=0)
    npt.assert_allclose(observed_resid, expected_resid.T,
                        rtol=1e-2, atol=1e-2)

    expected_pred = pd.read_csv(get_data_path('exp_pred.csv'),
                                index_col=0)
    npt.assert_allclose(observed_pred, expected_pred.T,
                        rtol=1e-2, atol=1e-2)
def test_mixedlm_balances(self):
    """Compare `mixedlm` p-values and coefficients with reference values.

    The model is built from ``self.table`` and ``self.metadata`` with the
    formula ``"x1 + x2"`` and ``groups="groups"``.  Expected frames are
    written one row per balance, sorted by balance name and transposed,
    then compared with the model output as-is.
    """
    balances = ['y1', 'y2']
    terms = ['Intercept', 'groups RE', 'x1', 'x2']

    model = mixedlm("x1 + x2", self.table, self.metadata, groups="groups")
    model.fit()

    pvalue_rows = [
        [0.0994110906314, 4.4193804e-05, 3.972325e-35, 3.568599e-30],
        [4.82688604e-236, 4.4193804e-05, 3.972325e-35, 3.568599e-30],
    ]
    by_balance = pd.DataFrame(pvalue_rows, index=balances, columns=terms)
    expected_pvalues = by_balance.sort_index().T
    pdt.assert_frame_equal(model.pvalues, expected_pvalues,
                           check_less_precise=True)

    coefficient_rows = [
        [0.211451, 0.0935786, 1.022008, 0.924873],
        [4.211451, 0.0935786, 1.022008, 0.924873],
    ]
    by_balance = pd.DataFrame(coefficient_rows, index=balances,
                              columns=terms)
    expected_coefficients = by_balance.sort_index().T
    pdt.assert_frame_equal(model.coefficients(), expected_coefficients,
                           check_less_precise=True)
def test_mixedlm_balances_vcf(self):
    """Check `mixedlm` with ``re_formula="0+z1+z2"`` on simulated data.

    1600 observations are simulated with a fixed seed, in 100 groups of
    16.  The response is the sum of two covariates plus an error built
    from random slopes on ``z1``/``z2``, two nested block effects (blocks
    of 4 and of 2 observations) and iid noise.  ``y2`` is ``y1`` shifted
    by 4.  The block labels ``v1``/``v2`` are stored in the metadata but
    are not referenced by the formulas passed to ``mixedlm``.

    The order of the ``np.random`` draws below determines the simulated
    data and therefore the reference values; do not reorder them.
    """
    np.random.seed(6241)
    n = 1600
    n_groups = n // 16

    exog = np.random.normal(size=(n, 2))
    groups = np.kron(np.arange(n_groups), np.ones(16))

    # Random slopes: one pair per group, repeated for its 16 observations
    # and scaled by the random-effects design.
    exog_re = np.random.normal(size=(n, 2))
    group_slopes = np.random.normal(size=(n_groups, 2))
    slope_term = (np.kron(group_slopes, np.ones((16, 1))) * exog_re).sum(1)

    # First variance component: blocks of 4 observations.
    subgroups1 = np.kron(np.arange(n // 4), np.ones(4))
    block4_term = np.kron(2 * np.random.normal(size=n // 4), np.ones(4))

    # Second variance component: blocks of 2 observations.
    subgroups2 = np.kron(np.arange(n // 2), np.ones(2))
    block2_term = np.kron(2 * np.random.normal(size=n // 2), np.ones(2))

    # iid errors
    iid_term = np.random.normal(size=n)

    # Built-in sum starts at 0 and adds left to right, which keeps the
    # floating-point accumulation order of the error vector fixed.
    errors = sum([slope_term, block4_term, block2_term, iid_term])
    endog = exog.sum(1) + errors

    df = pd.DataFrame(index=range(n))
    column_values = [
        ("y1", endog),
        ("y2", endog + 4),
        ("groups", groups),
        ("x1", exog[:, 0]),
        ("x2", exog[:, 1]),
        ("z1", exog_re[:, 0]),
        ("z2", exog_re[:, 1]),
        ("v1", subgroups1),
        ("v2", subgroups2),
    ]
    for name, values in column_values:
        df[name] = values

    table = df[["y1", "y2"]]
    metadata = df[['x1', 'x2', 'z1', 'z2', 'v1', 'v2', 'groups']]

    model = mixedlm("x1 + x2", table, metadata, groups="groups",
                    re_formula="0+z1+z2")
    model.fit()

    balances = ['y1', 'y2']
    terms = ['Intercept', 'x1', 'x2', 'z1 RE', 'z1 RE x z2 RE', 'z2 RE']

    # Only the intercept differs between y1 and y2.
    shared_pvalues = [3.858750e-39, 2.245068e-33, 2.552217e-05,
                      0.923418, 6.645741e-34]
    exp_pvalues = pd.DataFrame(
        [[0.038015] + shared_pvalues,
         [0.000000] + shared_pvalues],
        columns=terms, index=balances).T

    shared_coefficients = [1.030013, 0.935514, 0.115082,
                           -0.001962, 0.14792]
    exp_coefficients = pd.DataFrame(
        [[0.163141] + shared_coefficients,
         [4.163141] + shared_coefficients],
        columns=terms, index=balances).T

    pdt.assert_frame_equal(model.pvalues, exp_pvalues,
                           check_less_precise=True)
    pdt.assert_frame_equal(model.coefficients(), exp_coefficients,
                           check_less_precise=True)
def test_write(self):
    """Smoke-test ``write_pickle`` on a fitted model.

    The test only requires that fitting and writing complete without
    raising; nothing is asserted about the written file.
    """
    # NOTE(review): the pickle is written to a relative path and is not
    # removed here -- confirm that cleanup happens elsewhere.
    output_path = 'lme.pickle'

    fitted = mixedlm("x1 + x2", self.table, self.metadata, self.tree,
                     groups="groups")
    fitted.fit()
    fitted.write_pickle(output_path)
def test_mixedlm_balances_vcf(self):
    """Check `mixedlm` with ``re_formula="0+z1+z2"`` on simulated data.

    1600 observations are simulated with a fixed seed, in 100 groups of
    16.  The response is the sum of two covariates plus an error built
    from random slopes on ``z1``/``z2``, two nested block effects (blocks
    of 4 and of 2 observations) and iid noise.  ``y2`` is ``y1`` shifted
    by 4.  Observed and expected frames are sorted by row label before
    comparison.

    The order of the ``np.random`` draws below determines the simulated
    data and therefore the reference values; do not reorder them.
    """
    np.random.seed(6241)
    n = 1600
    n_groups = n // 16

    exog = np.random.normal(size=(n, 2))
    groups = np.kron(np.arange(n_groups), np.ones(16))

    # Random slopes: one pair per group, repeated for its 16 observations
    # and scaled by the random-effects design.
    exog_re = np.random.normal(size=(n, 2))
    group_slopes = np.random.normal(size=(n_groups, 2))
    slope_term = (np.kron(group_slopes, np.ones((16, 1))) * exog_re).sum(1)

    # First variance component: blocks of 4 observations.
    subgroups1 = np.kron(np.arange(n // 4), np.ones(4))
    block4_term = np.kron(2 * np.random.normal(size=n // 4), np.ones(4))

    # Second variance component: blocks of 2 observations.
    subgroups2 = np.kron(np.arange(n // 2), np.ones(2))
    block2_term = np.kron(2 * np.random.normal(size=n // 2), np.ones(2))

    # iid errors
    iid_term = np.random.normal(size=n)

    # Built-in sum starts at 0 and adds left to right, which keeps the
    # floating-point accumulation order of the error vector fixed.
    errors = sum([slope_term, block4_term, block2_term, iid_term])
    endog = exog.sum(1) + errors

    df = pd.DataFrame(index=range(n))
    column_values = [
        ("y1", endog),
        ("y2", endog + 4),
        ("groups", groups),
        ("x1", exog[:, 0]),
        ("x2", exog[:, 1]),
        ("z1", exog_re[:, 0]),
        ("z2", exog_re[:, 1]),
        ("v1", subgroups1),
        ("v2", subgroups2),
    ]
    for name, values in column_values:
        df[name] = values

    table = df[["y1", "y2"]]
    metadata = df[['x1', 'x2', 'z1', 'z2', 'v1', 'v2', 'groups']]

    model = mixedlm("x1 + x2", table, metadata, groups="groups",
                    re_formula="0+z1+z2")
    model.fit()

    balances = ['y1', 'y2']
    terms = ['Intercept', 'x1', 'x2', 'z1 Var', 'z1 x z2 Cov', 'z2 Var']

    # Only the intercept differs between y1 and y2.
    shared_pvalues = [3.858750e-39, 2.245068e-33, 2.552217e-05,
                      0.923418, 6.645741e-34]
    exp_pvalues = pd.DataFrame(
        [[0.038015] + shared_pvalues,
         [0.000000] + shared_pvalues],
        columns=terms, index=balances).T

    shared_coefficients = [1.030013, 0.935514, 0.115082,
                           -0.001962, 0.14792]
    exp_coefficients = pd.DataFrame(
        [[0.163141] + shared_coefficients,
         [4.163141] + shared_coefficients],
        columns=terms, index=balances).T

    def by_row_label(frame):
        return frame.sort_index(axis=0)

    pdt.assert_frame_equal(by_row_label(model.pvalues),
                           by_row_label(exp_pvalues),
                           check_less_precise=True)
    pdt.assert_frame_equal(by_row_label(model.coefficients()),
                           by_row_label(exp_coefficients),
                           check_less_precise=True)
def test_mixedlm_balances_vcf(self):
    """Check tree-based `mixedlm` with ``re_formula="0+z1+z2"``.

    1600 observations are simulated with a fixed seed, in 100 groups of
    16.  The response is the sum of two covariates plus an error built
    from random slopes on ``z1``/``z2``, two nested block effects (blocks
    of 4 and of 2 observations) and iid noise.  ``y2`` is ``y1`` shifted
    by 4.  The two responses are passed through ``ilr_inv`` to obtain a
    three-column table (``a``, ``b``, ``c``) that is modelled together
    with the tree ``(c, (b,a)Y2)Y1;``.

    The order of the ``np.random`` draws below determines the simulated
    data and therefore the reference values; do not reorder them.
    """
    np.random.seed(6241)
    n = 1600
    exog = np.random.normal(size=(n, 2))
    # Integer division throughout: group/block counts are used as array
    # sizes, and a float is not a valid shape on Python 3.
    groups = np.kron(np.arange(n // 16), np.ones(16))

    # Build up the random error vector
    errors = 0

    # The random effects
    exog_re = np.random.normal(size=(n, 2))
    slopes = np.random.normal(size=(n // 16, 2))
    slopes = np.kron(slopes, np.ones((16, 1))) * exog_re
    errors += slopes.sum(1)

    # First variance component
    subgroups1 = np.kron(np.arange(n // 4), np.ones(4))
    errors += np.kron(2 * np.random.normal(size=n // 4), np.ones(4))

    # Second variance component
    subgroups2 = np.kron(np.arange(n // 2), np.ones(2))
    errors += np.kron(2 * np.random.normal(size=n // 2), np.ones(2))

    # iid errors
    errors += np.random.normal(size=n)

    endog = exog.sum(1) + errors

    df = pd.DataFrame(index=range(n))
    df["y1"] = endog
    df["y2"] = endog + 2 * 2
    df["groups"] = groups
    df["x1"] = exog[:, 0]
    df["x2"] = exog[:, 1]
    df["z1"] = exog_re[:, 0]
    df["z2"] = exog_re[:, 1]
    df["v1"] = subgroups1
    df["v2"] = subgroups2

    tree = TreeNode.read(['(c, (b,a)Y2)Y1;'])
    iv = ilr_inv(df[["y1", "y2"]].values)

    table = pd.DataFrame(iv, columns=['a', 'b', 'c'])
    metadata = df[['x1', 'x2', 'z1', 'z2', 'v1', 'v2', 'groups']]

    res = mixedlm("x1 + x2", table, metadata, tree, groups="groups",
                  re_formula="0+z1+z2")
    res.fit()

    balances = ['Y1', 'Y2']
    terms = ['Intercept', 'x1', 'x2', 'z1 RE', 'z1 RE x z2 RE', 'z2 RE']

    exp_pvalues = pd.DataFrame(
        [[0.000000, 3.858750e-39, 2.245068e-33,
          2.434437e-35, 0.776775, 6.645741e-34],
         [0.038015, 3.858750e-39, 2.245068e-33,
          2.434437e-35, 0.776775, 6.645741e-34]],
        columns=terms, index=balances)

    exp_coefficients = pd.DataFrame(
        [[4.163141, 1.030013, 0.935514, 0.339239, -0.005792, 0.38456],
         [0.163141, 1.030013, 0.935514, 0.339239, -0.005792, 0.38456]],
        columns=terms, index=balances)

    pdt.assert_frame_equal(res.pvalues, exp_pvalues,
                           check_less_precise=True)
    pdt.assert_frame_equal(res.coefficients(), exp_coefficients,
                           check_less_precise=True)