def test_fast_scanner_statsmodel_gls():
    """Check the fast scanner's null-model standard errors against statsmodels.

    Two comparisons are made on the Longley dataset shipped with statsmodels:

    1. On the raw data, ``scanner.null_beta_se`` is compared with the square
       root of the diagonal of the least-squares inverse of ``X.T @ K^-1 @ X``,
       where ``K`` is the fitted LMM covariance.
    2. On standardised data, the scanner's standard errors are compared with
       the ``bse`` reported by ``sm.GLS`` (after undoing its scale factor).

    The test is skipped when statsmodels is not installed.
    """
    # NOTE(review): a later function in this file has the same name, so at
    # import time that definition replaces this one -- confirm which variant
    # is meant to be collected.
    from numpy.linalg import lstsq

    # statsmodels is only needed by this test; skip instead of erroring out
    # when it is unavailable.
    sm = pytest.importorskip("statsmodels.api")

    def _lstsq(A, B):
        # Least-squares solution of A @ x = B, without numpy's rcond warning.
        return lstsq(A, B, rcond=None)[0]

    data = sm.datasets.longley.load()
    data.exog = sm.add_constant(data.exog)

    # Estimate rho by regressing the OLS residuals on their one-step lag.
    ols_resid = sm.OLS(data.endog, data.exog).fit().resid
    resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
    rho = resid_fit.params[1]

    # Element-wise powers of rho, with exponents taken from a Toeplitz matrix.
    order = toeplitz(range(len(ols_resid)))
    sigma = rho ** order

    QS = economic_qs(sigma)
    lmm = LMM(data.endog, data.exog, QS)
    lmm.fit(verbose=False)

    # Evaluate the fitted covariance once and reuse it below.
    fitted_cov = lmm.covariance()
    scanner = lmm.get_fast_scanner()
    best_beta_se = _lstsq(data.exog.T @ _lstsq(fitted_cov, data.exog), eye(7))
    best_beta_se = sqrt(best_beta_se.diagonal())
    assert_allclose(scanner.null_beta_se, best_beta_se, atol=1e-5)

    # Second comparison: standardise the outcome and the covariates.
    endog = data.endog.copy()
    endog -= endog.mean(0)
    endog /= endog.std(0)

    exog = data.exog.copy()
    exog -= exog.mean(0)
    # Zero-variance columns would produce invalid/divide warnings here; they
    # are silenced and column 0 is reset to ones right after.
    with errstate(invalid="ignore", divide="ignore"):
        exog /= exog.std(0)
    exog[:, 0] = 1

    lmm = LMM(endog, exog, QS)
    lmm.fit(verbose=False)
    sigma = lmm.covariance()
    scanner = lmm.get_fast_scanner()

    gls_model = sm.GLS(endog, exog, sigma=sigma)
    gls_results = gls_model.fit()
    beta_se = gls_results.bse
    our_beta_se = sqrt(scanner.null_beta_covariance.diagonal())

    # statsmodels scales the covariance matrix we pass, that is why
    # we need to account for it here.
    assert_allclose(our_beta_se, beta_se / sqrt(gls_results.scale))
    assert_allclose(scanner.null_beta_se, beta_se / sqrt(gls_results.scale))
def test_lmm_interface():
    """Pin the values exposed by the LMM public interface on a tiny seeded fit.

    A three-sample model is fitted with ``restricted=False`` and every public
    accessor (covariance, mean, lml/value, X, beta, beta_covariance, delta,
    scale, v0, v1, name, sizes) is compared with a stored reference value.
    ``gradient`` is expected to raise ``NotImplementedError``.
    """

    def check(actual, desired):
        # Every numeric comparison in this test uses the same absolute tolerance.
        assert_allclose(actual, desired, atol=1e-7)

    rng = RandomState(1)
    nsamples = 3

    # Draw order matters for reproducibility: G, X, then the three vectors.
    G = rng.randn(nsamples, nsamples + 1)
    X = rng.randn(nsamples, 2)
    fixed_effects = rng.randn(2)
    random_effects = rng.randn(G.shape[1])
    noise = rng.randn(nsamples)

    y = X @ fixed_effects + G @ random_effects + noise
    y -= y.mean(0)
    y /= y.std(0)

    lmm = LMM(y, X, economic_qs_linear(G), restricted=False)
    lmm.name = "lmm"
    lmm.fit(verbose=False)

    expected_covariance = [
        [0.436311031439718, 2.6243891396439837e-16, 2.0432156171727483e-16],
        [2.6243891396439837e-16, 0.4363110314397185, 4.814313140426306e-16],
        [2.0432156171727483e-16, 4.814313140426305e-16, 0.43631103143971817],
    ]
    check(lmm.covariance(), expected_covariance)

    expected_mean = [0.6398184791042468, -0.8738254794097052, 0.7198112606871158]
    check(lmm.mean(), expected_mean)

    expected_lml = -3.012715726960625
    check(lmm.lml(), expected_lml)
    check(lmm.value(), lmm.lml())
    check(lmm.lml(), expected_lml)

    expected_X = [
        [-0.3224172040135075, -0.38405435466841564],
        [1.1337694423354374, -1.0998912673140309],
        [-0.17242820755043575, -0.8778584179213718],
    ]
    check(lmm.X, expected_X)

    check(lmm.beta, [-1.3155159120000266, -0.5615702941530938])

    expected_beta_covariance = [
        [0.44737305797088345, 0.20431961864892412],
        [0.20431961864892412, 0.29835835133251526],
    ]
    check(lmm.beta_covariance, expected_beta_covariance)

    check(lmm.delta, 0.9999999999999998)
    assert_equal(lmm.ncovariates, 2)
    assert_equal(lmm.nsamples, 3)
    check(lmm.scale, 0.43631103143971767)
    check(lmm.v0, 9.688051060046502e-17)
    check(lmm.v1, 0.43631103143971756)
    assert_equal(lmm.name, "lmm")

    with pytest.raises(NotImplementedError):
        lmm.gradient()
def test_lmm_scan():
    """Validate ``FastScanner.scan`` against a direct numerical optimisation.

    The scan result for the candidate covariates ``M1`` (log-likelihood,
    effect sizes, scale and standard errors) is compared with the optimum of
    an explicit multivariate-normal log-density, and the scanner's null-model
    quantities are compared with stored references and with the fitted LMM.
    """
    rng = RandomState(9458)
    nsamples = 30
    X = _covariates_sample(rng, nsamples, nsamples + 1)
    y = _outcome_sample(rng, 1.0, X)
    QS = economic_qs_linear(X)

    M0 = rng.randn(nsamples, 2)
    M1 = rng.randn(nsamples, 2)

    lmm = LMM(y, M0, QS)
    lmm.fit(verbose=False)

    # Covariance of the fitted null model, up to the scale optimised below.
    null_cov = lmm.v0 * X @ X.T + lmm.v1 * eye(nsamples)
    design = concatenate((M0, M1), axis=1)

    def neg_log_density(params):
        # params = four effect sizes followed by the log of the scale.
        effsizes, log_scale = params[:4], params[4]
        dist = st.multivariate_normal(design @ effsizes, exp(log_scale) * null_cov)
        return -dist.logpdf(y)

    optimum = minimize(neg_log_density, [0] * 5)

    scanner = lmm.get_fast_scanner()
    result = scanner.scan(M1)

    assert_allclose(result["lml"], -optimum.fun)
    assert_allclose(result["effsizes0"], optimum.x[:2], rtol=1e-5)
    assert_allclose(result["effsizes1"], optimum.x[2:4], rtol=1e-5)
    assert_allclose(result["scale"], exp(optimum.x[4]), rtol=1e-5)

    # Standard errors from the inverse of design.T @ K^-1 @ design.
    scaled_cov = result["scale"] * lmm.covariance()
    expected_se = sqrt(inv(design.T @ solve(scaled_cov, design)).diagonal())
    reported_se = concatenate((result["effsizes0_se"], result["effsizes1_se"]))
    assert_allclose(expected_se, reported_se)

    assert_allclose(scanner.null_lml(), -53.805721275578456, rtol=1e-5)
    assert_allclose(
        scanner.null_beta,
        [0.26521964226797085, 0.4334778669761928],
        rtol=1e-5,
    )
    assert_allclose(
        scanner.null_beta_covariance,
        [
            [0.06302553593799207, 0.00429640179038484],
            [0.004296401790384839, 0.05591392416235412],
        ],
        rtol=1e-5,
    )
    assert_allclose(scanner.null_scale, 1.0)
    assert_allclose(scanner.null_beta, lmm.beta, rtol=1e-5)
    assert_allclose(scanner.null_beta_covariance, lmm.beta_covariance, rtol=1e-5)
def test_lmm_predict():
    """Check the correlation between ``LMMPredict.predictive_mean`` and the outcome.

    An outcome is sampled from an offset mean plus the sum of a linear and an
    identity covariance, an intercept-only LMM is fitted to it, and the
    correlation between the predictive mean and the outcome is compared with a
    stored reference value.
    """
    rng = RandomState(9458)
    nsamples = 30

    # Column-standardise X, then divide by the square root of its column count.
    X = rng.randn(nsamples, nsamples + 1)
    X -= X.mean(0)
    X /= X.std(0)
    X /= sqrt(X.shape[1])

    mean = OffsetMean(nsamples)
    mean.offset = 1.0

    linear_cov = LinearCov(X)
    linear_cov.scale = 1.5
    identity_cov = EyeCov(nsamples)
    identity_cov.scale = 1.5
    total_cov = SumCov([linear_cov, identity_cov])

    y = GGPSampler(DeltaProdLik(), mean, total_cov).sample(rng)

    intercept = ones((nsamples, 1))
    lmm = LMM(y, intercept, economic_qs_linear(X))
    lmm.fit(verbose=False)

    predictor = LMMPredict(
        y, lmm.beta, lmm.v0, lmm.v1, lmm.mean(), lmm.covariance()
    )

    K = dot(X, X.T)
    predicted = predictor.predictive_mean(ones((nsamples, 1)), K, K.diagonal())
    assert_allclose(corrcoef(y, predicted)[0, 1], 0.8358820971891354)
def test_fast_scanner_statsmodel_gls():
    """Check the fast scanner's null-model standard errors against GLS references.

    Two comparisons are made on an inlined 16 x 7 dataset:

    1. On the raw data, ``scanner.null_beta_se`` is compared with the square
       root of the diagonal of the least-squares inverse of ``X.T @ K^-1 @ X``,
       where ``K`` is the fitted LMM covariance.
    2. On standardised data, the scanner's standard errors are compared with
       stored GLS standard errors divided by the square root of the stored
       GLS scale.
    """
    from numpy.linalg import lstsq

    def _lstsq(A, B):
        # Least-squares solution of A @ x = B, without numpy's rcond warning.
        return lstsq(A, B, rcond=None)[0]

    # The hard-coded constants below (rho, the data, scale, beta_se) stand in
    # for values that were previously computed with statsmodels. The recipe is
    # kept for reference:
    #
    #   data = sm.datasets.longley.load()
    #   data.exog = sm.add_constant(data.exog)
    #   ols_resid = sm.OLS(data.endog, data.exog).fit().resid
    #   resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
    #   rho = resid_fit.params[1]
    #   order = toeplitz(range(len(ols_resid)))
    #   ...
    #   gls_results = sm.GLS(endog, exog, sigma=lmm.covariance()).fit()
    #   scale = gls_results.scale
    #   beta_se = gls_results.bse
    rho = -0.3634294908774683
    order = toeplitz(range(16))
    sigma = rho**order
    QS = economic_qs(sigma)

    endog = reshape(
        [
            60323.0,
            61122.0,
            60171.0,
            61187.0,
            63221.0,
            63639.0,
            64989.0,
            63761.0,
            66019.0,
            67857.0,
            68169.0,
            66513.0,
            68655.0,
            69564.0,
            69331.0,
            70551.0,
        ],
        (16,),
    )
    # One observation per source line; the first column is the constant term.
    exog = reshape(
        [
            1.0, 83.0, 234289.0, 2356.0, 1590.0, 107608.0, 1947.0,
            1.0, 88.5, 259426.0, 2325.0, 1456.0, 108632.0, 1948.0,
            1.0, 88.2, 258054.0, 3682.0, 1616.0, 109773.0, 1949.0,
            1.0, 89.5, 284599.0, 3351.0, 1650.0, 110929.0, 1950.0,
            1.0, 96.2, 328975.0, 2099.0, 3099.0, 112075.0, 1951.0,
            1.0, 98.1, 346999.0, 1932.0, 3594.0, 113270.0, 1952.0,
            1.0, 99.0, 365385.0, 1870.0, 3547.0, 115094.0, 1953.0,
            1.0, 100.0, 363112.0, 3578.0, 3350.0, 116219.0, 1954.0,
            1.0, 101.2, 397469.0, 2904.0, 3048.0, 117388.0, 1955.0,
            1.0, 104.6, 419180.0, 2822.0, 2857.0, 118734.0, 1956.0,
            1.0, 108.4, 442769.0, 2936.0, 2798.0, 120445.0, 1957.0,
            1.0, 110.8, 444546.0, 4681.0, 2637.0, 121950.0, 1958.0,
            1.0, 112.6, 482704.0, 3813.0, 2552.0, 123366.0, 1959.0,
            1.0, 114.2, 502601.0, 3931.0, 2514.0, 125368.0, 1960.0,
            1.0, 115.7, 518173.0, 4806.0, 2572.0, 127852.0, 1961.0,
            1.0, 116.9, 554894.0, 4007.0, 2827.0, 130081.0, 1962.0,
        ],
        (16, 7),
    )

    lmm = LMM(endog, exog, QS)
    lmm.fit(verbose=False)
    scanner = lmm.get_fast_scanner()

    # Evaluate the fitted covariance once; it is only needed for this solve.
    fitted_cov = lmm.covariance()
    best_beta_se = _lstsq(exog.T @ _lstsq(fitted_cov, exog), eye(7))
    best_beta_se = sqrt(best_beta_se.diagonal())
    assert_allclose(scanner.null_beta_se, best_beta_se, atol=1e-4)

    # Second comparison: standardise copies of the outcome and the covariates.
    endog = endog.copy()
    endog -= endog.mean(0)
    endog /= endog.std(0)

    exog = exog.copy()
    exog -= exog.mean(0)
    # The constant column is all zeros after centring, so dividing by its
    # zero standard deviation is invalid; silence the warning and restore the
    # column to ones afterwards.
    with errstate(invalid="ignore", divide="ignore"):
        exog /= exog.std(0)
    exog[:, 0] = 1

    lmm = LMM(endog, exog, QS)
    lmm.fit(verbose=False)
    scanner = lmm.get_fast_scanner()

    # Stored reference values (see the recipe comment near the top).
    scale = 1.7777777777782937
    beta_se = array(
        [
            0.014636888951505144,
            0.21334653097414055,
            0.7428559936739378,
            0.10174713767252333,
            0.032745906589939845,
            0.3494488802468581,
            0.4644879873404213,
        ]
    )
    our_beta_se = sqrt(scanner.null_beta_covariance.diagonal())

    # statsmodels scales the covariance matrix we pass, that is why
    # we need to account for it here.
    assert_allclose(our_beta_se, beta_se / sqrt(scale), rtol=1e-6)
    assert_allclose(scanner.null_beta_se, beta_se / sqrt(scale), rtol=1e-6)