def test_lmm_kron_scan():
    random = RandomState(0)
    n = 20
    Y = random.randn(n, 3)
    A = random.randn(3, 3)
    A = A @ A.T
    F = random.randn(n, 2)
    G = random.randn(n, 6)
    lmm = Kron2Sum(Y, A, F, G, restricted=True)
    lmm.fit(verbose=False)
    scan = lmm.get_fast_scanner()

    m = lmm.mean()
    K = lmm.covariance()

    def func(scale):
        mv = st.multivariate_normal(m, scale * K)
        return -mv.logpdf(vec(Y))

    s = minimize(func, 1e-3, 5.0, 1e-5)[0]
    assert_allclose(scan.null_lml(), st.multivariate_normal(m, s * K).logpdf(vec(Y)))
    assert_allclose(kron(A, F) @ scan.null_beta, m)

    A1 = random.randn(3, 2)
    F1 = random.randn(n, 4)
    r = scan.scan(A1, F1)
    assert_allclose(r["scale"], 0.7365021111700154, rtol=1e-3)

    m = kron(A, F) @ vec(r["effsizes0"]) + kron(A1, F1) @ vec(r["effsizes1"])

    def func(scale):
        mv = st.multivariate_normal(m, scale * K)
        return -mv.logpdf(vec(Y))

    s = minimize(func, 1e-3, 5.0, 1e-5)[0]
    assert_allclose(r["lml"], st.multivariate_normal(m, s * K).logpdf(vec(Y)))

    r = scan.scan(empty((3, 0)), F1)
    assert_allclose(r["lml"], -85.36667704747371, rtol=1e-4)
    assert_allclose(r["scale"], 0.8999995537936586, rtol=1e-3)
    assert_allclose(
        r["effsizes0"],
        [
            [0.21489119796865844, 0.6412947101778663, -0.7176143380221816],
            [0.8866722740598517, -0.18731140321348416, -0.26118052682069],
        ],
        rtol=1e-2,
        atol=1e-2,
    )
    assert_allclose(r["effsizes1"], [])
def test_kron2sum_large_outcome():
    random = RandomState(2)
    n = 50
    A = random.randn(3, 3)
    A = A @ A.T
    F = random.randn(n, 2)
    G = random.randn(n, 4)
    B = random.randn(2, 3)
    C0 = random.randn(3, 3)
    C0 = C0 @ C0.T
    C1 = random.randn(3, 3)
    C1 = C1 @ C1.T
    K = kron(C0, G @ G.T) + kron(C1, eye(n))
    y = multivariate_normal(random, kron(A, F) @ vec(B), K)
    Y = unvec(y, (n, 3))
    Y = Y / Y.std(0)

    lmm = Kron2Sum(Y, A, F, G, restricted=False)
    lmm.fit(verbose=False)

    assert_allclose(lmm.lml(), -12.163158697588926)
    assert_allclose(lmm.C0[0, 1], -0.004781646218546575, rtol=1e-3, atol=1e-5)
    assert_allclose(lmm.C1[0, 1], 0.03454122242999587, rtol=1e-3, atol=1e-5)
    assert_allclose(lmm.beta[2], -0.02553979383437496, rtol=1e-3, atol=1e-5)
    assert_allclose(
        lmm.beta_covariance[0, 1], 0.0051326042358990865, rtol=1e-3, atol=1e-5
    )
    assert_allclose(lmm.mean()[3], 0.3442913781854699, rtol=1e-2, atol=1e-5)
    assert_allclose(lmm.covariance()[0, 1], 0.0010745698663887468, rtol=1e-3, atol=1e-5)
@property
def beta(self):
    """
    Fixed-effect sizes 𝜷 = vec(B).

    Returns
    -------
    fixed-effects : ndarray
        𝜷 from 𝜷 = vec(B).
    """
    return vec(self.B)
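# A minimal sketch of the column-stacking vec convention the `beta` property
# assumes: vec stacks the columns of B (Fortran order), so a (c, p) matrix of
# effect sizes becomes a vector of length c*p. The `_vec`/`_unvec` helpers
# below are illustrative stand-ins, not the library's own implementation.
def sketch_vec_convention():
    import numpy as np

    def _vec(B):
        # Column-major stacking of B's columns into one vector.
        return B.reshape(-1, order="F")

    def _unvec(b, shape):
        # Inverse of _vec: reshape the vector back into a matrix.
        return b.reshape(shape, order="F")

    B = np.arange(6.0).reshape(2, 3)
    assert np.allclose(_unvec(_vec(B), (2, 3)), B)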
def test_lmm_kron_scan_redundant():
    random = RandomState(0)
    n = 30
    Y = random.randn(n, 3)
    A = random.randn(3, 3)
    A = A @ A.T
    F = random.randn(n, 2)
    G = random.randn(n, 6)
    G = concatenate([G, G], axis=1)
    lmm = Kron2Sum(Y, A, F, G, restricted=True)
    lmm.fit(verbose=False)
    scan = lmm.get_fast_scanner()

    m = lmm.mean()
    K = lmm.covariance()

    def func(scale):
        mv = st.multivariate_normal(m, scale * K)
        return -mv.logpdf(vec(Y))

    s = minimize(func, 1e-3, 5.0, 1e-5)[0]
    assert_allclose(scan.null_lml(), st.multivariate_normal(m, s * K).logpdf(vec(Y)))
    assert_allclose(kron(A, F) @ scan.null_beta, m)

    A1 = random.randn(3, 2)
    F1 = random.randn(n, 4)
    F1 = concatenate([F1, F1], axis=1)
    r = scan.scan(A1, F1)
    assert_allclose(r["scale"], 0.8843540849467378, rtol=1e-3)

    m = kron(A, F) @ vec(r["effsizes0"]) + kron(A1, F1) @ vec(r["effsizes1"])

    def func(scale):
        mv = st.multivariate_normal(m, scale * K)
        return -mv.logpdf(vec(Y))

    s = minimize(func, 1e-3, 5.0, 1e-5)[0]
    assert_allclose(r["lml"], st.multivariate_normal(m, s * K).logpdf(vec(Y)))
def test_lmm_kron_scan_with_lmm():
    random = RandomState(0)
    n = 15
    Y = random.randn(n, 3)
    A = random.randn(3, 3)
    A = A @ A.T
    F = random.randn(n, 2)
    G = random.randn(n, 6)
    klmm = Kron2Sum(Y, A, F, G, restricted=True)
    klmm.fit(verbose=False)
    kscan = klmm.get_fast_scanner()

    K = klmm.covariance()
    X = kron(A, F)
    QS = economic_qs(K)
    scan = FastScanner(vec(Y), X, QS, 0.0)

    assert_allclose(klmm.covariance(), K)
    assert_allclose(kscan.null_scale, scan.null_scale)
    assert_allclose(kscan.null_beta, scan.null_beta)
    assert_allclose(kscan.null_lml(), scan.null_lml())
    assert_allclose(kscan.null_beta_covariance, scan.null_beta_covariance)

    A1 = random.randn(3, 2)
    F1 = random.randn(n, 2)
    M = kron(A1, F1)
    kr = kscan.scan(A1, F1)
    r = scan.scan(M)

    assert_allclose(kr["lml"], r["lml"])
    assert_allclose(kr["scale"], r["scale"])
    assert_allclose(vec(kr["effsizes0"]), r["effsizes0"])
    assert_allclose(vec(kr["effsizes1"]), r["effsizes1"])
    assert_allclose(vec(kr["effsizes0_se"]), r["effsizes0_se"])
    assert_allclose(vec(kr["effsizes1_se"]), r["effsizes1_se"])
@property
def null_scale(self):
    """
    Optimal s according to the marginal likelihood.

    The optimal s is given by

        s = (n·p)⁻¹𝐲ᵀK⁻¹(𝐲 - 𝐦),

    where 𝐦 = (A ⊗ X)vec(𝚩) and 𝚩 is optimal.

    Returns
    -------
    scale : float
        Optimal scale.
    """
    np = self._nsamples * self._ntraits
    b = vec(self.null_beta)
    mKiy = b.T @ self._MKiy
    sqrtdot = self._yKiy - mKiy
    scale = sqrtdot / np
    return scale
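# A hedged numeric check (not library code; all names below are local to this
# sketch) that the closed form above matches brute-force minimization of the
# Gaussian negative log-likelihood over the scale. At the optimal 𝜷 the
# residual 𝐲 - 𝐦 is K⁻¹-orthogonal to the mean, so 𝐲ᵀK⁻¹(𝐲 - 𝐦) equals
# (𝐲 - 𝐦)ᵀK⁻¹(𝐲 - 𝐦), which is what we compute here.
def sketch_null_scale_closed_form():
    import numpy as np
    from scipy.optimize import minimize_scalar
    from scipy.stats import multivariate_normal

    rng = np.random.default_rng(0)
    N = 12
    y = rng.standard_normal(N)
    m = rng.standard_normal(N)
    L = rng.standard_normal((N, N))
    K = L @ L.T + N * np.eye(N)  # a well-conditioned covariance

    # Closed form: s = (y - m)ᵀK⁻¹(y - m) / N.
    r = y - m
    s_closed = r @ np.linalg.solve(K, r) / N

    # Brute force: minimize the negative log-density of N(m, s·K) over s.
    res = minimize_scalar(
        lambda s: -multivariate_normal(m, s * K).logpdf(y),
        bounds=(1e-6, 50.0),
        method="bounded",
    )
    assert np.isclose(s_closed, res.x, rtol=1e-3)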
def test_kron2sum_unrestricted_lml():
    random = RandomState(0)
    Y = random.randn(5, 3)
    A = random.randn(3, 3)
    A = A @ A.T
    F = random.randn(5, 2)
    G = random.randn(5, 4)
    lmm = Kron2Sum(Y, A, F, G, restricted=False)
    y = vec(lmm._Y)

    m = lmm.mean()
    K = lmm.covariance()
    assert_allclose(lmm.lml(), st.multivariate_normal(m, K).logpdf(y))

    lmm._cov.C0.Lu = random.randn(3)
    m = lmm.mean()
    K = lmm.covariance()
    assert_allclose(lmm.lml(), st.multivariate_normal(m, K).logpdf(y))

    lmm._cov.C1.Lu = random.randn(6)
    m = lmm.mean()
    K = lmm.covariance()
    assert_allclose(lmm.lml(), st.multivariate_normal(m, K).logpdf(y))
def scan(self, A1, X1):
    """
    LML, fixed-effect sizes, and scale of the candidate set.

    Parameters
    ----------
    A1 : (p, e) array_like
        Trait-by-environments design matrix.
    X1 : (n, m) array_like
        Variants set matrix.

    Returns
    -------
    lml : float
        Log of the marginal likelihood for the set.
    effsizes0 : (c, p) ndarray
        Fixed-effect sizes for the covariates.
    effsizes0_se : (c, p) ndarray
        Fixed-effect size standard errors for the covariates.
    effsizes1 : (m, e) ndarray
        Fixed-effect sizes for the candidates.
    effsizes1_se : (m, e) ndarray
        Fixed-effect size standard errors for the candidates.
    scale : float
        Optimal scale.
    """
    from numpy import empty
    from numpy.linalg import multi_dot
    from numpy_sugar import epsilon, is_all_finite
    from scipy.linalg import cho_solve

    A1 = asarray(A1, float)
    X1 = asarray(X1, float)

    if not is_all_finite(A1):
        raise ValueError("A1 parameter has non-finite elements.")
    if not is_all_finite(X1):
        raise ValueError("X1 parameter has non-finite elements.")

    if A1.shape[1] == 0:
        beta_se = sqrt(self.null_beta_covariance.diagonal())
        return {
            "lml": self.null_lml(),
            "effsizes0": unvec(self.null_beta, (self._ncovariates, -1)),
            "effsizes0_se": unvec(beta_se, (self._ncovariates, -1)),
            "effsizes1": empty((0,)),
            "effsizes1_se": empty((0,)),
            "scale": self.null_scale,
        }

    X1X1 = X1.T @ X1
    XX1 = self._X.T @ X1
    AWA1 = self._WA.T @ A1
    A1W = A1.T @ self._W
    GX1 = self._G.T @ X1

    MRiM1 = kron(AWA1, XX1)
    M1RiM1 = kron(A1W @ A1, X1X1)
    M1Riy = vec(multi_dot([X1.T, self._Y, A1W.T]))
    XRiM1 = kron(self._WL0.T @ A1, GX1)

    ZiXRiM1 = cho_solve(self._Lz, XRiM1)
    MRiXZiXRiM1 = self._XRiM.T @ ZiXRiM1
    M1RiXZiXRiM1 = XRiM1.T @ ZiXRiM1
    M1RiXZiXRiy = XRiM1.T @ self._ZiXRiy

    T0 = [[self._MRiM, MRiM1], [MRiM1.T, M1RiM1]]
    T1 = [[self._MRiXZiXRiM, MRiXZiXRiM1], [MRiXZiXRiM1.T, M1RiXZiXRiM1]]
    T2 = [self._MRiy, M1Riy]
    T3 = [self._MRiXZiXRiy, M1RiXZiXRiy]

    MKiM = block(T0) - block(T1)
    MKiy = block(T2) - block(T3)
    beta = rsolve(MKiM, MKiy)

    mKiy = beta.T @ MKiy
    cp = self._ntraits * self._ncovariates
    effsizes0 = unvec(beta[:cp], (self._ncovariates, self._ntraits))
    effsizes1 = unvec(beta[cp:], (X1.shape[1], A1.shape[1]))

    np = self._nsamples * self._ntraits
    sqrtdot = self._yKiy - mKiy
    scale = clip(sqrtdot / np, epsilon.tiny, inf)
    lml = self._static_lml() / 2 - np * safe_log(scale) / 2 - np / 2

    effsizes_se = sqrt(clip(scale * pinv(MKiM).diagonal(), epsilon.tiny, inf))
    effsizes0_se = unvec(effsizes_se[:cp], (self._ncovariates, self._ntraits))
    effsizes1_se = unvec(effsizes_se[cp:], (X1.shape[1], A1.shape[1]))

    return {
        "lml": lml,
        "effsizes0": effsizes0,
        "effsizes1": effsizes1,
        "scale": scale,
        "effsizes0_se": effsizes0_se,
        "effsizes1_se": effsizes1_se,
    }
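# A usage sketch for `scan`: fit the null Kron2Sum model once, then test a
# candidate trait design A1 against a variant set X1 without refitting the
# covariance. Shapes and data are made up; `Kron2Sum` and `RandomState` are
# assumed imported as in the tests above.
def sketch_scan_usage():
    random = RandomState(0)
    n = 10
    Y = random.randn(n, 3)  # n samples, p=3 traits
    A = random.randn(3, 3)
    A = A @ A.T  # trait design of the null mean
    F = random.randn(n, 2)  # c=2 sample covariates
    G = random.randn(n, 4)  # genotype-like matrix for the covariance

    lmm = Kron2Sum(Y, A, F, G, restricted=True)
    lmm.fit(verbose=False)
    scanner = lmm.get_fast_scanner()

    A1 = random.randn(3, 2)  # candidate trait design, (p, e)
    X1 = random.randn(n, 5)  # candidate variants, (n, m)
    r = scanner.scan(A1, X1)
    # r["effsizes0"] is (c, p); r["effsizes1"] is (m, e).
    print(r["lml"], r["scale"], r["effsizes1"].shape)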
def _lml_gradient(self):
    """
    Gradient of the log of the marginal likelihood.

    Let 𝐲 = vec(Y), 𝕂 = K⁻¹∂(K)K⁻¹, and H = MᵀK⁻¹M. The gradient is
    given by::

        2⋅∂log(p(𝐲)) = -tr(K⁻¹∂K) - tr(H⁻¹∂H) + 𝐲ᵀ𝕂𝐲 - 𝐦ᵀ𝕂(2⋅𝐲-𝐦)
            - 2⋅(𝐦-𝐲)ᵀK⁻¹∂(𝐦).

    Observe that ∂𝜷 = -H⁻¹(∂H)𝜷 - H⁻¹Mᵀ𝕂𝐲 and ∂H = -Mᵀ𝕂M. Let
    Z = I + XᵀR⁻¹X and 𝓡 = R⁻¹(∂K)R⁻¹. We use the Woodbury matrix
    identity to write ::

        𝐲ᵀ𝕂𝐲 = 𝐲ᵀ𝓡𝐲 - 2(𝐲ᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲)
            + (𝐲ᵀR⁻¹X)Z⁻¹(Xᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲)
        Mᵀ𝕂M = Mᵀ𝓡M - 2(Mᵀ𝓡X)Z⁻¹(XᵀR⁻¹M)
            + (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡X)Z⁻¹(XᵀR⁻¹M)
        Mᵀ𝕂𝐲 = Mᵀ𝓡𝐲 - (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡𝐲) - (Mᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲)
            + (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲)
        H = MᵀR⁻¹M - (MᵀR⁻¹X)Z⁻¹(XᵀR⁻¹M),

    where we have used parentheses to separate expressions that we will
    compute separately. For example, we have ::

        𝐲ᵀ𝓡𝐲 = 𝐲ᵀ(U₁S₁⁻¹U₁ᵀ ⊗ I)(∂C₀ ⊗ GGᵀ)(U₁S₁⁻¹U₁ᵀ ⊗ I)𝐲
             = 𝐲ᵀ(U₁S₁⁻¹U₁ᵀ∂C₀ ⊗ G)(U₁S₁⁻¹U₁ᵀ ⊗ Gᵀ)𝐲
             = vec(GᵀYU₁S₁⁻¹U₁ᵀ∂C₀)ᵀvec(GᵀYU₁S₁⁻¹U₁ᵀ),

    when the derivative is over the parameters of C₀. Otherwise, we have

        𝐲ᵀ𝓡𝐲 = vec(YU₁S₁⁻¹U₁ᵀ∂C₁)ᵀvec(YU₁S₁⁻¹U₁ᵀ).

    The above equations can be more compactly written as

        𝐲ᵀ𝓡𝐲 = vec(EᵢᵀYW∂Cᵢ)ᵀvec(EᵢᵀYW),

    where W = U₁S₁⁻¹U₁ᵀ, E₀ = G, and E₁ = I. We will now just state the
    results for the other instances of the aBc form, which follow similar
    derivations::

        Xᵀ𝓡X = (L₀ᵀW∂CᵢWL₀) ⊗ (GᵀEᵢEᵢᵀG)
        Mᵀ𝓡𝐲 = (AᵀW∂Cᵢ ⊗ XᵀEᵢ)vec(EᵢᵀYW) = vec(XᵀEᵢEᵢᵀYW∂CᵢWA)
        Mᵀ𝓡X = AᵀW∂CᵢWL₀ ⊗ XᵀEᵢEᵢᵀG
        Mᵀ𝓡M = AᵀW∂CᵢWA ⊗ XᵀEᵢEᵢᵀX
        Xᵀ𝓡𝐲 = vec(GᵀEᵢEᵢᵀYW∂CᵢWL₀)

    From the Woodbury matrix identity and Kronecker product properties we
    have ::

        tr(K⁻¹∂K) = tr[W∂Cᵢ]tr[EᵢEᵢᵀ] - tr[Z⁻¹(Xᵀ𝓡X)]
        tr(H⁻¹∂H) = -tr[(MᵀR⁻¹M)(Mᵀ𝕂M)]
            + tr[(MᵀR⁻¹X)Z⁻¹(XᵀR⁻¹M)(Mᵀ𝕂M)]

    Note also that ::

        ∂𝜷 = H⁻¹Mᵀ𝕂M𝜷 - H⁻¹Mᵀ𝕂𝐲.

    Returns
    -------
    C0.Lu : ndarray
        Gradient of the log of the marginal likelihood over C₀ parameters.
    C1.Lu : ndarray
        Gradient of the log of the marginal likelihood over C₁ parameters.
    """
    from numpy_sugar.linalg import lu_solve

    terms = self._terms
    dC0 = self._cov.C0.gradient()["Lu"]
    dC1 = self._cov.C1.gradient()["Lu"]
    b = terms["b"]
    W = terms["W"]
    Lh = terms["Lh"]
    Lz = terms["Lz"]
    WA = terms["WA"]
    WL0 = terms["WL0"]
    YW = terms["YW"]
    MRiM = terms["MRiM"]
    MRiy = terms["MRiy"]
    XRiM = terms["XRiM"]
    XRiy = terms["XRiy"]
    ZiXRiM = terms["ZiXRiM"]
    ZiXRiy = terms["ZiXRiy"]

    WdC0 = _mdot(W, dC0)
    WdC1 = _mdot(W, dC1)
    AWdC0 = _mdot(WA.T, dC0)
    AWdC1 = _mdot(WA.T, dC1)

    # Mᵀ𝓡M
    MR0M = _mkron(_mdot(AWdC0, WA), self._XGGX)
    MR1M = _mkron(_mdot(AWdC1, WA), self._XX)

    # Mᵀ𝓡X
    MR0X = _mkron(_mdot(AWdC0, WL0), self._XGGG)
    MR1X = _mkron(_mdot(AWdC1, WL0), self._GX.T)

    # Mᵀ𝓡𝐲 = (AᵀW∂Cᵢ ⊗ XᵀEᵢ)vec(EᵢᵀYW) = vec(XᵀEᵢEᵢᵀYW∂CᵢWA)
    MR0y = vec(_mdot(self._XGGY, _mdot(WdC0, WA)))
    MR1y = vec(_mdot(self._XY, WdC1, WA))

    # Xᵀ𝓡X
    XR0X = _mkron(_mdot(WL0.T, dC0, WL0), self._GGGG)
    XR1X = _mkron(_mdot(WL0.T, dC1, WL0), self._GG)

    # Xᵀ𝓡𝐲
    XR0y = vec(_mdot(self._GGGY, WdC0, WL0))
    XR1y = vec(_mdot(self._GY, WdC1, WL0))

    # 𝐲ᵀ𝓡𝐲 = vec(EᵢᵀYW∂Cᵢ)ᵀvec(EᵢᵀYW)
    yR0y = vec(_mdot(self._GY, WdC0)).T @ vec(self._GY @ W)
    yR1y = (YW.T * _mdot(self._Y, WdC1).T).T.sum(axis=(0, 1))

    ZiXR0X = lu_solve(Lz, XR0X)
    ZiXR1X = lu_solve(Lz, XR1X)
    ZiXR0y = lu_solve(Lz, XR0y)
    ZiXR1y = lu_solve(Lz, XR1y)

    # Mᵀ𝕂𝐲 = Mᵀ𝓡𝐲 - (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡𝐲) - (Mᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲)
    #     + (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲)
    MK0y = MR0y - _mdot(XRiM.T, ZiXR0y) - _mdot(MR0X, ZiXRiy)
    MK0y += _mdot(XRiM.T, ZiXR0X, ZiXRiy)
    MK1y = MR1y - _mdot(XRiM.T, ZiXR1y) - _mdot(MR1X, ZiXRiy)
    MK1y += _mdot(XRiM.T, ZiXR1X, ZiXRiy)

    # 𝐲ᵀ𝕂𝐲 = 𝐲ᵀ𝓡𝐲 - 2(𝐲ᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲) + (𝐲ᵀR⁻¹X)Z⁻¹(Xᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲)
    yK0y = yR0y - 2 * XR0y.T @ ZiXRiy + ZiXRiy.T @ _mdot(XR0X, ZiXRiy)
    yK1y = yR1y - 2 * XR1y.T @ ZiXRiy + ZiXRiy.T @ _mdot(XR1X, ZiXRiy)

    # Mᵀ𝕂M = Mᵀ𝓡M - (Mᵀ𝓡X)Z⁻¹(XᵀR⁻¹M) - (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡M)
    #     + (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡X)Z⁻¹(XᵀR⁻¹M)
    MR0XZiXRiM = _mdot(MR0X, ZiXRiM)
    MK0M = MR0M - MR0XZiXRiM - MR0XZiXRiM.transpose([1, 0, 2])
    MK0M += _mdot(ZiXRiM.T, XR0X, ZiXRiM)
    MR1XZiXRiM = _mdot(MR1X, ZiXRiM)
    MK1M = MR1M - MR1XZiXRiM - MR1XZiXRiM.transpose([1, 0, 2])
    MK1M += _mdot(ZiXRiM.T, XR1X, ZiXRiM)

    MK0m = _mdot(MK0M, b)
    mK0y = b.T @ MK0y
    mK0m = b.T @ MK0m
    MK1m = _mdot(MK1M, b)
    mK1y = b.T @ MK1y
    mK1m = b.T @ MK1m

    XRim = XRiM @ b
    MRim = MRiM @ b

    db = {
        "C0.Lu": lu_solve(Lh, MK0m - MK0y),
        "C1.Lu": lu_solve(Lh, MK1m - MK1y),
    }
    grad = {
        "C0.Lu": -trace(WdC0) * self._trGG + trace(ZiXR0X),
        "C1.Lu": -trace(WdC1) * self.nsamples + trace(ZiXR1X),
    }
    if self._restricted:
        grad["C0.Lu"] += lu_solve(Lh, MK0M).diagonal().sum(1)
        grad["C1.Lu"] += lu_solve(Lh, MK1M).diagonal().sum(1)

    mKiM = MRim.T - XRim.T @ ZiXRiM
    yKiM = MRiy.T - XRiy.T @ ZiXRiM

    grad["C0.Lu"] += yK0y - 2 * mK0y + mK0m - 2 * _mdot(mKiM, db["C0.Lu"])
    grad["C0.Lu"] += 2 * _mdot(yKiM, db["C0.Lu"])
    grad["C1.Lu"] += yK1y - 2 * mK1y + mK1m - 2 * _mdot(mKiM, db["C1.Lu"])
    grad["C1.Lu"] += 2 * _mdot(yKiM, db["C1.Lu"])
    grad["C0.Lu"] /= 2
    grad["C1.Lu"] /= 2

    return grad
@property
def _terms(self):
    from numpy_sugar.linalg import ddot, lu_slogdet, lu_solve, sum2diag
    from scipy.linalg import lu_factor

    if self._cache["terms"] is not None:
        return self._cache["terms"]

    L0 = self._cov.C0.L
    S, U = self._cov.C1.eigh()
    W = ddot(U, 1 / S) @ U.T
    S = 1 / sqrt(S)

    Y = self._Y
    A = self._mean.A
    WL0 = W @ L0
    YW = Y @ W
    WA = W @ A
    L0WA = L0.T @ WA

    Z = kron(L0.T @ WL0, self._GG)
    Z = sum2diag(Z, 1)
    Lz = lu_factor(Z, check_finite=False)

    # 𝐲ᵀR⁻¹𝐲 = vec(YW)ᵀ𝐲
    yRiy = (YW * self._Y).sum()
    # MᵀR⁻¹M = AᵀWA ⊗ XᵀX
    MRiM = kron(A.T @ WA, self._XX)
    # XᵀR⁻¹𝐲 = vec(GᵀYWL₀)
    XRiy = vec(self._GY @ WL0)
    # XᵀR⁻¹M = (L₀ᵀWA) ⊗ (GᵀX)
    XRiM = kron(L0WA, self._GX)
    # MᵀR⁻¹𝐲 = vec(XᵀYWA)
    MRiy = vec(self._XY @ WA)

    ZiXRiM = lu_solve(Lz, XRiM)
    ZiXRiy = lu_solve(Lz, XRiy)

    MRiXZiXRiy = ZiXRiM.T @ XRiy
    MRiXZiXRiM = XRiM.T @ ZiXRiM

    yKiy = yRiy - XRiy @ ZiXRiy
    MKiy = MRiy - MRiXZiXRiy
    H = MRiM - MRiXZiXRiM
    Lh = lu_factor(H, check_finite=False)
    b = lu_solve(Lh, MKiy)
    B = unvec(b, (self.ncovariates, -1))
    self._mean.B = B
    XRim = XRiM @ b
    ZiXRim = ZiXRiM @ b

    mRiy = b.T @ MRiy
    mRim = b.T @ MRiM @ b

    logdetK = lu_slogdet(Lz)[1]
    logdetK -= 2 * log(S).sum() * self.nsamples

    mKiy = mRiy - XRim.T @ ZiXRiy
    mKim = mRim - XRim.T @ ZiXRim

    self._cache["terms"] = {
        "logdetK": logdetK,
        "mKiy": mKiy,
        "mKim": mKim,
        "b": b,
        "Z": Z,
        "B": B,
        "Lz": Lz,
        "S": S,
        "W": W,
        "WA": WA,
        "YW": YW,
        "WL0": WL0,
        "yRiy": yRiy,
        "MRiM": MRiM,
        "XRiy": XRiy,
        "XRiM": XRiM,
        "ZiXRiM": ZiXRiM,
        "ZiXRiy": ZiXRiy,
        "ZiXRim": ZiXRim,
        "MRiy": MRiy,
        "mRim": mRim,
        "mRiy": mRiy,
        "XRim": XRim,
        "yKiy": yKiy,
        "H": H,
        "Lh": Lh,
        "MRiXZiXRiy": MRiXZiXRiy,
        "MRiXZiXRiM": MRiXZiXRiM,
    }
    return self._cache["terms"]
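# A small plain-numpy sketch (hypothetical sizes) of the Woodbury identity
# `_terms` relies on: with R = C₁ ⊗ I and X = L₀ ⊗ G we have
# K = C₀ ⊗ GGᵀ + C₁ ⊗ I = R + XXᵀ (since C₀ = L₀L₀ᵀ), and Z = I + XᵀR⁻¹X
# gives K⁻¹ = R⁻¹ - R⁻¹XZ⁻¹XᵀR⁻¹ and log|K| = log|Z| + log|R|.
def sketch_woodbury_identity():
    import numpy as np

    rng = np.random.default_rng(1)
    n, p, r = 5, 3, 2
    G = rng.standard_normal((n, r))
    L0 = rng.standard_normal((p, r))
    C1 = rng.standard_normal((p, p))
    C1 = C1 @ C1.T + p * np.eye(p)  # keep R well conditioned

    R = np.kron(C1, np.eye(n))
    X = np.kron(L0, G)
    K = np.kron(L0 @ L0.T, G @ G.T) + R
    assert np.allclose(K, R + X @ X.T)

    Ri = np.linalg.inv(R)
    Z = np.eye(X.shape[1]) + X.T @ Ri @ X
    Ki = Ri - Ri @ X @ np.linalg.solve(Z, X.T @ Ri)
    assert np.allclose(Ki, np.linalg.inv(K))
    assert np.isclose(
        np.linalg.slogdet(Z)[1] + np.linalg.slogdet(R)[1],
        np.linalg.slogdet(K)[1],
    )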
def test_kron2sumcov():
    G = array([[-1.5, 1.0], [-1.5, 1.0], [-1.5, 1.0]])
    Lr = array([[3], [2]], float)
    Ln = array([[1, 0], [2, 1]], float)

    cov = Kron2SumCov(G, 2, 1)
    cov.C0.L = Lr
    cov.C1.L = Ln

    I = eye(G.shape[0])
    assert_allclose(
        cov.value(), kron(Lr @ Lr.T, G @ G.T) + kron(Ln @ Ln.T, I), atol=1e-4
    )
    assert_allclose(cov._check_grad(), 0, atol=1e-5)
    assert_allclose(cov.solve(cov.value()), eye(2 * G.shape[0]), atol=1e-7)
    assert_allclose(cov.logdet(), slogdet(cov.value())[1], atol=1e-7)

    def func(x):
        cov.C0.Lu = x[:2]
        cov.C1.Lu = x[2:]
        return cov.logdet()

    def grad(x):
        cov.C0.Lu = x[:2]
        cov.C1.Lu = x[2:]
        D = cov.logdet_gradient()
        return concatenate((D["C0.Lu"], D["C1.Lu"]))

    random = RandomState(0)
    assert_allclose(check_grad(func, grad, random.randn(5)), 0, atol=1e-5)

    V = random.randn(3, 2)

    g = cov.C0.gradient()["Lu"]
    g0 = cov.gradient_dot(vec(V))["C0.Lu"]
    for i in range(2):
        assert_allclose(g0[..., i], kron(g[..., i], G @ G.T) @ vec(V))

    g = cov.C1.gradient()["Lu"]
    g0 = cov.gradient_dot(vec(V))["C1.Lu"]
    for i in range(3):
        assert_allclose(g0[..., i], kron(g[..., i], eye(3)) @ vec(V))

    V = random.randn(3, 2, 4)

    g = cov.C0.gradient()["Lu"]
    g0 = cov.gradient_dot(vec(V))["C0.Lu"]
    for i in range(2):
        for j in range(4):
            assert_allclose(g0[j, ..., i], kron(g[..., i], G @ G.T) @ vec(V[..., j]))

    g = cov.C1.gradient()["Lu"]
    g0 = cov.gradient_dot(vec(V))["C1.Lu"]
    for i in range(3):
        for j in range(4):
            assert_allclose(g0[j, ..., i], kron(g[..., i], eye(3)) @ vec(V[..., j]))

    M = random.randn(2 * G.shape[0], 2 * 4)
    R = cov.LdKL_dot(M)
    dK = cov.gradient()
    L = kron(cov.Lh, cov.Lx)

    for i in range(cov.C0.shape[0]):
        for j in range(M.shape[1]):
            expected = L @ dK["C0.Lu"][..., i] @ L.T @ M[:, [j]]
            assert_allclose(R["C0.Lu"][:, [j], i], expected, atol=1e-7)

    for i in range(cov.C1.shape[0]):
        for j in range(M.shape[1]):
            expected = L @ dK["C1.Lu"][..., i] @ L.T @ M[:, [j]]
            assert_allclose(R["C1.Lu"][:, [j], i], expected, atol=1e-7)
@B.setter
def B(self, v):
    """Set the fixed-effect sizes matrix B, stored internally as vec(B)."""
    self._vecB.value = vec(asarray(v, float))