Example #1
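These snippets are excerpted from the glimix-core test suite and assume its module-level imports, roughly: scipy.stats as st; kron, empty, eye, and concatenate from numpy; RandomState from numpy.random; assert_allclose from numpy.testing; economic_qs from numpy_sugar.linalg; and Kron2Sum and FastScanner from glimix_core.lmm. The helpers vec and unvec column-stack and unstack matrices, minimize(func, a, b, tol) is a bounded one-dimensional minimizer, and multivariate_normal(random, mean, cov) is a sampler, all provided by the suite.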
def test_lmm_kron_scan():
    random = RandomState(0)
    n = 20
    Y = random.randn(n, 3)
    A = random.randn(3, 3)
    A = A @ A.T
    F = random.randn(n, 2)
    G = random.randn(n, 6)
    lmm = Kron2Sum(Y, A, F, G, restricted=True)
    lmm.fit(verbose=False)
    scan = lmm.get_fast_scanner()

    m = lmm.mean()
    K = lmm.covariance()

    def func(scale):
        mv = st.multivariate_normal(m, scale * K)
        return -mv.logpdf(vec(Y))

    s = minimize(func, 1e-3, 5.0, 1e-5)[0]

    assert_allclose(scan.null_lml(),
                    st.multivariate_normal(m, s * K).logpdf(vec(Y)))
    assert_allclose(kron(A, F) @ scan.null_beta, m)

    A1 = random.randn(3, 2)
    F1 = random.randn(n, 4)

    r = scan.scan(A1, F1)
    assert_allclose(r["scale"], 0.7365021111700154, rtol=1e-3)

    m = kron(A, F) @ vec(r["effsizes0"]) + kron(A1, F1) @ vec(r["effsizes1"])

    def func(scale):
        mv = st.multivariate_normal(m, scale * K)
        return -mv.logpdf(vec(Y))

    s = minimize(func, 1e-3, 5.0, 1e-5)[0]

    assert_allclose(r["lml"], st.multivariate_normal(m, s * K).logpdf(vec(Y)))

    r = scan.scan(empty((3, 0)), F1)
    assert_allclose(r["lml"], -85.36667704747371, rtol=1e-4)
    assert_allclose(r["scale"], 0.8999995537936586, rtol=1e-3)
    assert_allclose(
        r["effsizes0"],
        [
            [0.21489119796865844, 0.6412947101778663, -0.7176143380221816],
            [0.8866722740598517, -0.18731140321348416, -0.26118052682069],
        ],
        rtol=1e-2,
        atol=1e-2,
    )
    assert_allclose(r["effsizes1"], [])
Example #2
def test_kron2sum_large_outcome():

    random = RandomState(2)
    n = 50
    A = random.randn(3, 3)
    A = A @ A.T
    F = random.randn(n, 2)
    G = random.randn(n, 4)
    B = random.randn(2, 3)
    C0 = random.randn(3, 3)
    C0 = C0 @ C0.T
    C1 = random.randn(3, 3)
    C1 = C1 @ C1.T
    K = kron(C0, (G @ G.T)) + kron(C1, eye(n))
    y = multivariate_normal(random, kron(A, F) @ vec(B), K)
    Y = unvec(y, (n, 3))
    Y = Y / Y.std(0)

    lmm = Kron2Sum(Y, A, F, G, restricted=False)
    lmm.fit(verbose=False)

    assert_allclose(lmm.lml(), -12.163158697588926)
    assert_allclose(lmm.C0[0, 1], -0.004781646218546575, rtol=1e-3, atol=1e-5)
    assert_allclose(lmm.C1[0, 1], 0.03454122242999587, rtol=1e-3, atol=1e-5)
    assert_allclose(lmm.beta[2], -0.02553979383437496, rtol=1e-3, atol=1e-5)
    assert_allclose(lmm.beta_covariance[0, 1],
                    0.0051326042358990865,
                    rtol=1e-3,
                    atol=1e-5)
    assert_allclose(lmm.mean()[3], 0.3442913781854699, rtol=1e-2, atol=1e-5)
    assert_allclose(lmm.covariance()[0, 1],
                    0.0010745698663887468,
                    rtol=1e-3,
                    atol=1e-5)
Example #3
    def beta(self):
        """
        Fixed-effect sizes 𝛃 = vec(B).

        Returns
        -------
        fixed-effects : ndarray
            𝛃 from 𝛃 = vec(B).
        """
        return vec(self.B)
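An assumption for illustration: vec/unvec here follow the standard column-stacking convention, which the Kronecker identities throughout these examples rely on. A minimal sketch in plain numpy:

import numpy as np

B = np.arange(6.0).reshape(2, 3)
beta = B.ravel(order="F")                                  # vec(B): stack the columns of B
assert np.array_equal(beta.reshape(2, 3, order="F"), B)    # unvec round-trip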
Example #4
def test_lmm_kron_scan_redundant():
    random = RandomState(0)
    n = 30
    Y = random.randn(n, 3)
    A = random.randn(3, 3)
    A = A @ A.T
    F = random.randn(n, 2)
    G = random.randn(n, 6)
    G = concatenate([G, G], axis=1)
    lmm = Kron2Sum(Y, A, F, G, restricted=True)
    lmm.fit(verbose=False)
    scan = lmm.get_fast_scanner()

    m = lmm.mean()
    K = lmm.covariance()

    def func(scale):
        mv = st.multivariate_normal(m, scale * K)
        return -mv.logpdf(vec(Y))

    s = minimize(func, 1e-3, 5.0, 1e-5)[0]

    assert_allclose(scan.null_lml(),
                    st.multivariate_normal(m, s * K).logpdf(vec(Y)))
    assert_allclose(kron(A, F) @ scan.null_beta, m)

    A1 = random.randn(3, 2)
    F1 = random.randn(n, 4)
    F1 = concatenate([F1, F1], axis=1)

    r = scan.scan(A1, F1)
    assert_allclose(r["scale"], 0.8843540849467378, rtol=1e-3)

    m = kron(A, F) @ vec(r["effsizes0"]) + kron(A1, F1) @ vec(r["effsizes1"])

    def func(scale):
        mv = st.multivariate_normal(m, scale * K)
        return -mv.logpdf(vec(Y))

    s = minimize(func, 1e-3, 5.0, 1e-5)[0]

    assert_allclose(r["lml"], st.multivariate_normal(m, s * K).logpdf(vec(Y)))
Example #5
def test_lmm_kron_scan_with_lmm():
    random = RandomState(0)
    n = 15
    Y = random.randn(n, 3)
    A = random.randn(3, 3)
    A = A @ A.T
    F = random.randn(n, 2)
    G = random.randn(n, 6)

    klmm = Kron2Sum(Y, A, F, G, restricted=True)
    klmm.fit(verbose=False)
    kscan = klmm.get_fast_scanner()

    K = klmm.covariance()

    X = kron(A, F)
    QS = economic_qs(K)
    scan = FastScanner(vec(Y), X, QS, 0.0)

    assert_allclose(klmm.covariance(), K)
    assert_allclose(kscan.null_scale, scan.null_scale)
    assert_allclose(kscan.null_beta, scan.null_beta)
    assert_allclose(kscan.null_lml(), scan.null_lml())
    assert_allclose(kscan.null_beta_covariance, scan.null_beta_covariance)

    A1 = random.randn(3, 2)
    F1 = random.randn(n, 2)
    M = kron(A1, F1)

    kr = kscan.scan(A1, F1)
    r = scan.scan(M)
    assert_allclose(kr["lml"], r["lml"])
    assert_allclose(kr["scale"], r["scale"])
    assert_allclose(vec(kr["effsizes0"]), r["effsizes0"])
    assert_allclose(vec(kr["effsizes1"]), r["effsizes1"])
    assert_allclose(vec(kr["effsizes0_se"]), r["effsizes0_se"])
    assert_allclose(vec(kr["effsizes1_se"]), r["effsizes1_se"])
Example #6
    def null_scale(self):
        """
        Optimal s according to the marginal likelihood.

        The optimal s is given by

            s = (n·p)⁻¹𝐲ᵀK⁻¹(𝐲 - 𝐦),

        where 𝐦 = (A ⊗ X)vec(𝚩) and 𝚩 is the optimal effect-size matrix.

        Returns
        -------
        scale : float
            Optimal scale.
        """
        np = self._nsamples * self._ntraits
        b = vec(self.null_beta)
        mKiy = b.T @ self._MKiy
        sqrtdot = self._yKiy - mKiy
        scale = sqrtdot / np
        return scale
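Why 𝐲ᵀK⁻¹(𝐲 - 𝐦) rather than the usual quadratic form (𝐲 - 𝐦)ᵀK⁻¹(𝐲 - 𝐦): at the optimal 𝚩 the residual is K⁻¹-orthogonal to the mean design, so the two coincide. A self-contained numpy check (illustrative names, not the class API):

import numpy as np

rng = np.random.default_rng(0)
N = 12                                             # plays the role of n·p
M = rng.standard_normal((N, 3))                    # plays the role of A ⊗ X
K = rng.standard_normal((N, N))
K = K @ K.T + np.eye(N)
y = rng.standard_normal(N)

Ki = np.linalg.inv(K)
b = np.linalg.solve(M.T @ Ki @ M, M.T @ Ki @ y)    # optimal vec(𝚩), GLS solution
m = M @ b
# MᵀK⁻¹(𝐲 - 𝐦) = 0 at the optimum, so the two quadratic forms coincide:
assert np.allclose(y @ Ki @ (y - m), (y - m) @ Ki @ (y - m))
scale = y @ Ki @ (y - m) / N                       # the closed form above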
Example #7
def test_kron2sum_unrestricted_lml():
    random = RandomState(0)
    Y = random.randn(5, 3)
    A = random.randn(3, 3)
    A = A @ A.T
    F = random.randn(5, 2)
    G = random.randn(5, 4)
    lmm = Kron2Sum(Y, A, F, G, restricted=False)
    y = vec(lmm._Y)

    m = lmm.mean()
    K = lmm.covariance()
    assert_allclose(lmm.lml(), st.multivariate_normal(m, K).logpdf(y))

    lmm._cov.C0.Lu = random.randn(3)
    m = lmm.mean()
    K = lmm.covariance()
    assert_allclose(lmm.lml(), st.multivariate_normal(m, K).logpdf(y))

    lmm._cov.C1.Lu = random.randn(6)
    m = lmm.mean()
    K = lmm.covariance()
    assert_allclose(lmm.lml(), st.multivariate_normal(m, K).logpdf(y))
Example #8
    def scan(self, A1, X1):
        """
        LML, fixed-effect sizes, and scale of the candidate set.

        Parameters
        ----------
        A1 : (p, e) array_like
            Trait-by-environments design matrix.
        X1 : (n, m) array_like
            Variants set matrix.

        Returns
        -------
        lml : float
            Log of the marginal likelihood for the set.
        effsizes0 : (c, p) ndarray
            Fixed-effect sizes for the covariates.
        effsizes0_se : (c, p) ndarray
            Fixed-effect size standard errors for the covariates.
        effsizes1 : (m, e) ndarray
            Fixed-effect sizes for the candidates.
        effsizes1_se : (m, e) ndarray
            Fixed-effect size standard errors for the candidates.
        scale : float
            Optimal scale.
        """
        from numpy import empty
        from numpy.linalg import multi_dot
        from numpy_sugar import epsilon, is_all_finite
        from scipy.linalg import cho_solve

        A1 = asarray(A1, float)
        X1 = asarray(X1, float)

        if not is_all_finite(A1):
            raise ValueError("A1 parameter has non-finite elements.")

        if not is_all_finite(X1):
            raise ValueError("X1 parameter has non-finite elements.")

        if A1.shape[1] == 0:
            beta_se = sqrt(self.null_beta_covariance.diagonal())
            return {
                "lml": self.null_lml(),
                "effsizes0": unvec(self.null_beta, (self._ncovariates, -1)),
                "effsizes0_se": unvec(beta_se, (self._ncovariates, -1)),
                "effsizes1": empty((0, )),
                "effsizes1_se": empty((0, )),
                "scale": self.null_scale,
            }

        X1X1 = X1.T @ X1
        XX1 = self._X.T @ X1
        AWA1 = self._WA.T @ A1
        A1W = A1.T @ self._W
        GX1 = self._G.T @ X1

        MRiM1 = kron(AWA1, XX1)
        M1RiM1 = kron(A1W @ A1, X1X1)

        M1Riy = vec(multi_dot([X1.T, self._Y, A1W.T]))
        XRiM1 = kron(self._WL0.T @ A1, GX1)
        ZiXRiM1 = cho_solve(self._Lz, XRiM1)

        MRiXZiXRiM1 = self._XRiM.T @ ZiXRiM1
        M1RiXZiXRiM1 = XRiM1.T @ ZiXRiM1
        M1RiXZiXRiy = XRiM1.T @ self._ZiXRiy

        T0 = [[self._MRiM, MRiM1], [MRiM1.T, M1RiM1]]
        T1 = [[self._MRiXZiXRiM, MRiXZiXRiM1], [MRiXZiXRiM1.T, M1RiXZiXRiM1]]
        T2 = [self._MRiy, M1Riy]
        T3 = [self._MRiXZiXRiy, M1RiXZiXRiy]

        MKiM = block(T0) - block(T1)
        MKiy = block(T2) - block(T3)
        beta = rsolve(MKiM, MKiy)

        mKiy = beta.T @ MKiy
        cp = self._ntraits * self._ncovariates
        effsizes0 = unvec(beta[:cp], (self._ncovariates, self._ntraits))
        effsizes1 = unvec(beta[cp:], (X1.shape[1], A1.shape[1]))

        np = self._nsamples * self._ntraits
        sqrtdot = self._yKiy - mKiy
        scale = clip(sqrtdot / np, epsilon.tiny, inf)
        lml = self._static_lml() / 2 - np * safe_log(scale) / 2 - np / 2

        effsizes_se = sqrt(
            clip(scale * pinv(MKiM).diagonal(), epsilon.tiny, inf))
        effsizes0_se = unvec(effsizes_se[:cp],
                             (self._ncovariates, self._ntraits))
        effsizes1_se = unvec(effsizes_se[cp:], (X1.shape[1], A1.shape[1]))

        return {
            "lml": lml,
            "effsizes0": effsizes0,
            "effsizes1": effsizes1,
            "scale": scale,
            "effsizes0_se": effsizes0_se,
            "effsizes1_se": effsizes1_se,
        }
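A usage sketch assembled from the tests above (same public API as in Example #1; the data here is random filler):

from numpy.random import RandomState
from glimix_core.lmm import Kron2Sum

random = RandomState(0)
n = 10
Y = random.randn(n, 3)                  # outcome, one column per trait
A = random.randn(3, 3)
A = A @ A.T                             # trait design of the random effect
F = random.randn(n, 2)                  # covariates
G = random.randn(n, 4)                  # random-effect design

lmm = Kron2Sum(Y, A, F, G, restricted=True)
lmm.fit(verbose=False)
scanner = lmm.get_fast_scanner()

A1 = random.randn(3, 2)                 # trait-by-environment design
X1 = random.randn(n, 5)                 # candidate variants
r = scanner.scan(A1, X1)
print(r["lml"], r["scale"], r["effsizes1"].shape)   # effsizes1 is (m, e) = (5, 2)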
Example #9
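The scale-profiling helper used in Examples #1 and #4: the negative log-density of vec(Y) under N(𝐦, s·K), which the tests minimize over the scale s.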
def func(scale):
    mv = st.multivariate_normal(m, scale * K)
    return -mv.logpdf(vec(Y))
Example #10
    def _lml_gradient(self):
        """
        Gradient of the log of the marginal likelihood.

        Let 𝐲 = vec(Y), 𝕂 = K⁻¹∂(K)K⁻¹, and H = MᵀK⁻¹M. The gradient is given by::

            2⋅∂log(p(𝐲)) = -tr(K⁻¹∂K) - tr(H⁻¹∂H) + 𝐲ᵀ𝕂𝐲 - 𝐦ᵀ𝕂(2⋅𝐲-𝐦)
                - 2⋅(𝐦-𝐲)ᵀK⁻¹∂(𝐦).

        Observe that

            ∂𝛃 = -H⁻¹(∂H)𝛃 - H⁻¹Mᵀ𝕂𝐲 and ∂H = -Mᵀ𝕂M.

        Let Z = I + XᵀR⁻¹X and 𝓡 = R⁻¹(∂K)R⁻¹. We use the Woodbury matrix identity
        to write ::

            𝐲ᵀ𝕂𝐲 = 𝐲ᵀ𝓡𝐲 - 2(𝐲ᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲) + (𝐲ᵀR⁻¹X)Z⁻¹(Xᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲)
            Mᵀ𝕂M = Mᵀ𝓡M - 2(Mᵀ𝓡X)Z⁻¹(XᵀR⁻¹M) + (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡X)Z⁻¹(XᵀR⁻¹M)
            Mᵀ𝕂𝐲 = Mᵀ𝓡𝐲 - (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡𝐲) - (Mᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲)
                  + (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲)
            H     = MᵀR⁻¹M - (MᵀR⁻¹X)Z⁻¹(XᵀR⁻¹M),

        where we have used parentheses to separate expressions
        that we will compute separately. For example, we have ::

            𝐲ᵀ𝓡𝐲 = 𝐲ᵀ(U₁S₁⁻¹U₁ᵀ ⊗ I)(∂C₀ ⊗ GGᵀ)(U₁S₁⁻¹U₁ᵀ ⊗ I)𝐲
                  = 𝐲ᵀ(U₁S₁⁻¹U₁ᵀ∂C₀ ⊗ G)(U₁S₁⁻¹U₁ᵀ ⊗ Gᵀ)𝐲
                  = vec(GᵀYU₁S₁⁻¹U₁ᵀ∂C₀)ᵀvec(GᵀYU₁S₁⁻¹U₁ᵀ),

        when the derivative is over the parameters of C₀. Otherwise, we have

            𝐲ᵀ𝓡𝐲 = vec(YU₁S₁⁻¹U₁ᵀ∂C₁)ᵀvec(YU₁S₁⁻¹U₁ᵀ).

        The above equations can be written more compactly as

            𝐲ᵀ𝓡𝐲 = vec(EᵢᵀYW∂Cᵢ)ᵀvec(EᵢᵀYW),

        where W = U₁S₁⁻¹U₁ᵀ, E₀ = G, and E₁ = I. We will now just state the results
        for the other instances of the aBc form, which follow similar derivations::

            Xᵀ𝓡X = (L₀ᵀW∂CᵢWL₀) ⊗ (GᵀEᵢEᵢᵀG)
            Mᵀ𝓡𝐲 = (AᵀW∂Cᵢ ⊗ XᵀEᵢ)vec(EᵢᵀYW) = vec(XᵀEᵢEᵢᵀYW∂CᵢWA)
            Mᵀ𝓡X = AᵀW∂CᵢWL₀ ⊗ XᵀEᵢEᵢᵀG
            Mᵀ𝓡M = AᵀW∂CᵢWA ⊗ XᵀEᵢEᵢᵀX
            Xᵀ𝓡𝐲 = GᵀEᵢEᵢᵀYW∂CᵢWL₀

        From the Woodbury matrix identity and Kronecker product properties we have ::

            tr(K⁻¹∂K) = tr[W∂Cᵢ]tr[EᵢEᵢᵀ] - tr[Z⁻¹(Xᵀ𝓡X)]
            tr(H⁻¹∂H) = -tr[(MᵀR⁻¹M)(Mᵀ𝕂M)] + tr[(MᵀR⁻¹X)Z⁻¹(XᵀR⁻¹M)(Mᵀ𝕂M)]

        Note also that ::

            ∂𝛃 = H⁻¹Mᵀ𝕂M𝛃 - H⁻¹Mᵀ𝕂𝐲.

        Returns
        -------
        C0.Lu : ndarray
            Gradient of the log of the marginal likelihood over C₀ parameters.
        C1.Lu : ndarray
            Gradient of the log of the marginal likelihood over C₁ parameters.
        """
        from numpy_sugar.linalg import lu_solve

        terms = self._terms
        dC0 = self._cov.C0.gradient()["Lu"]
        dC1 = self._cov.C1.gradient()["Lu"]

        b = terms["b"]
        W = terms["W"]
        Lh = terms["Lh"]
        Lz = terms["Lz"]
        WA = terms["WA"]
        WL0 = terms["WL0"]
        YW = terms["YW"]
        MRiM = terms["MRiM"]
        MRiy = terms["MRiy"]
        XRiM = terms["XRiM"]
        XRiy = terms["XRiy"]
        ZiXRiM = terms["ZiXRiM"]
        ZiXRiy = terms["ZiXRiy"]

        WdC0 = _mdot(W, dC0)
        WdC1 = _mdot(W, dC1)

        AWdC0 = _mdot(WA.T, dC0)
        AWdC1 = _mdot(WA.T, dC1)

        # Mᵀ𝓡M
        MR0M = _mkron(_mdot(AWdC0, WA), self._XGGX)
        MR1M = _mkron(_mdot(AWdC1, WA), self._XX)

        # Mᵀ𝓡X
        MR0X = _mkron(_mdot(AWdC0, WL0), self._XGGG)
        MR1X = _mkron(_mdot(AWdC1, WL0), self._GX.T)

        # Mᵀ𝓡𝐲 = (AᵀW∂Cᵢ ⊗ XᵀEᵢ)vec(EᵢᵀYW) = vec(XᵀEᵢEᵢᵀYW∂CᵢWA)
        MR0y = vec(_mdot(self._XGGY, _mdot(WdC0, WA)))
        MR1y = vec(_mdot(self._XY, WdC1, WA))

        # Xᵀ𝓡X
        XR0X = _mkron(_mdot(WL0.T, dC0, WL0), self._GGGG)
        XR1X = _mkron(_mdot(WL0.T, dC1, WL0), self._GG)

        # Xᵀ𝓡𝐲
        XR0y = vec(_mdot(self._GGGY, WdC0, WL0))
        XR1y = vec(_mdot(self._GY, WdC1, WL0))

        # 𝐲ᵀ𝓡𝐲 = vec(EᵢᵀYW∂Cᵢ)ᵀvec(EᵢᵀYW)
        yR0y = vec(_mdot(self._GY, WdC0)).T @ vec(self._GY @ W)
        yR1y = (YW.T * _mdot(self._Y, WdC1).T).T.sum(axis=(0, 1))

        ZiXR0X = lu_solve(Lz, XR0X)
        ZiXR1X = lu_solve(Lz, XR1X)
        ZiXR0y = lu_solve(Lz, XR0y)
        ZiXR1y = lu_solve(Lz, XR1y)

        # Mᵀ𝕂𝐲 = Mᵀ𝓡𝐲 - (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡𝐲) - (Mᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲)
        #       + (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲)
        MK0y = MR0y - _mdot(XRiM.T, ZiXR0y) - _mdot(MR0X, ZiXRiy)
        MK0y += _mdot(XRiM.T, ZiXR0X, ZiXRiy)
        MK1y = MR1y - _mdot(XRiM.T, ZiXR1y) - _mdot(MR1X, ZiXRiy)
        MK1y += _mdot(XRiM.T, ZiXR1X, ZiXRiy)

        # 𝐲ᵀ𝕂𝐲 = 𝐲ᵀ𝓡𝐲 - 2(𝐲ᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲) + (𝐲ᵀR⁻¹X)Z⁻¹(Xᵀ𝓡X)Z⁻¹(XᵀR⁻¹𝐲)
        yK0y = yR0y - 2 * XR0y.T @ ZiXRiy + ZiXRiy.T @ _mdot(XR0X, ZiXRiy)
        yK1y = yR1y - 2 * XR1y.T @ ZiXRiy + ZiXRiy.T @ _mdot(XR1X, ZiXRiy)

        # Mᵀ𝕂M = Mᵀ𝓡M - (Mᵀ𝓡X)Z⁻¹(XᵀR⁻¹M) - (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡M)
        #       + (MᵀR⁻¹X)Z⁻¹(Xᵀ𝓡X)Z⁻¹(XᵀR⁻¹M)
        MR0XZiXRiM = _mdot(MR0X, ZiXRiM)
        MK0M = MR0M - MR0XZiXRiM - MR0XZiXRiM.transpose([1, 0, 2])
        MK0M += _mdot(ZiXRiM.T, XR0X, ZiXRiM)
        MR1XZiXRiM = _mdot(MR1X, ZiXRiM)
        MK1M = MR1M - MR1XZiXRiM - MR1XZiXRiM.transpose([1, 0, 2])
        MK1M += _mdot(ZiXRiM.T, XR1X, ZiXRiM)

        MK0m = _mdot(MK0M, b)
        mK0y = b.T @ MK0y
        mK0m = b.T @ MK0m
        MK1m = _mdot(MK1M, b)
        mK1y = b.T @ MK1y
        mK1m = b.T @ MK1m
        XRim = XRiM @ b
        MRim = MRiM @ b

        db = {
            "C0.Lu": lu_solve(Lh, MK0m - MK0y),
            "C1.Lu": lu_solve(Lh, MK1m - MK1y)
        }

        grad = {
            "C0.Lu": -trace(WdC0) * self._trGG + trace(ZiXR0X),
            "C1.Lu": -trace(WdC1) * self.nsamples + trace(ZiXR1X),
        }

        if self._restricted:
            grad["C0.Lu"] += lu_solve(Lh, MK0M).diagonal().sum(1)
            grad["C1.Lu"] += lu_solve(Lh, MK1M).diagonal().sum(1)

        mKiM = MRim.T - XRim.T @ ZiXRiM
        yKiM = MRiy.T - XRiy.T @ ZiXRiM

        grad["C0.Lu"] += yK0y - 2 * mK0y + mK0m - 2 * _mdot(mKiM, db["C0.Lu"])
        grad["C0.Lu"] += 2 * _mdot(yKiM, db["C0.Lu"])
        grad["C1.Lu"] += yK1y - 2 * mK1y + mK1m - 2 * _mdot(mKiM, db["C1.Lu"])
        grad["C1.Lu"] += 2 * _mdot(yKiM, db["C1.Lu"])

        grad["C0.Lu"] /= 2
        grad["C1.Lu"] /= 2

        return grad
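The derivation above can be sanity-checked numerically by comparing the analytic gradient against a finite difference of lml(), in the spirit of check_grad in Example #12. A sketch under the API shown in Example #7 (_lml_gradient and _cov are private attributes):

from numpy.random import RandomState
from glimix_core.lmm import Kron2Sum

random = RandomState(0)
Y = random.randn(8, 2)
A = random.randn(2, 2)
A = A @ A.T
F = random.randn(8, 1)
G = random.randn(8, 3)
lmm = Kron2Sum(Y, A, F, G, restricted=False)

eps = 1e-7
grad = lmm._lml_gradient()["C0.Lu"]     # analytic gradient (private API)
lml0 = lmm.lml()
Lu = lmm._cov.C0.Lu.copy()
Lu[0] += eps
lmm._cov.C0.Lu = Lu                     # perturb one C0 parameter
approx = (lmm.lml() - lml0) / eps       # forward finite difference
print(grad[0], approx)                  # the two should closely agree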
Example #11
    def _terms(self):
        from numpy_sugar.linalg import ddot, lu_slogdet, sum2diag

        if self._cache["terms"] is not None:
            return self._cache["terms"]

        L0 = self._cov.C0.L
        S, U = self._cov.C1.eigh()
        W = ddot(U, 1 / S) @ U.T
        S = 1 / sqrt(S)
        Y = self._Y
        A = self._mean.A

        WL0 = W @ L0
        YW = Y @ W
        WA = W @ A
        L0WA = L0.T @ WA

        Z = kron(L0.T @ WL0, self._GG)
        Z = sum2diag(Z, 1)
        Lz = lu_factor(Z, check_finite=False)

        # 𝐲ᵀR⁻¹𝐲 = vec(YW)ᵀ𝐲
        yRiy = (YW * self._Y).sum()
        # MᵀR⁻¹M = AᵀWA ⊗ XᵀX
        MRiM = kron(A.T @ WA, self._XX)
        # XᵀR⁻¹𝐲 = vec(GᵀYWL₀)
        XRiy = vec(self._GY @ WL0)
        # XᵀR⁻¹M = (L₀ᵀWA) ⊗ (GᵀX)
        XRiM = kron(L0WA, self._GX)
        # MᵀR⁻¹𝐲 = vec(XᵀYWA)
        MRiy = vec(self._XY @ WA)

        ZiXRiM = lu_solve(Lz, XRiM)
        ZiXRiy = lu_solve(Lz, XRiy)

        MRiXZiXRiy = ZiXRiM.T @ XRiy
        MRiXZiXRiM = XRiM.T @ ZiXRiM

        yKiy = yRiy - XRiy @ ZiXRiy
        MKiy = MRiy - MRiXZiXRiy
        H = MRiM - MRiXZiXRiM
        Lh = lu_factor(H, check_finite=False)
        b = lu_solve(Lh, MKiy)
        B = unvec(b, (self.ncovariates, -1))
        self._mean.B = B
        XRim = XRiM @ b

        ZiXRim = ZiXRiM @ b
        mRiy = b.T @ MRiy
        mRim = b.T @ MRiM @ b

        logdetK = lu_slogdet(Lz)[1]
        logdetK -= 2 * log(S).sum() * self.nsamples

        mKiy = mRiy - XRim.T @ ZiXRiy
        mKim = mRim - XRim.T @ ZiXRim

        self._cache["terms"] = {
            "logdetK": logdetK,
            "mKiy": mKiy,
            "mKim": mKim,
            "b": b,
            "Z": Z,
            "B": B,
            "Lz": Lz,
            "S": S,
            "W": W,
            "WA": WA,
            "YW": YW,
            "WL0": WL0,
            "yRiy": yRiy,
            "MRiM": MRiM,
            "XRiy": XRiy,
            "XRiM": XRiM,
            "ZiXRiM": ZiXRiM,
            "ZiXRiy": ZiXRiy,
            "ZiXRim": ZiXRim,
            "MRiy": MRiy,
            "mRim": mRim,
            "mRiy": mRiy,
            "XRim": XRim,
            "yKiy": yKiy,
            "H": H,
            "Lh": Lh,
            "MRiXZiXRiy": MRiXZiXRiy,
            "MRiXZiXRiM": MRiXZiXRiM,
        }
        return self._cache["terms"]
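One of the cached identities above, MᵀR⁻¹M = AᵀWA ⊗ XᵀX, can be checked densely with numpy under the assumptions R = C₁ ⊗ I (so R⁻¹ = W ⊗ I with W = C₁⁻¹) and M = A ⊗ X; all names here are illustrative:

import numpy as np

rng = np.random.default_rng(0)
n, p, c = 4, 3, 2
A = rng.standard_normal((p, p))
X = rng.standard_normal((n, c))
C1 = rng.standard_normal((p, p))
C1 = C1 @ C1.T + np.eye(p)              # positive definite C1
W = np.linalg.inv(C1)                   # W = C1⁻¹

M = np.kron(A, X)                       # M = A ⊗ X
Ri = np.kron(W, np.eye(n))              # R⁻¹ = C1⁻¹ ⊗ I
# Kronecker mixed-product: (A ⊗ X)ᵀ(W ⊗ I)(A ⊗ X) = (AᵀWA) ⊗ (XᵀX)
assert np.allclose(M.T @ Ri @ M, np.kron(A.T @ W @ A, X.T @ X))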
Example #12
def test_kron2sumcov():
    G = array([[-1.5, 1.0], [-1.5, 1.0], [-1.5, 1.0]])
    Lr = array([[3], [2]], float)
    Ln = array([[1, 0], [2, 1]], float)

    cov = Kron2SumCov(G, 2, 1)
    cov.C0.L = Lr
    cov.C1.L = Ln

    I = eye(G.shape[0])
    assert_allclose(
        cov.value(), kron(Lr @ Lr.T, G @ G.T) + kron(Ln @ Ln.T, I), atol=1e-4
    )
    assert_allclose(cov._check_grad(), 0, atol=1e-5)
    assert_allclose(cov.solve(cov.value()), eye(2 * G.shape[0]), atol=1e-7)
    assert_allclose(cov.logdet(), slogdet(cov.value())[1], atol=1e-7)

    def func(x):
        cov.C0.Lu = x[:2]
        cov.C1.Lu = x[2:]
        return cov.logdet()

    def grad(x):
        cov.C0.Lu = x[:2]
        cov.C1.Lu = x[2:]
        D = cov.logdet_gradient()
        return concatenate((D["C0.Lu"], D["C1.Lu"]))

    random = RandomState(0)
    assert_allclose(check_grad(func, grad, random.randn(5)), 0, atol=1e-5)

    V = random.randn(3, 2)

    g = cov.C0.gradient()["Lu"]
    g0 = cov.gradient_dot(vec(V))["C0.Lu"]
    for i in range(2):
        assert_allclose(g0[..., i], kron(g[..., i], G @ G.T) @ vec(V))

    g = cov.C1.gradient()["Lu"]
    g0 = cov.gradient_dot(vec(V))["C1.Lu"]
    for i in range(3):
        assert_allclose(g0[..., i], kron(g[..., i], eye(3)) @ vec(V))

    V = random.randn(3, 2, 4)

    g = cov.C0.gradient()["Lu"]
    g0 = cov.gradient_dot(vec(V))["C0.Lu"]
    for i in range(2):
        for j in range(4):
            assert_allclose(g0[j, ..., i], kron(g[..., i], G @ G.T) @ vec(V[..., j]))

    g = cov.C1.gradient()["Lu"]
    g0 = cov.gradient_dot(vec(V))["C1.Lu"]
    for i in range(3):
        for j in range(4):
            assert_allclose(g0[j, ..., i], kron(g[..., i], eye(3)) @ vec(V[..., j]))

    M = random.randn(2 * G.shape[0], 2 * 4)

    R = cov.LdKL_dot(M)
    dK = cov.gradient()
    L = kron(cov.Lh, cov.Lx)

    for i in range(cov.C0.shape[0]):
        for j in range(M.shape[1]):
            expected = L @ dK["C0.Lu"][..., i] @ L.T @ M[:, [j]]
            assert_allclose(R["C0.Lu"][:, [j], i], expected, atol=1e-7)

    for i in range(cov.C1.shape[0]):
        for j in range(M.shape[1]):
            expected = L @ dK["C1.Lu"][..., i] @ L.T @ M[:, [j]]
            assert_allclose(R["C1.Lu"][:, [j], i], expected, atol=1e-7)
Example #13
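The setter counterpart of the beta property in Example #3: the effect-size matrix is stored in column-stacked (vectorized) form.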
    def B(self, v):
        self._vecB.value = vec(asarray(v, float))