예제 #1
0
def test_fast_scanner_statsmodel_gls():
    """Compare the fast scanner's null-model standard errors with GLS.

    The covariance structure is derived from the Longley dataset shipped
    with statsmodels: ``rho`` is the slope from regressing the OLS residuals
    on their lag-1 values, and ``sigma`` is ``rho`` raised element-wise to
    the Toeplitz matrix built from ``range(n)``.

    Two checks are made:

    1. On the raw data, ``scanner.null_beta_se`` must match the square root
       of the diagonal of ``(X' S^-1 X)^-1``, computed via least squares
       with ``S`` being the covariance of the fitted LMM.
    2. On standardised data, ``scanner.null_beta_covariance`` and
       ``scanner.null_beta_se`` must match the statsmodels GLS standard
       errors after the GLS scale has been divided out.
    """
    import statsmodels.api as sm
    from numpy.linalg import lstsq

    def _lstsq(A, B):
        # Keep only the least-squares solution (first element of the result).
        return lstsq(A, B, rcond=None)[0]

    data = sm.datasets.longley.load()
    data.exog = sm.add_constant(data.exog)
    ols_resid = sm.OLS(data.endog, data.exog).fit().resid
    resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
    rho = resid_fit.params[1]
    order = toeplitz(range(len(ols_resid)))
    sigma = rho ** order

    QS = economic_qs(sigma)
    lmm = LMM(data.endog, data.exog, QS)
    lmm.fit(verbose=False)

    # Compute the fitted covariance once and derive the identity size from
    # the design matrix instead of hard-coding the number of covariates.
    null_cov = lmm.covariance()
    ncovariates = data.exog.shape[1]
    scanner = lmm.get_fast_scanner()
    # Solving against the identity yields (X' S^-1 X)^-1.
    best_beta_cov = _lstsq(data.exog.T @ _lstsq(null_cov, data.exog), eye(ncovariates))
    best_beta_se = sqrt(best_beta_cov.diagonal())
    assert_allclose(scanner.null_beta_se, best_beta_se, atol=1e-5)

    endog = data.endog.copy()
    endog -= endog.mean(0)
    endog /= endog.std(0)

    exog = data.exog.copy()
    exog -= exog.mean(0)
    # A constant column has zero standard deviation after centring, so the
    # division produces invalid values there; the warnings are silenced and
    # column 0 is reset to ones right after.
    with errstate(invalid="ignore", divide="ignore"):
        exog /= exog.std(0)
    exog[:, 0] = 1

    lmm = LMM(endog, exog, QS)
    lmm.fit(verbose=False)

    sigma = lmm.covariance()
    scanner = lmm.get_fast_scanner()

    gls_results = sm.GLS(endog, exog, sigma=sigma).fit()
    our_beta_se = sqrt(scanner.null_beta_covariance.diagonal())
    # statsmodels scales the covariance matrix we pass, that is why
    # we need to account for it here.
    expected_se = gls_results.bse / sqrt(gls_results.scale)
    assert_allclose(our_beta_se, expected_se)
    assert_allclose(scanner.null_beta_se, expected_se)
예제 #2
0
def test_lmm_interface():
    """Pin the public attributes of a small LMM fitted with restricted=False.

    A three-sample problem is simulated from a fixed seed, the model is
    fitted, and every exposed quantity (covariance, mean, log marginal
    likelihood, design matrix, effect sizes, variance parameters, sizes and
    name) is compared against stored reference values. ``gradient`` is
    expected to raise ``NotImplementedError``.
    """

    def check(actual, expected):
        # All floating-point comparisons in this test share one tolerance.
        assert_allclose(actual, expected, atol=1e-7)

    rng = RandomState(1)
    nsamples = 3
    G = rng.randn(nsamples, nsamples + 1)
    X = rng.randn(nsamples, 2)
    # Draw order matters for reproducibility: fixed effects, then random
    # effects, then noise.
    fixed_part = X @ rng.randn(2)
    random_part = G @ rng.randn(G.shape[1])
    y = fixed_part + random_part + rng.randn(nsamples)
    y -= y.mean(0)
    y /= y.std(0)

    lmm = LMM(y, X, economic_qs_linear(G), restricted=False)
    lmm.name = "lmm"
    lmm.fit(verbose=False)

    expected_lml = -3.012715726960625
    expected_covariance = [
        [0.436311031439718, 2.6243891396439837e-16, 2.0432156171727483e-16],
        [2.6243891396439837e-16, 0.4363110314397185, 4.814313140426306e-16],
        [2.0432156171727483e-16, 4.814313140426305e-16, 0.43631103143971817],
    ]
    expected_mean = [0.6398184791042468, -0.8738254794097052, 0.7198112606871158]
    expected_X = [
        [-0.3224172040135075, -0.38405435466841564],
        [1.1337694423354374, -1.0998912673140309],
        [-0.17242820755043575, -0.8778584179213718],
    ]
    expected_beta = [-1.3155159120000266, -0.5615702941530938]
    expected_beta_covariance = [
        [0.44737305797088345, 0.20431961864892412],
        [0.20431961864892412, 0.29835835133251526],
    ]

    check(lmm.covariance(), expected_covariance)
    check(lmm.mean(), expected_mean)

    # lml() is checked before and after value() is compared with it.
    check(lmm.lml(), expected_lml)
    check(lmm.value(), lmm.lml())
    check(lmm.lml(), expected_lml)

    check(lmm.X, expected_X)
    check(lmm.beta, expected_beta)
    check(lmm.beta_covariance, expected_beta_covariance)
    check(lmm.delta, 0.9999999999999998)

    assert_equal(lmm.ncovariates, 2)
    assert_equal(lmm.nsamples, 3)

    check(lmm.scale, 0.43631103143971767)
    check(lmm.v0, 9.688051060046502e-17)
    check(lmm.v1, 0.43631103143971756)
    assert_equal(lmm.name, "lmm")

    with pytest.raises(NotImplementedError):
        lmm.gradient()
예제 #3
0
def test_lmm_scan():
    """Check the fast scanner against a direct numerical optimisation.

    An LMM is fitted with covariates ``M0``; the scanner then tests the
    candidate block ``M1``. The scan result is compared with the optimum
    found by ``minimize`` over the negative multivariate-normal log-density
    with mean ``[M0, M1] @ beta`` and covariance ``scale * K``, where ``K``
    is built from the fitted ``v0`` and ``v1``. The null-model quantities
    exposed by the scanner are compared with stored reference values and
    with the fitted LMM itself.
    """
    random = RandomState(9458)
    n = 30
    X = _covariates_sample(random, n, n + 1)
    offset = 1.0
    y = _outcome_sample(random, offset, X)
    QS = economic_qs_linear(X)
    M0 = random.randn(n, 2)
    M1 = random.randn(n, 2)

    lmm = LMM(y, M0, QS)
    lmm.fit(verbose=False)

    # Covariance implied by the fitted variance parameters.
    K = (lmm.v0 * X) @ X.T + lmm.v1 * eye(n)
    M = concatenate((M0, M1), axis=1)

    def fun(x):
        # x[:4] holds the effect sizes for [M0, M1]; x[4] is the log of the
        # scale, which keeps the scale positive during optimisation.
        beta = x[:4]
        scale = exp(x[4])
        return -st.multivariate_normal(M @ beta, scale * K).logpdf(y)

    res = minimize(fun, [0, 0, 0, 0, 0])
    scanner = lmm.get_fast_scanner()
    r = scanner.scan(M1)

    assert_allclose(r["lml"], -res.fun)
    assert_allclose(r["effsizes0"], res.x[:2], rtol=1e-5)
    assert_allclose(r["effsizes1"], res.x[2:4], rtol=1e-5)
    assert_allclose(r["scale"], exp(res.x[4]), rtol=1e-5)

    # Standard errors from (M' C^-1 M)^-1 with the scan-scaled covariance.
    # A separate name is used so that the ``K`` captured by ``fun`` is never
    # rebound, and ``M`` is reused rather than rebuilt.
    scaled_cov = r["scale"] * lmm.covariance()
    effsizes_se = sqrt(inv(M.T @ solve(scaled_cov, M)).diagonal())
    assert_allclose(effsizes_se,
                    concatenate((r["effsizes0_se"], r["effsizes1_se"])))

    assert_allclose(scanner.null_lml(), -53.805721275578456, rtol=1e-5)
    assert_allclose(scanner.null_beta,
                    [0.26521964226797085, 0.4334778669761928],
                    rtol=1e-5)
    assert_allclose(
        scanner.null_beta_covariance,
        [
            [0.06302553593799207, 0.00429640179038484],
            [0.004296401790384839, 0.05591392416235412],
        ],
        rtol=1e-5,
    )
    assert_allclose(scanner.null_scale, 1.0)

    # The scanner's null model must agree with the fitted LMM.
    assert_allclose(scanner.null_beta, lmm.beta, rtol=1e-5)
    assert_allclose(scanner.null_beta_covariance,
                    lmm.beta_covariance,
                    rtol=1e-5)
예제 #4
0
def test_lmm_predict():
    """Check the in-sample predictive mean of ``LMMPredict``.

    An outcome is sampled from a model with an offset mean and the sum of a
    linear and an identity covariance (both with scale 1.5). An LMM with an
    intercept-only design is fitted to it, and the predictive mean computed
    on the training kernel is required to have a fixed correlation with the
    sampled outcome.
    """
    rng = RandomState(9458)
    nsamples = 30

    # Column-standardised design, further divided by sqrt(number of columns).
    G = rng.randn(nsamples, nsamples + 1)
    G -= G.mean(0)
    G /= G.std(0)
    G /= sqrt(G.shape[1])

    linear_cov = LinearCov(G)
    linear_cov.scale = 1.5
    noise_cov = EyeCov(nsamples)
    noise_cov.scale = 1.5
    total_cov = SumCov([linear_cov, noise_cov])

    offset_mean = OffsetMean(nsamples)
    offset_mean.offset = 1.0

    sampler = GGPSampler(DeltaProdLik(), offset_mean, total_cov)
    y = sampler.sample(rng)

    lmm = LMM(y, ones((nsamples, 1)), economic_qs_linear(G))
    lmm.fit(verbose=False)

    predictor = LMMPredict(
        y,
        lmm.beta,
        lmm.v0,
        lmm.v1,
        lmm.mean(),
        lmm.covariance(),
    )

    kernel = dot(G, G.T)
    predicted = predictor.predictive_mean(
        ones((nsamples, 1)), kernel, kernel.diagonal()
    )
    correlation = corrcoef(y, predicted)[0, 1]
    assert_allclose(correlation, 0.8358820971891354)
예제 #5
0
def test_fast_scanner_statsmodel_gls():
    """Compare the fast scanner's null-model standard errors with GLS.

    This variant does not import statsmodels; the values it would provide
    are hard-coded. The recipe they were taken from is (``sm`` being
    ``statsmodels.api``)::

        data = sm.datasets.longley.load()
        data.exog = sm.add_constant(data.exog)
        ols_resid = sm.OLS(data.endog, data.exog).fit().resid
        resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
        rho = resid_fit.params[1]
        order = toeplitz(range(len(ols_resid)))
        ...
        gls_results = sm.GLS(endog, exog, sigma=sigma).fit()
        scale = gls_results.scale
        beta_se = gls_results.bse

    where the GLS fit uses the standardised ``endog``/``exog`` and the
    covariance of the second LMM fitted below. The ``endog``/``exog``
    literals are presumably that dataset with the constant column in front.

    Two checks are made:

    1. On the raw data, ``scanner.null_beta_se`` must match the square root
       of the diagonal of ``(X' S^-1 X)^-1``, computed via least squares
       with ``S`` being the covariance of the fitted LMM.
    2. On standardised data, ``scanner.null_beta_covariance`` and
       ``scanner.null_beta_se`` must match the stored GLS standard errors
       after the GLS scale has been divided out.
    """
    from numpy.linalg import lstsq

    def _lstsq(A, B):
        # Keep only the least-squares solution (first element of the result).
        return lstsq(A, B, rcond=None)[0]

    endog = array([
        60323.0,
        61122.0,
        60171.0,
        61187.0,
        63221.0,
        63639.0,
        64989.0,
        63761.0,
        66019.0,
        67857.0,
        68169.0,
        66513.0,
        68655.0,
        69564.0,
        69331.0,
        70551.0,
    ])
    # One row per sample; the first column is the constant term.
    exog = array([
        [1.0, 83.0, 234289.0, 2356.0, 1590.0, 107608.0, 1947.0],
        [1.0, 88.5, 259426.0, 2325.0, 1456.0, 108632.0, 1948.0],
        [1.0, 88.2, 258054.0, 3682.0, 1616.0, 109773.0, 1949.0],
        [1.0, 89.5, 284599.0, 3351.0, 1650.0, 110929.0, 1950.0],
        [1.0, 96.2, 328975.0, 2099.0, 3099.0, 112075.0, 1951.0],
        [1.0, 98.1, 346999.0, 1932.0, 3594.0, 113270.0, 1952.0],
        [1.0, 99.0, 365385.0, 1870.0, 3547.0, 115094.0, 1953.0],
        [1.0, 100.0, 363112.0, 3578.0, 3350.0, 116219.0, 1954.0],
        [1.0, 101.2, 397469.0, 2904.0, 3048.0, 117388.0, 1955.0],
        [1.0, 104.6, 419180.0, 2822.0, 2857.0, 118734.0, 1956.0],
        [1.0, 108.4, 442769.0, 2936.0, 2798.0, 120445.0, 1957.0],
        [1.0, 110.8, 444546.0, 4681.0, 2637.0, 121950.0, 1958.0],
        [1.0, 112.6, 482704.0, 3813.0, 2552.0, 123366.0, 1959.0],
        [1.0, 114.2, 502601.0, 3931.0, 2514.0, 125368.0, 1960.0],
        [1.0, 115.7, 518173.0, 4806.0, 2572.0, 127852.0, 1961.0],
        [1.0, 116.9, 554894.0, 4007.0, 2827.0, 130081.0, 1962.0],
    ])
    # Sizes are derived from the data rather than repeated as literals.
    nsamples, ncovariates = exog.shape

    rho = -0.3634294908774683
    order = toeplitz(range(nsamples))
    sigma = rho**order

    QS = economic_qs(sigma)
    lmm = LMM(endog, exog, QS)
    lmm.fit(verbose=False)

    null_cov = lmm.covariance()
    scanner = lmm.get_fast_scanner()
    # Solving against the identity yields (X' S^-1 X)^-1.
    best_beta_cov = _lstsq(exog.T @ _lstsq(null_cov, exog), eye(ncovariates))
    best_beta_se = sqrt(best_beta_cov.diagonal())
    assert_allclose(scanner.null_beta_se, best_beta_se, atol=1e-4)

    # Standardise copies so the arrays given to the first model stay intact.
    endog = endog.copy()
    endog -= endog.mean(0)
    endog /= endog.std(0)

    exog = exog.copy()
    exog -= exog.mean(0)
    # The constant column has zero standard deviation after centring, so the
    # division produces invalid values there; the warnings are silenced and
    # column 0 is reset to ones right after.
    with errstate(invalid="ignore", divide="ignore"):
        exog /= exog.std(0)
    exog[:, 0] = 1

    lmm = LMM(endog, exog, QS)
    lmm.fit(verbose=False)
    scanner = lmm.get_fast_scanner()

    scale = 1.7777777777782937
    beta_se = array([
        0.014636888951505144,
        0.21334653097414055,
        0.7428559936739378,
        0.10174713767252333,
        0.032745906589939845,
        0.3494488802468581,
        0.4644879873404213,
    ])
    our_beta_se = sqrt(scanner.null_beta_covariance.diagonal())
    # statsmodels scales the covariance matrix we pass, that is why
    # we need to account for it here.
    expected_se = beta_se / sqrt(scale)
    assert_allclose(our_beta_se, expected_se, rtol=1e-6)
    assert_allclose(scanner.null_beta_se, expected_se, rtol=1e-6)