Esempio n. 1
0
def test_glmmexpfam_layout():
    """GLMMExpFam must reject both input layouts assembled here.

    A one-row covariate matrix and the decomposition of a 2-by-2 identity
    kinship are paired first with a two-element outcome and then with a
    one-element outcome. Each construction is expected to raise
    ``ValueError`` (presumably because the sample counts disagree --
    confirm against GLMMExpFam's input validation).
    """
    covariates = asarray([[0.5, 1.0]])
    kinship_qs = economic_qs(asarray([[1.0, 0.0], [0.0, 1.0]]))

    for outcome in (asarray([1.0, 0.5]), asarray([1.0])):
        with pytest.raises(ValueError):
            GLMMExpFam(outcome, "poisson", covariates, QS=kinship_qs)
Esempio n. 2
0
def test_glmmexpfam_qs_none():
    """Fit a binomial GLMMExpFam that receives ``None`` in place of a QS.

    The log marginal likelihood is pinned at three points: right after
    construction, after a fit with ``beta`` and ``scale`` fixed, and after a
    second fit with both parameters released again.
    """
    n_samples = 10

    rng = RandomState(0)
    covariates = rng.randn(n_samples, 5)
    kinship = linear_eye_cov().value()
    latent = rng.multivariate_normal(0.2 * ones(n_samples), kinship)

    n_trials = rng.randint(1, 30, n_samples)
    n_successes = zeros(n_samples, dtype=int)
    # One noise draw per sample, in sample order: the pinned values below
    # depend on the random stream being consumed exactly this way.
    for idx, trials in enumerate(n_trials):
        n_successes[idx] += sum(latent[idx] + 0.2 * rng.randn(trials) > 0)

    n_trials = ascontiguousarray(n_trials)
    model = GLMMExpFam(n_successes, ("binomial", n_trials), covariates, None)
    assert_allclose(model.lml(), -38.30173374439622, atol=ATOL, rtol=RTOL)

    # First pass: fit with beta and scale marked as fixed.
    for param in ("beta", "scale"):
        model.fix(param)
    model.fit(verbose=False)
    assert_allclose(model.lml(), -32.03927471370041, atol=ATOL, rtol=RTOL)

    # Second pass: release both parameters and fit again.
    for param in ("beta", "scale"):
        model.unfix(param)
    model.fit(verbose=False)
    assert_allclose(model.lml(), -19.575736561760586, atol=ATOL, rtol=RTOL)
Esempio n. 3
0
def test_glmmexpfam_precise():
    """Pin the binomial log marginal likelihood over a grid of scale values.

    After setting ``beta`` explicitly, the scale is stepped from 1.0 to 6.0
    and the LML is compared with a stored value at every step. ``delta`` is
    then set to 0.1 for one more LML check, followed by a gradient check.
    """
    n_samples = 10

    rng = RandomState(0)
    covariates = rng.randn(n_samples, 5)
    kinship_qs = economic_qs(linear_eye_cov().value())

    n_trials = rng.randint(1, 30, n_samples)
    n_successes = [rng.randint(0, upper) for upper in n_trials]

    model = GLMMExpFam(
        n_successes, ["binomial", n_trials], covariates, kinship_qs
    )
    model.beta = asarray([1.0, 0, 0.5, 0.1, 0.4])

    expected_lml_by_scale = (
        (1.0, -44.74191041468836),
        (2.0, -36.19907331929086),
        (3.0, -33.02139830387104),
        (4.0, -31.42553401678996),
        (5.0, -30.507029479473243),
        (6.0, -29.937569702301232),
    )
    for scale, expected in expected_lml_by_scale:
        model.scale = scale
        assert_allclose(model.lml(), expected, atol=ATOL, rtol=RTOL)

    # The scale stays at its last value (6.0) while delta is changed.
    model.delta = 0.1
    assert_allclose(model.lml(), -30.09977907145003, atol=ATOL, rtol=RTOL)

    assert_allclose(model._check_grad(), 0, atol=1e-3, rtol=RTOL)
Esempio n. 4
0
def estimate(y, lik, K, M=None, verbose=True):
    """Fit a (G)LMM and return its genetic, fixed-effect and noise variances.

    The inputs are normalised with ``normalize_likelihood`` and
    ``conform_dataset`` and checked by ``assert_finite``. An ``LMM``
    (``restricted=True``) is fitted when the likelihood name is ``"normal"``;
    otherwise a ``GLMMExpFam`` with ``n_int=500`` is fitted.

    Parameters
    ----------
    y : outcome, passed to ``conform_dataset``.
    lik : likelihood specification, passed to ``normalize_likelihood``.
    K : covariance matrix or ``None``; when ``None`` no QS is computed.
    M : covariates, optional; passed to ``conform_dataset``.
    verbose : bool
        Enables the session output and the fitting progress.

    Returns
    -------
    tuple
        ``(g, v, e)`` where ``g = scale * (1 - delta)``,
        ``v = var(method.mean())`` and ``e = scale * delta``, the latter
        increased by ``pi**2 / 3`` for the ``"bernoulli"`` likelihood.
    """
    from numpy_sugar.linalg import economic_qs
    # NOTE(review): ``diag`` and ``assert_likelihood`` are imported here but
    # never used in this function body.
    from numpy import pi, var, diag
    from glimix_core.glmm import GLMMExpFam
    from glimix_core.lmm import LMM
    from limix._data._assert import assert_likelihood
    from limix._data import normalize_likelihood, conform_dataset
    from limix.qtl._assert import assert_finite
    from limix._display import session_block, session_line

    lik = normalize_likelihood(lik)
    lik_name = lik[0]
    quiet = not verbose

    with session_block("Heritability analysis", disable=quiet):
        with session_line("Normalising input...", disable=quiet):
            dataset = conform_dataset(y, M=M, K=K)
        y, M, K = dataset["y"], dataset["M"], dataset["K"]
        assert_finite(y, M, K)

        QS = None if K is None else economic_qs(K)

        if lik_name == "normal":
            model = LMM(y.values, M.values, QS, restricted=True)
            model.fit(verbose=verbose)
        else:
            model = GLMMExpFam(y, lik, M.values, QS, n_int=500)
            model.fit(verbose=verbose, factr=1e6, pgtol=1e-3)

        genetic_var = model.scale * (1 - model.delta)
        noise_var = model.scale * model.delta
        if lik_name == "bernoulli":
            noise_var += pi * pi / 3
        fixed_var = var(model.mean())
        return genetic_var, fixed_var, noise_var
Esempio n. 5
0
def test_glmmexpfam_optimize():
    """Fit a binomial GLMMExpFam in two stages and pin the LML each time.

    The model is built from the economic QS of ``linear_eye_cov().value()``.
    The LML is checked before fitting, after fitting with ``beta`` and
    ``scale`` fixed, and after refitting with both released.
    """
    n_samples = 10

    rng = RandomState(0)
    covariates = rng.randn(n_samples, 5)
    kinship = linear_eye_cov().value()
    latent = rng.multivariate_normal(0.2 * ones(n_samples), kinship)
    kinship_qs = economic_qs(kinship)

    n_trials = rng.randint(1, 30, n_samples)
    n_successes = zeros(n_samples, dtype=int)
    # Keep the per-sample draw order; the stored values depend on it.
    for idx, trials in enumerate(n_trials):
        n_successes[idx] += sum(latent[idx] + 0.2 * rng.randn(trials) > 0)

    n_trials = ascontiguousarray(n_trials)
    model = GLMMExpFam(
        n_successes, ("binomial", n_trials), covariates, kinship_qs
    )
    assert_allclose(model.lml(), -29.102168129099287, atol=ATOL, rtol=RTOL)

    for param in ("beta", "scale"):
        model.fix(param)
    model.fit(verbose=False)
    assert_allclose(model.lml(), -27.635788105778012, atol=ATOL, rtol=RTOL)

    for param in ("beta", "scale"):
        model.unfix(param)
    model.fit(verbose=False)
    assert_allclose(model.lml(), -19.68486269551159, atol=ATOL, rtol=RTOL)
Esempio n. 6
0
def estimate(y_phe, lik, kin, marker_mat=None, verbose=True):
    """Estimate variance components with an LMM or a GLMMExpFam.

    The inputs are normalised through ``normalize_likelihood`` and
    ``conform_dataset`` and validated with ``assert_finite``. A restricted
    ``LMM`` is used for the ``"normal"`` likelihood and a ``GLMMExpFam``
    (``n_int=500``) for any other.

    Returns a tuple ``(v_g, v_v, v_e)``: ``scale * (1 - delta)``, the
    variance of the fitted mean, and ``scale * delta`` (plus ``pi**2 / 3``
    when the likelihood is ``"bernoulli"``).
    """
    lik = normalize_likelihood(lik)
    lik_name = lik[0]
    quiet = not verbose

    with session_block("Heritability analysis", disable=quiet):
        with session_line("Normalising input...", disable=quiet):
            dataset = conform_dataset(y_phe, M=marker_mat, K=kin)
        y_phe, marker_mat, kin = dataset["y"], dataset["M"], dataset["K"]
        assert_finite(y_phe, marker_mat, kin)

        # Without a covariance matrix there is nothing to decompose.
        q_s = None if kin is None else economic_qs(kin)

        if lik_name == "normal":
            model = LMM(y_phe.values, marker_mat.values, q_s, restricted=True)
            model.fit(verbose=verbose)
        else:
            model = GLMMExpFam(y_phe, lik, marker_mat.values, q_s, n_int=500)
            model.fit(verbose=verbose, factr=1e6, pgtol=1e-3)

        genetic_var = model.scale * (1 - model.delta)
        noise_var = model.scale * model.delta
        if lik_name == "bernoulli":
            noise_var += pi * pi / 3
        fixed_var = var(model.mean())
        return genetic_var, fixed_var, noise_var
Esempio n. 7
0
def test_glmmexpfam_delta_one_zero():
    """Exercise a binomial GLMMExpFam with delta set to 0 and then to 1.

    For each setting the LML and the gradient check are compared with stored
    values, the model is refitted, and the resulting LML and delta are
    checked again.
    """
    rng = RandomState(1)
    n_samples = 30
    covariates = rng.randn(n_samples, 6)
    kinship = dot(covariates, covariates.T)
    kinship /= kinship.diagonal().mean()
    kinship_qs = economic_qs(kinship)

    n_trials = rng.randint(1, 30, n_samples)
    n_successes = [rng.randint(0, upper) for upper in n_trials]

    model = GLMMExpFam(
        n_successes, ("binomial", n_trials), covariates, kinship_qs
    )
    model.beta = asarray([1.0, 0, 0.5, 0.1, 0.4, -0.2])

    # --- delta = 0 ---
    model.delta = 0
    assert_allclose(model.lml(), -113.24570457063275)
    assert_allclose(model._check_grad(step=1e-4), 0, atol=1e-2)

    model.fit(verbose=False)
    assert_allclose(model.lml(), -98.21144899310399, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0, atol=ATOL, rtol=RTOL)

    # --- delta = 1 ---
    model.delta = 1
    assert_allclose(model.lml(), -98.00058169240869, atol=ATOL, rtol=RTOL)
    assert_allclose(model._check_grad(step=1e-4), 0, atol=1e-1)

    model.fit(verbose=False)
    assert_allclose(model.lml(), -72.82680948264196, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0.9999999850988439, atol=ATOL, rtol=RTOL)
Esempio n. 8
0
def test_glmmexpfam_wrong_qs():
    """GLMMExpFam must raise ``ValueError`` when QS is the list ``[0, 1]``."""
    rng = RandomState(0)
    covariates = rng.randn(10, 15)
    # NOTE(review): the result of this call is discarded; it is kept so the
    # test executes exactly the same calls as before.
    linear_eye_cov().value()
    malformed_qs = [0, 1]

    n_trials = rng.randint(1, 30, 10)
    n_successes = [rng.randint(0, upper) for upper in n_trials]

    with pytest.raises(ValueError):
        GLMMExpFam((n_successes, n_trials), "binomial", covariates, malformed_qs)
Esempio n. 9
0
def test_glmmexpfam_predict():
    """Train a binomial GLMMExpFam on 80 samples and predict the other 20.

    The predictive mean must correlate (> 0.8) with the observed success
    ratio on the held-out samples, and the first element of the predictive
    covariance is compared with a stored value.
    """
    rng = RandomState(4)
    n_samples = 100
    n_variants = n_samples + 1

    covariates = ones((n_samples, 2))
    covariates[:, 1] = rng.randn(n_samples)

    genotype = rng.randn(n_samples, n_variants)
    genotype /= genotype.std(0)
    genotype -= genotype.mean(0)
    genotype /= sqrt(n_variants)
    kinship = dot(genotype, genotype.T)

    # Random permutation of the sample indices; the last fifth is held out.
    sample_ids = asarray(arange(0, n_samples), int)
    shuffled = rng.choice(sample_ids, n_samples, replace=False)
    n_test = int(n_samples // 5)
    train_idx = shuffled[:-n_test]
    test_idx = shuffled[-n_test:]

    x_train = covariates[train_idx, :]
    k_train = kinship[train_idx, :][:, train_idx]

    x_test = covariates[test_idx, :]

    effects = rng.randn(2)
    latent = rng.multivariate_normal(
        dot(covariates, effects), 0.9 * kinship + 0.1 * eye(n_samples)
    )

    n_trials = rng.randint(1, 100, n_samples)
    n_successes = zeros(n_samples, dtype=int)
    for idx, trials in enumerate(n_trials):
        n_successes[idx] += sum(latent[idx] + 0.2 * rng.randn(trials) > 0)

    n_trials = ascontiguousarray(n_trials)

    qs_train = economic_qs(k_train)
    successes_train = ascontiguousarray(n_successes[train_idx])
    trials_train = ascontiguousarray(n_trials[train_idx])

    successes_test = ascontiguousarray(n_successes[test_idx])
    trials_test = ascontiguousarray(n_trials[test_idx])

    model = GLMMExpFam(
        successes_train, ("binomial", trials_train), x_train, qs_train
    )
    model.fit(verbose=False)

    cross_cov = kinship[test_idx, :][:, train_idx]
    test_diag = asarray([kinship[j, j] for j in test_idx])
    pred_mean = model.predictive_mean(x_test, cross_cov, test_diag)
    pred_cov = model.predictive_covariance(x_test, cross_cov, test_diag)
    observed_ratio = successes_test / trials_test
    assert_(corrcoef([pred_mean, observed_ratio])[0, 1] > 0.8)
    assert_allclose(pred_cov[0], 54.263705682514846)
Esempio n. 10
0
 def _fit_glmm_simple_model(self, verbose):
     """Fit a GLMMExpFam for the simple model and store its variances.

     The matrix returned by ``_get_matrix_simple_model`` is decomposed with
     ``economic_qs`` unless it is ``None``. After fitting, ``v0`` and ``v1``
     are handed to ``_set_simple_model_variances`` and the fitted model is
     kept in ``self._glmm``.
     """
     from numpy_sugar.linalg import economic_qs
     from glimix_core.glmm import GLMMExpFam
     from numpy import asarray

     kinship = self._get_matrix_simple_model()
     outcome = asarray(self._y, float).ravel()
     decomposition = economic_qs(kinship) if kinship is not None else None

     model = GLMMExpFam(outcome, self._lik, self._M, decomposition)
     model.fit(verbose=verbose)
     self._set_simple_model_variances(model.v0, model.v1)
     self._glmm = model
Esempio n. 11
0
def test_glmmexpfam_poisson():
    """Fit a Poisson GLMMExpFam whose covariates are a pandas DataFrame.

    A phenotype is simulated from two covariates, four variants, noise and a
    kinship-correlated effect, standardised, and turned into Poisson counts.
    The LML is compared with stored values before and after fitting.
    """
    # NOTE(review): ``sqrt``, ``eye``, ``assert_allclose`` and ``GLMMExpFam``
    # are not imported here; they are expected to come from module scope.
    from numpy import ones, stack, exp, zeros
    from numpy.random import RandomState
    from numpy_sugar.linalg import economic_qs
    from pandas import DataFrame

    random = RandomState(1)

    # sample size
    n = 30

    # covariates (built directly as a DataFrame; the earlier plain-array
    # assignment was immediately overwritten and has been dropped)
    offset = ones(n) * random.randn()
    age = random.randint(16, 75, n)
    M = DataFrame(stack([offset, age], axis=1), columns=["offset", "age"])
    M["sample"] = [f"sample{i}" for i in range(n)]
    M = M.set_index("sample")

    # genetic variants
    G = random.randn(n, 4)

    # sampling the phenotype
    alpha = random.randn(2)
    beta = random.randn(4)
    eps = random.randn(n)
    y = M @ alpha + G @ beta + eps

    # Whole genotype of each sample.
    X = random.randn(n, 50)
    # Estimate a kinship relationship between samples.
    X_ = (X - X.mean(0)) / X.std(0) / sqrt(X.shape[1])
    K = X_ @ X_.T + eye(n) * 0.1
    # Update the phenotype
    y += random.multivariate_normal(zeros(n), K)
    y = (y - y.mean()) / y.std()

    # Poisson counts with the standardised phenotype as log-rate.
    z = y.copy()
    y = random.poisson(exp(z))

    # Centre the covariates column-wise before fitting.
    M = M - M.mean(0)
    QS = economic_qs(K)
    glmm = GLMMExpFam(y, "poisson", M, QS)
    assert_allclose(glmm.lml(), -52.479557279193585)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -34.09720756737648)
Esempio n. 12
0
def test_glmmexpfam_scale_very_high():
    """Check the LML and the gradient of a binomial model at ``scale = 30``."""
    n_samples = 10

    rng = RandomState(0)
    covariates = rng.randn(n_samples, 5)
    kinship_qs = economic_qs(linear_eye_cov().value())

    n_trials = rng.randint(1, 30, n_samples)
    n_successes = [rng.randint(0, upper) for upper in n_trials]

    model = GLMMExpFam(
        n_successes, ("binomial", n_trials), covariates, kinship_qs
    )
    model.beta = asarray([1.0, 0, 0.5, 0.1, 0.4])
    model.scale = 30.0

    assert_allclose(model.lml(), -29.632791380478736, atol=ATOL, rtol=RTOL)
    assert_allclose(model._check_grad(), 0, atol=1e-3)
Esempio n. 13
0
def test_glmmexpfam_delta1():
    """Check the LML and the gradient of a binomial model at ``delta = 1``."""
    n_samples = 10

    rng = RandomState(0)
    covariates = rng.randn(n_samples, 5)
    kinship_qs = economic_qs(linear_eye_cov().value())

    n_trials = rng.randint(1, 30, n_samples)
    n_successes = [rng.randint(0, upper) for upper in n_trials]

    model = GLMMExpFam(
        n_successes, ("binomial", n_trials), covariates, kinship_qs
    )
    model.beta = asarray([1.0, 0, 0.5, 0.1, 0.4])
    model.delta = 1

    assert_allclose(model.lml(), -47.09677870648636, atol=ATOL, rtol=RTOL)
    assert_allclose(model._check_grad(), 0, atol=1e-4)
Esempio n. 14
0
File: _iscan.py Progetto: phue/limix
def _glmm(y, lik, M, QS, verbose):
    """Fit a GLMMExpFam, then a GLMMNormal on its site parameters.

    Returns a tuple ``(scanner, v0, v1)``: the ``GLMMNormal`` fast scanner
    wrapped in ``ScannerWrapper`` and the ``v0`` / ``v1`` attributes of the
    fitted ``GLMMExpFam``.
    """
    from glimix_core.glmm import GLMMExpFam, GLMMNormal

    expfam = GLMMExpFam(y.ravel(), lik, M, QS)
    expfam.fit(verbose=verbose)
    v0, v1 = expfam.v0, expfam.v1
    sys.stdout.flush()

    # The second model is built from the ``site.eta`` and ``site.tau``
    # values of the first fit.
    normal = GLMMNormal(expfam.site.eta, expfam.site.tau, M, QS)
    normal.fit(verbose=verbose)

    return ScannerWrapper(normal.get_fast_scanner()), v0, v1
Esempio n. 15
0
def test_glmmexpfam_optimize_low_rank():
    """Fit a binomial GLMMExpFam whose kinship is ``X X^T`` of a 10x5 matrix.

    The LML is compared with stored values before and after fitting.
    """
    n_samples = 10

    rng = RandomState(0)
    covariates = rng.randn(n_samples, 5)
    kinship = dot(covariates, covariates.T)
    latent = dot(covariates, 0.2 * rng.randn(5))
    kinship_qs = economic_qs(kinship)

    n_trials = rng.randint(1, 30, n_samples)
    n_successes = zeros(n_samples, dtype=int)
    for idx, trials in enumerate(n_trials):
        n_successes[idx] += sum(latent[idx] + 0.2 * rng.randn(trials) > 0)

    n_trials = ascontiguousarray(n_trials)
    model = GLMMExpFam(
        n_successes, ("binomial", n_trials), covariates, kinship_qs
    )

    assert_allclose(model.lml(), -18.60476792256323, atol=ATOL, rtol=RTOL)
    model.fit(verbose=False)
    assert_allclose(model.lml(), -7.800621320491801, atol=ATOL, rtol=RTOL)
Esempio n. 16
0
def test_glmmexpfam_binomial_large_ntrials():
    """Fit a binomial GLMMExpFam with trial counts drawn from [1, 100000).

    Successes are simulated one trial at a time and the LML after fitting is
    compared with a stored value.
    """
    rng = RandomState(0)
    n_samples = 10

    covariates = rng.randn(n_samples, 2)
    genotype = rng.randn(n_samples, 100)
    kinship = dot(genotype, genotype.T)
    n_trials = rng.randint(1, 100000, n_samples)
    latent = dot(genotype, rng.randn(100)) / sqrt(100)

    n_successes = zeros(len(n_trials), int)
    # One scalar draw per trial, exactly as the stored LML was generated.
    for idx, trials in enumerate(n_trials):
        for _ in range(trials):
            n_successes[idx] += int(latent[idx] + 0.1 * rng.randn() > 0)

    model = GLMMExpFam(
        n_successes, ("binomial", n_trials), covariates, economic_qs(kinship)
    )
    model.fit(verbose=False)

    assert_allclose(model.lml(), -43.067433588125446)
Esempio n. 17
0
def test_glmmexpfam_bernoulli_probit_assure_delta_fixed():
    """Fit a probit GLMMExpFam and assert that ``logitdelta`` ends up fixed.

    The LML and delta after fitting are compared with stored values, and
    ``model._isfixed("logitdelta")`` must be true.
    """
    rng = RandomState(1)
    n_samples = 10
    genotype = rng.randn(n_samples, n_samples + 50)
    outcome = bernoulli_sample(0.0, genotype, random_state=rng)

    genotype = ascontiguousarray(genotype, dtype=float)
    _stdnorm(genotype, 0, out=genotype)
    genotype /= sqrt(genotype.shape[1])

    decomposition = economic_qs_linear(genotype)
    # Rescale the second element of the decomposition in place so that its
    # mean becomes 1.
    s_values = decomposition[1]
    s_values /= s_values.mean()

    offset = ones((len(outcome), 1))
    model = GLMMExpFam(
        outcome, "probit", offset, QS=(decomposition[0], decomposition[1])
    )
    model.fit(verbose=False)

    assert_allclose(model.lml(), -6.108751595773174, rtol=RTOL)
    assert_allclose(model.delta, 1.4901161193847673e-08, atol=1e-5)
    assert_(model._isfixed("logitdelta"))
Esempio n. 18
0
def _perform_glmm(y, lik, M, K, QS, G, verbose):
    """Scan the candidates in ``G`` with a GLMM and return a ``QTLModel``.

    A ``GLMMExpFam`` is fitted first; its ``site.eta`` / ``site.tau`` values
    feed a ``GLMMNormal`` whose fast scanner (scale set to 1.0) provides the
    null LML and, per candidate, the alternative LML and effect size.

    NOTE(review): the ``K`` argument is not used in this function body.
    """
    from glimix_core.glmm import GLMMExpFam, GLMMNormal
    from pandas import Series
    from xarray import DataArray

    expfam = GLMMExpFam(y.ravel(), lik, M.values, QS)
    expfam.fit(verbose=verbose)
    sys.stdout.flush()

    normal = GLMMNormal(expfam.site.eta, expfam.site.tau, M.values, QS)
    normal.fit(verbose=verbose)

    # Covariate effect sizes, labelled with the "covariate" coordinate of M.
    ncov_effsizes = Series(normal.beta, list(M.coords["covariate"].values))

    scanner = normal.get_fast_scanner()
    scanner.set_scale(1.0)
    null_lml = scanner.null_lml()

    # Prefer ``G.data`` when the attribute exists; otherwise use ``G.values``.
    candidates = G.data if hasattr(G, "data") else G.values
    alt_lmls, effsizes = scanner.fast_scan(candidates, verbose=verbose)

    # Carry over every coordinate of G whose first dimension is "candidate".
    candidate_coords = {}
    for key in G.coords.keys():
        if G.coords[key].dims[0] == "candidate":
            candidate_coords[key] = ("candidate", G.coords[key].values)

    alt_lmls = DataArray(alt_lmls, dims=["candidate"], coords=candidate_coords)
    effsizes = DataArray(effsizes, dims=["candidate"], coords=candidate_coords)

    return QTLModel(null_lml, alt_lmls, effsizes, ncov_effsizes)
Esempio n. 19
0
def test_glmmexpfam_bernoulli_probit_problematic():
    """Compare two probit fits: delta fixed after being set to 0, and free.

    Each fit is checked against stored LML, delta, scale and beta values.
    The quantity ``scale * (1 - delta) / (scale + 1)`` computed after each
    fit must agree between the two.
    """

    def _h2(fitted):
        # Same expression the test evaluates after each of the two fits.
        return fitted.scale * (1 - fitted.delta) / (fitted.scale + 1)

    rng = RandomState(1)
    n_samples = 30
    genotype = rng.randn(n_samples, n_samples + 50)
    outcome = bernoulli_sample(0.0, genotype, random_state=rng)

    genotype = ascontiguousarray(genotype, dtype=float)
    _stdnorm(genotype, 0, out=genotype)
    genotype /= sqrt(genotype.shape[1])

    decomposition = economic_qs_linear(genotype)
    # In-place rescaling of the second decomposition element to mean 1.
    s_values = decomposition[1]
    s_values /= s_values.mean()

    offset = ones((len(outcome), 1))
    model = GLMMExpFam(
        outcome, "probit", offset, QS=(decomposition[0], decomposition[1])
    )

    # First fit: delta set to 0 and fixed.
    model.delta = 0
    model.fix("delta")
    model.fit(verbose=False)
    assert_allclose(model.lml(), -20.725623168378615, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0.0001220703125, atol=1e-3)
    assert_allclose(model.scale, 0.33022865011938707, atol=ATOL, rtol=RTOL)
    assert_allclose(model.beta, [-0.002617161564786044], atol=ATOL, rtol=RTOL)

    h2_fixed_delta = _h2(model)

    # Second fit: delta released, restarted from delta=0.5 and scale=1.0.
    model.unfix("delta")
    model.delta = 0.5
    model.scale = 1.0
    model.fit(verbose=False)

    assert_allclose(model.lml(), -20.725623168378522, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0.5017852859580029, atol=1e-3)
    assert_allclose(model.scale, 0.9928931515372, atol=ATOL, rtol=RTOL)
    assert_allclose(model.beta, [-0.003203427206253548], atol=ATOL, rtol=RTOL)

    h2_free_delta = _h2(model)

    assert_allclose(h2_fixed_delta, h2_free_delta, atol=ATOL, rtol=RTOL)
Esempio n. 20
0
def test_glmmexpfam_poisson():
    """Fit a Poisson GLMMExpFam on simulated counts and pin its LML.

    The latent phenotype combines two covariates, four variants, noise and a
    kinship-correlated effect; it is standardised and used as the log-rate
    of the Poisson counts. The LML is checked before and after fitting.
    """
    rng = RandomState(1)
    n_samples = 30

    # Covariates: a constant column and an integer "age" column.
    intercept = ones(n_samples) * rng.randn()
    age = rng.randint(16, 75, n_samples)
    covariates = stack((intercept, age), axis=1)

    variants = rng.randn(n_samples, 4)

    covariate_effects = rng.randn(2)
    variant_effects = rng.randn(4)
    noise = rng.randn(n_samples)
    latent = covariates @ covariate_effects + variants @ variant_effects + noise

    # Kinship from 50 standardised columns plus 0.1 on the diagonal.
    genome = rng.randn(n_samples, 50)
    standardised = (genome - genome.mean(0)) / genome.std(0) / sqrt(genome.shape[1])
    kinship = standardised @ standardised.T + eye(n_samples) * 0.1

    latent += rng.multivariate_normal(zeros(n_samples), kinship)
    latent = (latent - latent.mean()) / latent.std()

    counts = rng.poisson(exp(latent))

    centred = covariates - covariates.mean(0)
    model = GLMMExpFam(counts, "poisson", centred, economic_qs(kinship))
    assert_allclose(model.lml(), -52.479557279193585)
    model.fit(verbose=False)
    assert_allclose(model.lml(), -34.09720756737648)
Esempio n. 21
0
def _st_glmm(y, lik, M, QS, verbose):
    """Fit a GLMMExpFam, then a GLMMNormal, and return the latter's scanner.

    Returns ``(scanner, v0, v1)`` where ``scanner`` is
    ``GLMMNormal.get_fast_scanner()``, ``v1`` is the ``v1`` attribute of the
    fitted ``GLMMExpFam`` and ``v0`` is its ``v0`` attribute, or ``nan``
    when ``QS`` is ``None``.
    """
    from numpy import nan
    from glimix_core.glmm import GLMMExpFam, GLMMNormal

    expfam = GLMMExpFam(y, lik, M, QS)
    expfam.fit(verbose=verbose)

    v0 = nan if QS is None else expfam.v0
    v1 = expfam.v1
    sys.stdout.flush()

    # The second model is built from the site parameters of the first fit.
    normal = GLMMNormal(expfam.site.eta, expfam.site.tau, M, QS)
    normal.fit(verbose=verbose)

    return normal.get_fast_scanner(), v0, v1
Esempio n. 22
0
def test_glmmexpfam_bernoulli_problematic():
    """Fit a Bernoulli GLMMExpFam with delta set to 0 and fixed.

    The LML, delta, scale and beta after fitting are compared with stored
    values.
    """
    rng = RandomState(1)
    n_samples = 30
    genotype = rng.randn(n_samples, n_samples + 50)
    outcome = bernoulli_sample(0.0, genotype, random_state=rng)

    genotype = ascontiguousarray(genotype, dtype=float)
    _stdnorm(genotype, 0, out=genotype)
    genotype /= sqrt(genotype.shape[1])

    decomposition = economic_qs_linear(genotype)
    # In-place rescaling of the second decomposition element to mean 1.
    s_values = decomposition[1]
    s_values /= s_values.mean()

    offset = ones((len(outcome), 1))
    model = GLMMExpFam(
        outcome, "bernoulli", offset, QS=(decomposition[0], decomposition[1])
    )
    model.delta = 0
    model.fix("delta")
    model.fit(verbose=False)

    assert_allclose(model.lml(), -20.727007958026853, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0, atol=1e-3)
    assert_allclose(model.scale, 0.879915823030081, atol=ATOL, rtol=RTOL)
    assert_allclose(model.beta, [-0.00247856564728], atol=ATOL, rtol=RTOL)
Esempio n. 23
0
def test_glmmexpfam_copy():
    """Check that ``GLMMExpFam.copy()`` yields an independent model.

    After fitting the source model, a copy must report the same LML.
    Changing the copy's scale must leave the source's LML untouched, and
    refitting both must bring them to the same LML again.
    """
    n_samples = 10

    rng = RandomState(0)
    covariates = rng.randn(n_samples, 5)
    kinship = linear_eye_cov().value()
    latent = rng.multivariate_normal(0.2 * ones(n_samples), kinship)
    kinship_qs = economic_qs(kinship)

    n_trials = rng.randint(1, 30, n_samples)
    n_successes = zeros(n_samples, dtype=int)
    for idx, trials in enumerate(n_trials):
        n_successes[idx] += sum(latent[idx] + 0.2 * rng.randn(trials) > 0)

    n_trials = ascontiguousarray(n_trials)
    source = GLMMExpFam(
        n_successes, ("binomial", n_trials), covariates, kinship_qs
    )

    assert_allclose(source.lml(), -29.10216812909928, atol=ATOL, rtol=RTOL)
    source.fit(verbose=False)

    fitted_lml = -19.575736562427252
    assert_allclose(source.lml(), fitted_lml)

    clone = source.copy()
    assert_allclose(clone.lml(), fitted_lml)

    # Perturb only the clone; the source must keep its fitted LML.
    clone.scale = 0.92
    assert_allclose(source.lml(), fitted_lml, atol=ATOL, rtol=RTOL)
    assert_allclose(clone.lml(), -30.832831740038056, atol=ATOL, rtol=RTOL)

    source.fit(verbose=False)
    clone.fit(verbose=False)

    refitted_lml = -19.575736562378573
    assert_allclose(source.lml(), refitted_lml)
    assert_allclose(clone.lml(), refitted_lml)
Esempio n. 24
0
def estimate(pheno, lik, K, covs=None, verbose=True):
    r"""Estimate the so-called narrow-sense heritability.

    A ``GLMMExpFam`` is fitted on the economic QS of the standardised
    kinship, and the ratio ``g / (v + g + e)`` is returned, where
    ``g = scale * (1 - delta)``, ``e = scale * delta`` and ``v`` is the
    variance of the fitted mean.

    Let :math:`N` be the sample size and :math:`S` the number of covariates.

    Parameters
    ----------
    pheno : tuple, array_like
        Phenotype. For the ``'binomial'`` likelihood the sample size is read
        from ``len(pheno[0])``; otherwise from ``len(pheno)``.
    lik : {'normal', 'bernoulli', 'binomial', 'poisson'}
        Likelihood name; it is lower-cased before use.
    K : array_like
        Kinship matrix. Dimensions :math:`N\times N`.
    covs : array_like
        Covariates. Default is an offset (a column of ones).
        Dimensions :math:`N\times S`.
    verbose : bool
        Forwarded to the optimiser.

    Returns
    -------
    float
        Estimated heritability.

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt
        >>> from numpy.random import RandomState
        >>> from limix.heritability import estimate
        >>>
        >>> random = RandomState(0)
        >>>
        >>> G = random.randn(50, 100)
        >>> K = dot(G, G.T)
        >>> z = dot(G, random.randn(100)) / sqrt(100)
        >>> y = random.poisson(exp(z))
        >>>
        >>> print('%.2f' % estimate(y, 'poisson', K, verbose=False))
        0.70
    """
    QS = economic_qs(_background_standardize(K))

    lik = lik.lower()

    # A binomial phenotype is indexed at [0] to obtain the sample size.
    n_samples = len(pheno[0]) if lik == "binomial" else len(pheno)

    if covs is None:
        covs = ones((n_samples, 1))

    model = GLMMExpFam(pheno, lik, covs, QS)
    model.feed().maximize(verbose=verbose)

    genetic_var = model.scale * (1 - model.delta)
    noise_var = model.scale * model.delta

    return genetic_var / (var(model.mean()) + genetic_var + noise_var)
Esempio n. 25
0
import numpy as np
import numpy_sugar as ns
from glimix_core.glmm import GLMMExpFam
from time import time

# Benchmark script: time a binomial GLMMExpFam fit on pre-generated data
# loaded from .npy files in the current working directory.
G = np.load('null_G.npy')
ntri = np.load('null_ntri.npy')
nsuc = np.load('null_nsuc.npy')
# NOTE(review): P is not used below; the unpacking still requires G to be 2-D.
N, P = G.shape

# Economic QS of the G G^T matrix, with a single column of ones as covariate.
QS = ns.linalg.economic_qs(G.dot(G.T))
X = np.ones((N, 1))

# Cast both arrays to float before handing them to the model.
ntri = np.asarray(ntri, float)
nsuc = np.asarray(nsuc, float)

# Only model construction and fitting are timed.
start = time()
glmm = GLMMExpFam((nsuc, ntri), "binomial", X, QS)
glmm.fit(verbose=True)
stop = time()
elapsed = stop - start
print("Elapsed: {}".format(elapsed))
# NOTE(review): assumes the "out" directory already exists -- confirm.
np.save("out/fastglmm_N{}".format(N), elapsed)
Esempio n. 26
0
def estimate(y, lik, K, M=None, verbose=True):
    r"""Estimate the so-called narrow-sense heritability.

    It supports Normal, Bernoulli, Probit, Binomial, and Poisson phenotypes.
    Let :math:`N` be the sample size and :math:`S` the number of covariates.

    Parameters
    ----------
    y : array_like
        Either a tuple of two arrays of `N` individuals each (Binomial
        phenotypes) or an array of `N` individuals (Normal, Poisson, or
        Bernoulli phenotypes). If a continuous phenotype is provided (i.e., a Normal
        one), make sure they have been normalised in such a way that its values are
        not extremely large; it might cause numerical errors otherwise. For example,
        by using :func:`limix.qc.mean_standardize` or
        :func:`limix.qc.quantile_gaussianize`.
    lik : "normal", "bernoulli", "probit", "binomial", "poisson"
        Sample likelihood describing the residual distribution. A tuple or
        list whose first element is the likelihood name is also accepted.
    K : array_like
        :math:`N`-by-:math:`N` covariance matrix. It might be, for example, the
        estimated kinship relationship between the individuals. The provided matrix will
        be normalised via the function :func:`limix.qc.normalise_covariance`.
    M : array_like, optional
        :math:`N` individuals by :math:`S` covariates.
        It will create a :math:`N`-by-:math:`1` matrix ``M`` of ones representing the offset
        covariate if ``None`` is passed. If an array is passed, it will be used as is.
        Defaults to ``None``.
    verbose : bool, optional
        ``True`` to display progress and summary; ``False`` otherwise.

    Returns
    -------
    float
        Estimated heritability.

    Raises
    ------
    ValueError
        If the outcome, the covariates, or the covariance matrix contain
        non-finite values.

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt
        >>> from numpy.random import RandomState
        >>> from limix.her import estimate
        >>>
        >>> random = RandomState(0)
        >>>
        >>> G = random.randn(150, 200) / sqrt(200)
        >>> K = dot(G, G.T)
        >>> z = dot(G, random.randn(200)) + random.randn(150)
        >>> y = random.poisson(exp(z))
        >>>
        >>> print('%.3f' % estimate(y, 'poisson', K, verbose=False))  # doctest: +FLOAT_CMP
        0.183

    Notes
    -----
    It will raise a ``ValueError`` exception if non-finite values are passed. Please,
    refer to the :func:`limix.qc.mean_impute` function for missing value imputation.
    """
    from numpy_sugar import is_all_finite
    from numpy_sugar.linalg import economic_qs
    from numpy import ones, pi, var
    from glimix_core.glmm import GLMMExpFam
    from glimix_core.lmm import LMM

    # Accept a bare likelihood name as well as a (name, ...) tuple or list.
    if not isinstance(lik, (tuple, list)):
        lik = (lik,)

    lik_name = lik[0].lower()
    check_likelihood_name(lik_name)

    with session_block("heritability analysis", disable=not verbose):

        if M is None:
            # NOTE(review): ``len(y)`` is the sample size only when ``y`` is a
            # single array; for the tuple form described in the docstring it
            # would be 2 -- confirm which form callers actually pass.
            M = ones((len(y), 1))

        with session_line("Normalising input...", disable=not verbose):
            data = conform_dataset(y, M=M, K=K)

        y = data["y"]
        M = data["M"]
        K = data["K"]

        if not is_all_finite(y):
            raise ValueError("Outcome must have finite values only.")

        if not is_all_finite(M):
            raise ValueError("Covariates must have finite values only.")

        if K is not None:
            if not is_all_finite(K):
                # K is the covariance matrix, not the covariates.
                raise ValueError("Covariance matrix must have finite values only.")

            K = normalise_covariance(K)

        y = normalise_extreme_values(y, lik)

        QS = economic_qs(K) if K is not None else None

        if lik_name == "normal":
            method = LMM(y.values, M.values, QS)
            method.fit(verbose=verbose)
        else:
            method = GLMMExpFam(y, lik, M.values, QS, n_int=500)
            method.fit(verbose=verbose, factr=1e6, pgtol=1e-3)

        g = method.scale * (1 - method.delta)
        e = method.scale * method.delta
        if lik_name == "bernoulli":
            # Extra pi^2 / 3 noise term, added for this likelihood only.
            e += pi * pi / 3

        if lik_name == "normal":
            v = method.fixed_effects_variance
        else:
            v = var(method.mean())

        return g / (v + g + e)
Esempio n. 27
0
def qtl_test_glmm(
    snps,
    pheno,
    lik,
    K,
    covs=None,
    test="lrt",
    NumIntervalsDeltaAlt=100,
    searchDelta=False,
    verbose=True,
):
    """
    Wrapper function for univariate single-variant association testing
    using a generalised linear mixed model.

    A ``GLMMExpFam`` is fitted first; an ``LMM`` is then built on the
    pseudo-phenotype ``eta / tau`` with covariance
    ``scale * (1 - delta) * K + diag(1 / tau - min(1 / tau) + 1e-4)``.

    Args:
        snps (array_like):
            `N` individuals by `S` SNPs.
        pheno (tuple, array_like):
            Either a tuple of two arrays of `N` individuals each (Binomial
            phenotypes) or an array of `N` individuals (Poisson or Bernoulli
            phenotypes). It does not support missing values yet.
        lik ({'bernoulli', 'binomial', 'poisson'}):
            Sample likelihood describing the residual distribution.
        K (array_like):
            `N` by `N` covariance matrix (e.g., kinship coefficients).
        covs (array_like, optional):
            `N` individuals by `D` covariates.
            By default, ``covs`` is a (`N`, `1`) array of ones.
        test ({'lrt'}, optional):
            Likelihood ratio test (default).
        NumIntervalsDeltaAlt (int, optional):
            number of steps for delta optimization on the alternative model.
            Requires ``searchDelta=True`` to have an effect.
        searchDelta (bool, optional):
            if ``True``, delta optimization on the alternative model is
            carried out. By default ``searchDelta`` is ``False``.
        verbose (bool, optional):
            if ``True``, details such as runtime are displayed.

    Note:
        ``test``, ``NumIntervalsDeltaAlt`` and ``searchDelta`` are accepted
        but are not forwarded to ``LMM`` by this function.

    Returns:
        :class:`limix.qtl.LMM`: LIMIX LMM object

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt
        >>> from numpy.random import RandomState
        >>> from limix.qtl import qtl_test_glmm
        >>>
        >>> random = RandomState(0)
        >>>
        >>> G = random.randn(250, 500) / sqrt(500)
        >>> beta = 0.01 * random.randn(500)
        >>>
        >>> z = dot(G, beta) + 0.1 * random.randn(250)
        >>> z += dot(G[:, 0], 1) # causal SNP
        >>>
        >>> y = random.poisson(exp(z))
        >>>
        >>> candidates = G[:, :5]
        >>> K = dot(G[:, 5:], G[:, 5:].T)
        >>> lm = qtl_test_glmm(candidates, y, 'poisson', K, verbose=False)
        >>>
        >>> print(lm.getPv())
        [[0.0694 0.3336 0.5899 0.7388 0.7796]]
    """

    snps = _asarray(snps)

    if covs is None:
        covs = ones((snps.shape[0], 1))
    else:
        covs = _asarray(covs)

    K = _asarray(K)

    # A tuple/list phenotype is converted element-wise to float arrays.
    if isinstance(pheno, (tuple, list)):
        y = tuple(asarray(p, float) for p in pheno)
    else:
        y = asarray(pheno, float)

    QS = economic_qs(K)
    glmm = GLMMExpFam(y, lik, covs, QS)
    glmm.feed().maximize(verbose=verbose)

    # Quantities taken from the fitted GLMM.
    eta = glmm.site.eta
    tau = glmm.site.tau
    scale = float(glmm.scale)
    delta = float(glmm.delta)

    # Pseudo-phenotype and per-sample variance derived from the site
    # parameters (presumably the EP site natural parameters -- confirm).
    mu = eta / tau
    site_var = 1.0 / tau
    s2_g = scale * (1 - delta)
    # Shift the per-sample variances so that the smallest diagonal entry
    # added to the covariance is 1e-4.
    tR = s2_g * K + diag(site_var - site_var.min() + 1e-4)

    return LMM(snps=snps, pheno=mu, K=tR, covs=covs, verbose=verbose)