예제 #1
0
def test_glmmexpfam_optimize():
    nsamples = 10

    random = RandomState(0)
    X = random.randn(nsamples, 5)
    K = linear_eye_cov().value()
    z = random.multivariate_normal(0.2 * ones(nsamples), K)
    QS = economic_qs(K)

    ntri = random.randint(1, 30, nsamples)
    nsuc = zeros(nsamples, dtype=int)
    for (i, ni) in enumerate(ntri):
        nsuc[i] += sum(z[i] + 0.2 * random.randn(ni) > 0)

    ntri = ascontiguousarray(ntri)
    glmm = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)

    assert_allclose(glmm.lml(), -29.102168129099287, atol=ATOL, rtol=RTOL)
    glmm.fix("beta")
    glmm.fix("scale")

    glmm.fit(verbose=False)

    assert_allclose(glmm.lml(), -27.635788105778012, atol=ATOL, rtol=RTOL)

    glmm.unfix("beta")
    glmm.unfix("scale")

    glmm.fit(verbose=False)

    assert_allclose(glmm.lml(), -19.68486269551159, atol=ATOL, rtol=RTOL)
예제 #2
0
def test_glmmexpfam_predict():

    random = RandomState(4)
    n = 100
    p = n + 1

    X = ones((n, 2))
    X[:, 1] = random.randn(n)

    G = random.randn(n, p)
    G /= G.std(0)
    G -= G.mean(0)
    G /= sqrt(p)
    K = dot(G, G.T)

    i = asarray(arange(0, n), int)
    si = random.choice(i, n, replace=False)
    ntest = int(n // 5)
    itrain = si[:-ntest]
    itest = si[-ntest:]

    Xtrain = X[itrain, :]
    Ktrain = K[itrain, :][:, itrain]

    Xtest = X[itest, :]

    beta = random.randn(2)
    z = random.multivariate_normal(dot(X, beta), 0.9 * K + 0.1 * eye(n))

    ntri = random.randint(1, 100, n)
    nsuc = zeros(n, dtype=int)
    for (i, ni) in enumerate(ntri):
        nsuc[i] += sum(z[i] + 0.2 * random.randn(ni) > 0)

    ntri = ascontiguousarray(ntri)

    QStrain = economic_qs(Ktrain)
    nsuc_train = ascontiguousarray(nsuc[itrain])
    ntri_train = ascontiguousarray(ntri[itrain])

    nsuc_test = ascontiguousarray(nsuc[itest])
    ntri_test = ascontiguousarray(ntri[itest])

    glmm = GLMMExpFam(nsuc_train, ("binomial", ntri_train), Xtrain, QStrain)
    glmm.fit(verbose=False)
    ks = K[itest, :][:, itrain]
    kss = asarray([K[i, i] for i in itest])
    pm = glmm.predictive_mean(Xtest, ks, kss)
    pk = glmm.predictive_covariance(Xtest, ks, kss)
    r = nsuc_test / ntri_test
    assert_(corrcoef([pm, r])[0, 1] > 0.8)
    assert_allclose(pk[0], 54.263705682514846)
예제 #3
0
 def _fit_glmm_simple_model(self, verbose):
     from numpy_sugar.linalg import economic_qs
     from glimix_core.glmm import GLMMExpFam
     from numpy import asarray
     K = self._get_matrix_simple_model()
     y = asarray(self._y, float).ravel()
     QS = None
     if K is not None:
         QS = economic_qs(K)
     glmm = GLMMExpFam(y, self._lik, self._M, QS)
     glmm.fit(verbose=verbose)
     self._set_simple_model_variances(glmm.v0, glmm.v1)
     self._glmm = glmm
예제 #4
0
def test_glmmexpfam_poisson():
    from numpy import ones, stack, exp, zeros
    from numpy.random import RandomState
    from numpy_sugar.linalg import economic_qs
    from pandas import DataFrame

    random = RandomState(1)

    # sample size
    n = 30

    # covariates
    offset = ones(n) * random.randn()
    age = random.randint(16, 75, n)
    M = stack((offset, age), axis=1)
    M = DataFrame(stack([offset, age], axis=1), columns=["offset", "age"])
    M["sample"] = [f"sample{i}" for i in range(n)]
    M = M.set_index("sample")

    # genetic variants
    G = random.randn(n, 4)

    # sampling the phenotype
    alpha = random.randn(2)
    beta = random.randn(4)
    eps = random.randn(n)
    y = M @ alpha + G @ beta + eps

    # Whole genotype of each sample.
    X = random.randn(n, 50)
    # Estimate a kinship relationship between samples.
    X_ = (X - X.mean(0)) / X.std(0) / sqrt(X.shape[1])
    K = X_ @ X_.T + eye(n) * 0.1
    # Update the phenotype
    y += random.multivariate_normal(zeros(n), K)
    y = (y - y.mean()) / y.std()

    z = y.copy()
    y = random.poisson(exp(z))

    M = M - M.mean(0)
    QS = economic_qs(K)
    glmm = GLMMExpFam(y, "poisson", M, QS)
    assert_allclose(glmm.lml(), -52.479557279193585)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -34.09720756737648)
예제 #5
0
파일: _iscan.py 프로젝트: phue/limix
def _glmm(y, lik, M, QS, verbose):
    from glimix_core.glmm import GLMMExpFam, GLMMNormal

    glmm = GLMMExpFam(y.ravel(), lik, M, QS)

    glmm.fit(verbose=verbose)
    v0 = glmm.v0
    v1 = glmm.v1
    sys.stdout.flush()

    eta = glmm.site.eta
    tau = glmm.site.tau

    gnormal = GLMMNormal(eta, tau, M, QS)
    gnormal.fit(verbose=verbose)

    scanner = ScannerWrapper(gnormal.get_fast_scanner())

    return scanner, v0, v1
예제 #6
0
def test_glmmexpfam_binomial_large_ntrials():
    random = RandomState(0)
    n = 10

    X = random.randn(n, 2)
    G = random.randn(n, 100)
    K = dot(G, G.T)
    ntrials = random.randint(1, 100000, n)
    z = dot(G, random.randn(100)) / sqrt(100)

    successes = zeros(len(ntrials), int)
    for i in range(len(ntrials)):
        for _ in range(ntrials[i]):
            successes[i] += int(z[i] + 0.1 * random.randn() > 0)

    QS = economic_qs(K)
    glmm = GLMMExpFam(successes, ("binomial", ntrials), X, QS)
    glmm.fit(verbose=False)

    assert_allclose(glmm.lml(), -43.067433588125446)
예제 #7
0
def test_glmmexpfam_optimize_low_rank():
    nsamples = 10

    random = RandomState(0)
    X = random.randn(nsamples, 5)
    K = dot(X, X.T)
    z = dot(X, 0.2 * random.randn(5))
    QS = economic_qs(K)

    ntri = random.randint(1, 30, nsamples)
    nsuc = zeros(nsamples, dtype=int)
    for (i, ni) in enumerate(ntri):
        nsuc[i] += sum(z[i] + 0.2 * random.randn(ni) > 0)

    ntri = ascontiguousarray(ntri)
    glmm = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)

    assert_allclose(glmm.lml(), -18.60476792256323, atol=ATOL, rtol=RTOL)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -7.800621320491801, atol=ATOL, rtol=RTOL)
예제 #8
0
def test_glmmexpfam_bernoulli_probit_assure_delta_fixed():
    random = RandomState(1)
    N = 10
    G = random.randn(N, N + 50)
    y = bernoulli_sample(0.0, G, random_state=random)

    G = ascontiguousarray(G, dtype=float)
    _stdnorm(G, 0, out=G)
    G /= sqrt(G.shape[1])

    QS = economic_qs_linear(G)
    S0 = QS[1]
    S0 /= S0.mean()

    X = ones((len(y), 1))
    model = GLMMExpFam(y, "probit", X, QS=(QS[0], QS[1]))
    model.fit(verbose=False)

    assert_allclose(model.lml(), -6.108751595773174, rtol=RTOL)
    assert_allclose(model.delta, 1.4901161193847673e-08, atol=1e-5)
    assert_(model._isfixed("logitdelta"))
예제 #9
0
파일: _st_scan.py 프로젝트: zhzheng92/limix
def _perform_glmm(y, lik, M, K, QS, G, verbose):
    from glimix_core.glmm import GLMMExpFam, GLMMNormal
    from pandas import Series
    from xarray import DataArray

    glmm = GLMMExpFam(y.ravel(), lik, M.values, QS)
    glmm.fit(verbose=verbose)
    sys.stdout.flush()

    eta = glmm.site.eta
    tau = glmm.site.tau

    gnormal = GLMMNormal(eta, tau, M.values, QS)
    gnormal.fit(verbose=verbose)

    beta = gnormal.beta

    covariates = list(M.coords["covariate"].values)
    ncov_effsizes = Series(beta, covariates)

    flmm = gnormal.get_fast_scanner()
    flmm.set_scale(1.0)
    null_lml = flmm.null_lml()

    if hasattr(G, "data"):
        values = G.data
    else:
        values = G.values
    alt_lmls, effsizes = flmm.fast_scan(values, verbose=verbose)

    coords = {
        k: ("candidate", G.coords[k].values)
        for k in G.coords.keys()
        if G.coords[k].dims[0] == "candidate"
    }

    alt_lmls = DataArray(alt_lmls, dims=["candidate"], coords=coords)
    effsizes = DataArray(effsizes, dims=["candidate"], coords=coords)

    return QTLModel(null_lml, alt_lmls, effsizes, ncov_effsizes)
예제 #10
0
def test_glmmexpfam_bernoulli_probit_problematic():
    random = RandomState(1)
    N = 30
    G = random.randn(N, N + 50)
    y = bernoulli_sample(0.0, G, random_state=random)

    G = ascontiguousarray(G, dtype=float)
    _stdnorm(G, 0, out=G)
    G /= sqrt(G.shape[1])

    QS = economic_qs_linear(G)
    S0 = QS[1]
    S0 /= S0.mean()

    X = ones((len(y), 1))
    model = GLMMExpFam(y, "probit", X, QS=(QS[0], QS[1]))
    model.delta = 0
    model.fix("delta")
    model.fit(verbose=False)
    assert_allclose(model.lml(), -20.725623168378615, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0.0001220703125, atol=1e-3)
    assert_allclose(model.scale, 0.33022865011938707, atol=ATOL, rtol=RTOL)
    assert_allclose(model.beta, [-0.002617161564786044], atol=ATOL, rtol=RTOL)

    h20 = model.scale * (1 - model.delta) / (model.scale + 1)

    model.unfix("delta")
    model.delta = 0.5
    model.scale = 1.0
    model.fit(verbose=False)

    assert_allclose(model.lml(), -20.725623168378522, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0.5017852859580029, atol=1e-3)
    assert_allclose(model.scale, 0.9928931515372, atol=ATOL, rtol=RTOL)
    assert_allclose(model.beta, [-0.003203427206253548], atol=ATOL, rtol=RTOL)

    h21 = model.scale * (1 - model.delta) / (model.scale + 1)

    assert_allclose(h20, h21, atol=ATOL, rtol=RTOL)
예제 #11
0
def test_glmmexpfam_poisson():
    random = RandomState(1)

    # sample size
    n = 30

    # covariates
    offset = ones(n) * random.randn()
    age = random.randint(16, 75, n)
    M = stack((offset, age), axis=1)

    # genetic variants
    G = random.randn(n, 4)

    # sampling the phenotype
    alpha = random.randn(2)
    beta = random.randn(4)
    eps = random.randn(n)
    y = M @ alpha + G @ beta + eps

    # Whole genotype of each sample.
    X = random.randn(n, 50)
    # Estimate a kinship relationship between samples.
    X_ = (X - X.mean(0)) / X.std(0) / sqrt(X.shape[1])
    K = X_ @ X_.T + eye(n) * 0.1
    # Update the phenotype
    y += random.multivariate_normal(zeros(n), K)
    y = (y - y.mean()) / y.std()

    z = y.copy()
    y = random.poisson(exp(z))

    M = M - M.mean(0)
    QS = economic_qs(K)
    glmm = GLMMExpFam(y, "poisson", M, QS)
    assert_allclose(glmm.lml(), -52.479557279193585)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -34.09720756737648)
예제 #12
0
파일: _scan.py 프로젝트: Joyvalley/limix
def _st_glmm(y, lik, M, QS, verbose):
    from numpy import nan
    from glimix_core.glmm import GLMMExpFam, GLMMNormal

    glmm = GLMMExpFam(y, lik, M, QS)

    glmm.fit(verbose=verbose)

    if QS is None:
        v0 = nan
    else:
        v0 = glmm.v0

    v1 = glmm.v1
    sys.stdout.flush()

    eta = glmm.site.eta
    tau = glmm.site.tau

    gnormal = GLMMNormal(eta, tau, M, QS)
    gnormal.fit(verbose=verbose)

    return gnormal.get_fast_scanner(), v0, v1
예제 #13
0
def test_glmmexpfam_bernoulli_problematic():
    random = RandomState(1)
    N = 30
    G = random.randn(N, N + 50)
    y = bernoulli_sample(0.0, G, random_state=random)

    G = ascontiguousarray(G, dtype=float)
    _stdnorm(G, 0, out=G)
    G /= sqrt(G.shape[1])

    QS = economic_qs_linear(G)
    S0 = QS[1]
    S0 /= S0.mean()

    X = ones((len(y), 1))
    model = GLMMExpFam(y, "bernoulli", X, QS=(QS[0], QS[1]))
    model.delta = 0
    model.fix("delta")
    model.fit(verbose=False)
    assert_allclose(model.lml(), -20.727007958026853, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0, atol=1e-3)
    assert_allclose(model.scale, 0.879915823030081, atol=ATOL, rtol=RTOL)
    assert_allclose(model.beta, [-0.00247856564728], atol=ATOL, rtol=RTOL)
예제 #14
0
def test_glmmexpfam_copy():
    nsamples = 10

    random = RandomState(0)
    X = random.randn(nsamples, 5)
    K = linear_eye_cov().value()
    z = random.multivariate_normal(0.2 * ones(nsamples), K)
    QS = economic_qs(K)

    ntri = random.randint(1, 30, nsamples)
    nsuc = zeros(nsamples, dtype=int)
    for (i, ni) in enumerate(ntri):
        nsuc[i] += sum(z[i] + 0.2 * random.randn(ni) > 0)

    ntri = ascontiguousarray(ntri)
    glmm0 = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)

    assert_allclose(glmm0.lml(), -29.10216812909928, atol=ATOL, rtol=RTOL)
    glmm0.fit(verbose=False)

    v = -19.575736562427252
    assert_allclose(glmm0.lml(), v)

    glmm1 = glmm0.copy()
    assert_allclose(glmm1.lml(), v)

    glmm1.scale = 0.92
    assert_allclose(glmm0.lml(), v, atol=ATOL, rtol=RTOL)
    assert_allclose(glmm1.lml(), -30.832831740038056, atol=ATOL, rtol=RTOL)

    glmm0.fit(verbose=False)
    glmm1.fit(verbose=False)

    v = -19.575736562378573
    assert_allclose(glmm0.lml(), v)
    assert_allclose(glmm1.lml(), v)
예제 #15
0
import numpy as np
import numpy_sugar as ns
from glimix_core.glmm import GLMMExpFam
from time import time

G = np.load('null_G.npy')
ntri = np.load('null_ntri.npy')
nsuc = np.load('null_nsuc.npy')
N, P = G.shape

QS = ns.linalg.economic_qs(G.dot(G.T))
X = np.ones((N, 1))

ntri = np.asarray(ntri, float)
nsuc = np.asarray(nsuc, float)

start = time()
glmm = GLMMExpFam((nsuc, ntri), "binomial", X, QS)
glmm.fit(verbose=True)
stop = time()
elapsed = stop - start
print("Elapsed: {}".format(elapsed))
np.save("out/fastglmm_N{}".format(N), elapsed)