def test_glmmexpfam_layout():
    """GLMMExpFam must raise ValueError for the mismatched inputs below."""
    covariates = asarray([[0.5, 1.0]])
    QS = economic_qs(asarray([[1.0, 0.0], [0.0, 1.0]]))

    # Each outcome is paired with the same 1-row covariate matrix and the
    # decomposition of a 2x2 identity matrix; both combinations must fail.
    for outcome in (asarray([1.0, 0.5]), asarray([1.0])):
        with pytest.raises(ValueError):
            GLMMExpFam(outcome, "poisson", covariates, QS=QS)
def test_glmmexpfam_qs_none():
    """Check lml values of a binomial GLMMExpFam built with ``QS=None``."""
    size = 10
    rng = RandomState(0)
    tol = {"atol": ATOL, "rtol": RTOL}

    X = rng.randn(size, 5)
    K = linear_eye_cov().value()
    z = rng.multivariate_normal(0.2 * ones(size), K)

    ntri = rng.randint(1, 30, size)
    nsuc = zeros(size, dtype=int)
    # One noise draw per sample; the order of RNG calls fixes the expected values.
    for pos, trials in enumerate(ntri):
        nsuc[pos] += sum(z[pos] + 0.2 * rng.randn(trials) > 0)
    ntri = ascontiguousarray(ntri)

    glmm = GLMMExpFam(nsuc, ("binomial", ntri), X, None)
    assert_allclose(glmm.lml(), -38.30173374439622, **tol)

    # First fit: "beta" and "scale" are marked as fixed.
    for name in ("beta", "scale"):
        glmm.fix(name)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -32.03927471370041, **tol)

    # Second fit: both parameters released again.
    for name in ("beta", "scale"):
        glmm.unfix(name)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -19.575736561760586, **tol)
def test_glmmexpfam_precise():
    """Compare lml against reference values for several scale/delta settings."""
    size = 10
    rng = RandomState(0)
    X = rng.randn(size, 5)
    K = linear_eye_cov().value()
    QS = economic_qs(K)

    ntri = rng.randint(1, 30, size)
    nsuc = [rng.randint(0, upper) for upper in ntri]

    glmm = GLMMExpFam(nsuc, ["binomial", ntri], X, QS)
    glmm.beta = asarray([1.0, 0, 0.5, 0.1, 0.4])

    # (scale, expected lml) pairs, checked in this order.
    references = (
        (1.0, -44.74191041468836),
        (2.0, -36.19907331929086),
        (3.0, -33.02139830387104),
        (4.0, -31.42553401678996),
        (5.0, -30.507029479473243),
        (6.0, -29.937569702301232),
    )
    for scale, expected in references:
        glmm.scale = scale
        assert_allclose(glmm.lml(), expected, atol=ATOL, rtol=RTOL)

    # delta is changed while scale stays at its last value (6.0).
    glmm.delta = 0.1
    assert_allclose(glmm.lml(), -30.09977907145003, atol=ATOL, rtol=RTOL)

    assert_allclose(glmm._check_grad(), 0, atol=1e-3, rtol=RTOL)
def estimate(y, lik, K, M=None, verbose=True):
    """Fit a (generalised) linear mixed model and return variance terms.

    Parameters
    ----------
    y, K, M
        Outcome, covariance matrix and covariates; they are passed through
        ``conform_dataset`` and checked with ``assert_finite`` before use.
    lik
        Likelihood specification, normalised via ``normalize_likelihood``;
        its first element is used as the likelihood name.
    verbose : bool
        ``False`` disables the session block/line output and is forwarded
        to the model's ``fit``.

    Returns
    -------
    tuple
        ``(g, v, e)`` where ``g = scale * (1 - delta)``,
        ``v = var(method.mean())`` and ``e = scale * delta`` (plus
        ``pi**2 / 3`` when the likelihood name is ``"bernoulli"``).
    """
    from numpy_sugar.linalg import economic_qs
    # NOTE(review): ``diag`` and ``assert_likelihood`` are imported but not
    # referenced by the active code below.
    from numpy import pi, var, diag
    from glimix_core.glmm import GLMMExpFam
    from glimix_core.lmm import LMM
    from limix._data._assert import assert_likelihood
    from limix._data import normalize_likelihood, conform_dataset
    from limix.qtl._assert import assert_finite
    from limix._display import session_block, session_line

    lik = normalize_likelihood(lik)
    lik_name = lik[0]
    quiet = not verbose

    with session_block("Heritability analysis", disable=quiet):

        with session_line("Normalising input...", disable=quiet):
            data = conform_dataset(y, M=M, K=K)

        y, M, K = data["y"], data["M"], data["K"]
        assert_finite(y, M, K)

        # K = K / diag(K).mean()
        QS = None if K is None else economic_qs(K)

        if lik_name == "normal":
            method = LMM(y.values, M.values, QS, restricted=True)
            method.fit(verbose=verbose)
        else:
            method = GLMMExpFam(y, lik, M.values, QS, n_int=500)
            method.fit(verbose=verbose, factr=1e6, pgtol=1e-3)

        scale, delta = method.scale, method.delta
        g = scale * (1 - delta)
        e = scale * delta
        if lik_name == "bernoulli":
            e += pi * pi / 3

        return g, var(method.mean()), e
def test_glmmexpfam_optimize():
    """Check lml of a binomial GLMMExpFam initially and after two fits."""
    size = 10
    rng = RandomState(0)
    tol = {"atol": ATOL, "rtol": RTOL}

    X = rng.randn(size, 5)
    K = linear_eye_cov().value()
    z = rng.multivariate_normal(0.2 * ones(size), K)
    QS = economic_qs(K)

    ntri = rng.randint(1, 30, size)
    nsuc = zeros(size, dtype=int)
    # One noise draw per sample; the order of RNG calls fixes the expected values.
    for pos, trials in enumerate(ntri):
        nsuc[pos] += sum(z[pos] + 0.2 * rng.randn(trials) > 0)
    ntri = ascontiguousarray(ntri)

    glmm = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)
    assert_allclose(glmm.lml(), -29.102168129099287, **tol)

    # First fit: "beta" and "scale" are marked as fixed.
    for name in ("beta", "scale"):
        glmm.fix(name)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -27.635788105778012, **tol)

    # Second fit: both parameters released again.
    for name in ("beta", "scale"):
        glmm.unfix(name)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -19.68486269551159, **tol)
def estimate(y_phe, lik, kin, marker_mat=None, verbose=True):
    """Fit a (generalised) linear mixed model and return variance terms.

    The inputs go through ``conform_dataset`` and ``assert_finite``. An
    ``LMM`` is fitted when the likelihood name is ``"normal"``, otherwise
    a ``GLMMExpFam``.

    Returns a tuple ``(v_g, v_v, v_e)`` with ``v_g = scale * (1 - delta)``,
    ``v_v = var(method.mean())`` and ``v_e = scale * delta`` (plus
    ``pi**2 / 3`` for the ``"bernoulli"`` likelihood).
    """
    lik = normalize_likelihood(lik)
    lik_name = lik[0]
    quiet = not verbose

    with session_block("Heritability analysis", disable=quiet):

        with session_line("Normalising input...", disable=quiet):
            data = conform_dataset(y_phe, M=marker_mat, K=kin)

        y_phe, marker_mat, kin = data["y"], data["M"], data["K"]
        assert_finite(y_phe, marker_mat, kin)

        # K = K / diag(K).mean()
        q_s = None if kin is None else economic_qs(kin)

        if lik_name == "normal":
            method = LMM(y_phe.values, marker_mat.values, q_s, restricted=True)
            method.fit(verbose=verbose)
        else:
            method = GLMMExpFam(y_phe, lik, marker_mat.values, q_s, n_int=500)
            method.fit(verbose=verbose, factr=1e6, pgtol=1e-3)

        scale, delta = method.scale, method.delta
        v_g = scale * (1 - delta)
        v_e = scale * delta
        if lik_name == "bernoulli":
            v_e += pi * pi / 3

        return v_g, var(method.mean()), v_e
def test_glmmexpfam_delta_one_zero():
    """Check lml, gradient and fitted delta when starting from delta 0 and 1."""
    rng = RandomState(1)
    size = 30
    tol = {"atol": ATOL, "rtol": RTOL}

    X = rng.randn(size, 6)
    K = dot(X, X.T)
    K /= K.diagonal().mean()
    QS = economic_qs(K)

    ntri = rng.randint(1, 30, size)
    nsuc = [rng.randint(0, upper) for upper in ntri]

    glmm = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)
    glmm.beta = asarray([1.0, 0, 0.5, 0.1, 0.4, -0.2])

    # Start from delta = 0.
    glmm.delta = 0
    assert_allclose(glmm.lml(), -113.24570457063275)
    assert_allclose(glmm._check_grad(step=1e-4), 0, atol=1e-2)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -98.21144899310399, **tol)
    assert_allclose(glmm.delta, 0, **tol)

    # Start from delta = 1.
    glmm.delta = 1
    assert_allclose(glmm.lml(), -98.00058169240869, **tol)
    assert_allclose(glmm._check_grad(step=1e-4), 0, atol=1e-1)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -72.82680948264196, **tol)
    assert_allclose(glmm.delta, 0.9999999850988439, **tol)
def test_glmmexpfam_wrong_qs():
    """GLMMExpFam must raise ValueError when QS is the plain list ``[0, 1]``."""
    rng = RandomState(0)
    X = rng.randn(10, 15)
    # The result of this call is discarded; the call itself is kept as-is.
    linear_eye_cov().value()
    bad_qs = [0, 1]

    ntri = rng.randint(1, 30, 10)
    nsuc = [rng.randint(0, upper) for upper in ntri]

    with pytest.raises(ValueError):
        GLMMExpFam((nsuc, ntri), "binomial", X, bad_qs)
def test_glmmexpfam_predict():
    """Fit on a training split and check predictive mean/covariance on the rest."""
    rng = RandomState(4)
    n = 100
    p = n + 1

    X = ones((n, 2))
    X[:, 1] = rng.randn(n)

    # Column-standardised matrix G, scaled by sqrt(p), gives K = G G^T.
    G = rng.randn(n, p)
    G /= G.std(0)
    G -= G.mean(0)
    G /= sqrt(p)
    K = dot(G, G.T)

    # Random permutation of the sample indices; the last n // 5 form the test set.
    indices = asarray(arange(0, n), int)
    shuffled = rng.choice(indices, n, replace=False)
    ntest = int(n // 5)
    itrain = shuffled[:-ntest]
    itest = shuffled[-ntest:]

    Xtrain = X[itrain, :]
    Ktrain = K[itrain, :][:, itrain]
    Xtest = X[itest, :]

    beta = rng.randn(2)
    z = rng.multivariate_normal(dot(X, beta), 0.9 * K + 0.1 * eye(n))

    ntri = rng.randint(1, 100, n)
    nsuc = zeros(n, dtype=int)
    for pos, trials in enumerate(ntri):
        nsuc[pos] += sum(z[pos] + 0.2 * rng.randn(trials) > 0)
    ntri = ascontiguousarray(ntri)

    QStrain = economic_qs(Ktrain)
    nsuc_train = ascontiguousarray(nsuc[itrain])
    ntri_train = ascontiguousarray(ntri[itrain])
    nsuc_test = ascontiguousarray(nsuc[itest])
    ntri_test = ascontiguousarray(ntri[itest])

    glmm = GLMMExpFam(nsuc_train, ("binomial", ntri_train), Xtrain, QStrain)
    glmm.fit(verbose=False)

    # Test-vs-train covariance block and the test samples' diagonal entries of K.
    ks = K[itest, :][:, itrain]
    kss = asarray([K[j, j] for j in itest])

    pm = glmm.predictive_mean(Xtest, ks, kss)
    pk = glmm.predictive_covariance(Xtest, ks, kss)

    observed_ratio = nsuc_test / ntri_test
    assert_(corrcoef([pm, observed_ratio])[0, 1] > 0.8)
    assert_allclose(pk[0], 54.263705682514846)
def _fit_glmm_simple_model(self, verbose):
    """Fit a GLMMExpFam on ``self._y`` and store the model and its variances.

    The matrix from ``self._get_matrix_simple_model()`` is decomposed with
    ``economic_qs`` unless it is ``None``. After fitting, ``glmm.v0`` and
    ``glmm.v1`` are passed to ``self._set_simple_model_variances`` and the
    fitted model is kept in ``self._glmm``.
    """
    from numpy_sugar.linalg import economic_qs
    from glimix_core.glmm import GLMMExpFam
    from numpy import asarray

    covariance = self._get_matrix_simple_model()
    outcome = asarray(self._y, float).ravel()
    QS = economic_qs(covariance) if covariance is not None else None

    model = GLMMExpFam(outcome, self._lik, self._M, QS)
    model.fit(verbose=verbose)

    self._set_simple_model_variances(model.v0, model.v1)
    self._glmm = model
def test_glmmexpfam_poisson():
    """Check lml of a Poisson GLMMExpFam whose covariates are a DataFrame.

    The covariates are a sample-indexed ``DataFrame`` with the columns
    ``offset`` and ``age``; lml is checked before and after fitting.
    """
    from numpy import ones, stack, exp, zeros
    from numpy.random import RandomState
    from numpy_sugar.linalg import economic_qs
    from pandas import DataFrame

    random = RandomState(1)

    # sample size
    n = 30

    # covariates
    offset = ones(n) * random.randn()
    age = random.randint(16, 75, n)
    # Built directly as a DataFrame: the earlier plain-array ``stack`` result
    # was overwritten before ever being read.
    M = DataFrame(stack([offset, age], axis=1), columns=["offset", "age"])
    M["sample"] = [f"sample{i}" for i in range(n)]
    M = M.set_index("sample")

    # genetic variants
    G = random.randn(n, 4)

    # sampling the phenotype
    alpha = random.randn(2)
    beta = random.randn(4)
    eps = random.randn(n)
    y = M @ alpha + G @ beta + eps

    # Whole genotype of each sample.
    X = random.randn(n, 50)
    # Estimate a kinship relationship between samples.
    X_ = (X - X.mean(0)) / X.std(0) / sqrt(X.shape[1])
    K = X_ @ X_.T + eye(n) * 0.1
    # Update the phenotype
    y += random.multivariate_normal(zeros(n), K)
    y = (y - y.mean()) / y.std()

    z = y.copy()
    y = random.poisson(exp(z))

    M = M - M.mean(0)
    QS = economic_qs(K)
    glmm = GLMMExpFam(y, "poisson", M, QS)
    assert_allclose(glmm.lml(), -52.479557279193585)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -34.09720756737648)
def test_glmmexpfam_scale_very_high():
    """Check lml and the gradient check result with ``scale`` set to 30."""
    size = 10
    rng = RandomState(0)
    X = rng.randn(size, 5)
    QS = economic_qs(linear_eye_cov().value())

    ntri = rng.randint(1, 30, size)
    nsuc = [rng.randint(0, upper) for upper in ntri]

    glmm = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)
    glmm.beta = asarray([1.0, 0, 0.5, 0.1, 0.4])
    glmm.scale = 30.0

    assert_allclose(glmm.lml(), -29.632791380478736, atol=ATOL, rtol=RTOL)
    assert_allclose(glmm._check_grad(), 0, atol=1e-3)
def test_glmmexpfam_delta1():
    """Check lml and the gradient check result with ``delta`` set to 1."""
    size = 10
    rng = RandomState(0)
    X = rng.randn(size, 5)
    QS = economic_qs(linear_eye_cov().value())

    ntri = rng.randint(1, 30, size)
    nsuc = [rng.randint(0, upper) for upper in ntri]

    glmm = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)
    glmm.beta = asarray([1.0, 0, 0.5, 0.1, 0.4])
    glmm.delta = 1

    assert_allclose(glmm.lml(), -47.09677870648636, atol=ATOL, rtol=RTOL)
    assert_allclose(glmm._check_grad(), 0, atol=1e-4)
def _glmm(y, lik, M, QS, verbose):
    """Fit a GLMMExpFam, then a GLMMNormal on its site parameters.

    Returns ``(scanner, v0, v1)``: the GLMMNormal fast scanner wrapped in
    ``ScannerWrapper``, and the ``v0``/``v1`` attributes of the fitted
    GLMMExpFam.
    """
    from glimix_core.glmm import GLMMExpFam, GLMMNormal

    expfam = GLMMExpFam(y.ravel(), lik, M, QS)
    expfam.fit(verbose=verbose)
    v0, v1 = expfam.v0, expfam.v1
    sys.stdout.flush()

    # The second model is built from the first model's site eta/tau.
    site = expfam.site
    gnormal = GLMMNormal(site.eta, site.tau, M, QS)
    gnormal.fit(verbose=verbose)

    return ScannerWrapper(gnormal.get_fast_scanner()), v0, v1
def test_glmmexpfam_optimize_low_rank():
    """Check lml before and after fitting when ``K = X X^T`` with 5 columns."""
    size = 10
    rng = RandomState(0)
    tol = {"atol": ATOL, "rtol": RTOL}

    X = rng.randn(size, 5)
    K = dot(X, X.T)
    z = dot(X, 0.2 * rng.randn(5))
    QS = economic_qs(K)

    ntri = rng.randint(1, 30, size)
    nsuc = zeros(size, dtype=int)
    for pos, trials in enumerate(ntri):
        nsuc[pos] += sum(z[pos] + 0.2 * rng.randn(trials) > 0)
    ntri = ascontiguousarray(ntri)

    glmm = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)
    assert_allclose(glmm.lml(), -18.60476792256323, **tol)

    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -7.800621320491801, **tol)
def test_glmmexpfam_binomial_large_ntrials():
    """Check the fitted lml of a binomial GLMMExpFam with up to 1e5 trials."""
    random = RandomState(0)
    n = 10

    X = random.randn(n, 2)
    G = random.randn(n, 100)
    K = dot(G, G.T)
    ntrials = random.randint(1, 100000, n)
    z = dot(G, random.randn(100)) / sqrt(100)

    successes = zeros(len(ntrials), int)
    for i, trials in enumerate(ntrials):
        # One vectorised draw per sample replaces ``trials`` scalar randn()
        # calls. The legacy RandomState normal sampler yields the same values
        # either way, so the reference lml below is unchanged.
        noise = random.randn(trials)
        successes[i] = (z[i] + 0.1 * noise > 0).sum()

    QS = economic_qs(K)
    glmm = GLMMExpFam(successes, ("binomial", ntrials), X, QS)
    glmm.fit(verbose=False)

    assert_allclose(glmm.lml(), -43.067433588125446)
def test_glmmexpfam_bernoulli_probit_assure_delta_fixed():
    """After fitting this probit model, ``logitdelta`` must be reported as fixed."""
    rng = RandomState(1)
    size = 10

    G = rng.randn(size, size + 50)
    y = bernoulli_sample(0.0, G, random_state=rng)

    G = ascontiguousarray(G, dtype=float)
    _stdnorm(G, 0, out=G)
    G /= sqrt(G.shape[1])

    QS = economic_qs_linear(G)
    # In-place rescaling: QS[1] itself is divided by its mean.
    s0 = QS[1]
    s0 /= s0.mean()

    X = ones((len(y), 1))
    model = GLMMExpFam(y, "probit", X, QS=(QS[0], QS[1]))
    model.fit(verbose=False)

    assert_allclose(model.lml(), -6.108751595773174, rtol=RTOL)
    assert_allclose(model.delta, 1.4901161193847673e-08, atol=1e-5)
    assert_(model._isfixed("logitdelta"))
def _perform_glmm(y, lik, M, K, QS, G, verbose):
    """Run a GLMM-based scan over the candidates in ``G``.

    A GLMMExpFam is fitted first; its site ``eta``/``tau`` feed a GLMMNormal
    whose fast scanner (scale set to 1.0) provides the null lml and scans
    the candidate values. Results are returned as a ``QTLModel`` built from
    the null lml, the alternative lmls, the candidate effect sizes and the
    covariate effect sizes.

    NOTE(review): ``K`` is not referenced in this function body.
    """
    from glimix_core.glmm import GLMMExpFam, GLMMNormal
    from pandas import Series
    from xarray import DataArray

    covariates = M.values

    expfam = GLMMExpFam(y.ravel(), lik, covariates, QS)
    expfam.fit(verbose=verbose)
    sys.stdout.flush()

    site = expfam.site
    gnormal = GLMMNormal(site.eta, site.tau, covariates, QS)
    gnormal.fit(verbose=verbose)

    # Effect sizes of the covariates, labelled by the "covariate" coordinate.
    ncov_effsizes = Series(gnormal.beta, list(M.coords["covariate"].values))

    flmm = gnormal.get_fast_scanner()
    flmm.set_scale(1.0)
    null_lml = flmm.null_lml()

    values = G.data if hasattr(G, "data") else G.values
    alt_lmls, effsizes = flmm.fast_scan(values, verbose=verbose)

    # Keep only the coordinates whose first dimension is "candidate".
    coords = {}
    for key in G.coords.keys():
        coord = G.coords[key]
        if coord.dims[0] == "candidate":
            coords[key] = ("candidate", coord.values)

    alt_lmls = DataArray(alt_lmls, dims=["candidate"], coords=coords)
    effsizes = DataArray(effsizes, dims=["candidate"], coords=coords)

    return QTLModel(null_lml, alt_lmls, effsizes, ncov_effsizes)
def test_glmmexpfam_bernoulli_probit_problematic():
    """Fit a probit model with delta fixed and free; compare the two results.

    The quantity ``scale * (1 - delta) / (scale + 1)`` computed after each
    fit must agree between the two runs.
    """
    rng = RandomState(1)
    size = 30
    tol = {"atol": ATOL, "rtol": RTOL}

    G = rng.randn(size, size + 50)
    y = bernoulli_sample(0.0, G, random_state=rng)

    G = ascontiguousarray(G, dtype=float)
    _stdnorm(G, 0, out=G)
    G /= sqrt(G.shape[1])

    QS = economic_qs_linear(G)
    # In-place rescaling: QS[1] itself is divided by its mean.
    s0 = QS[1]
    s0 /= s0.mean()

    X = ones((len(y), 1))
    model = GLMMExpFam(y, "probit", X, QS=(QS[0], QS[1]))

    # First run: delta set to 0 and marked as fixed.
    model.delta = 0
    model.fix("delta")
    model.fit(verbose=False)
    assert_allclose(model.lml(), -20.725623168378615, **tol)
    assert_allclose(model.delta, 0.0001220703125, atol=1e-3)
    assert_allclose(model.scale, 0.33022865011938707, **tol)
    assert_allclose(model.beta, [-0.002617161564786044], **tol)
    h2_fixed = model.scale * (1 - model.delta) / (model.scale + 1)

    # Second run: delta released and restarted from delta=0.5, scale=1.0.
    model.unfix("delta")
    model.delta = 0.5
    model.scale = 1.0
    model.fit(verbose=False)
    assert_allclose(model.lml(), -20.725623168378522, **tol)
    assert_allclose(model.delta, 0.5017852859580029, atol=1e-3)
    assert_allclose(model.scale, 0.9928931515372, **tol)
    assert_allclose(model.beta, [-0.003203427206253548], **tol)
    h2_free = model.scale * (1 - model.delta) / (model.scale + 1)

    assert_allclose(h2_fixed, h2_free, **tol)
def test_glmmexpfam_poisson():
    """Check lml of a Poisson GLMMExpFam before and after fitting."""
    rng = RandomState(1)
    size = 30

    # Covariates: a constant column and an integer "age" column.
    offset = ones(size) * rng.randn()
    age = rng.randint(16, 75, size)
    M = stack((offset, age), axis=1)

    # Genetic variants.
    G = rng.randn(size, 4)

    # Phenotype from covariate effects, variant effects and noise.
    alpha = rng.randn(2)
    beta = rng.randn(4)
    eps = rng.randn(size)
    y = M @ alpha + G @ beta + eps

    # Kinship from a column-standardised genotype matrix, plus 0.1 * identity.
    X = rng.randn(size, 50)
    standardized = (X - X.mean(0)) / X.std(0) / sqrt(X.shape[1])
    K = standardized @ standardized.T + eye(size) * 0.1

    # Add the K-correlated component and standardise the phenotype.
    y += rng.multivariate_normal(zeros(size), K)
    y = (y - y.mean()) / y.std()

    z = y.copy()
    y = rng.poisson(exp(z))

    M = M - M.mean(0)
    QS = economic_qs(K)
    glmm = GLMMExpFam(y, "poisson", M, QS)
    assert_allclose(glmm.lml(), -52.479557279193585)

    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -34.09720756737648)
def _st_glmm(y, lik, M, QS, verbose):
    """Fit a GLMMExpFam, then a GLMMNormal on its site parameters.

    Returns ``(scanner, v0, v1)``: the GLMMNormal fast scanner and the
    ``v0``/``v1`` attributes of the fitted GLMMExpFam. ``v0`` is ``nan``
    when ``QS`` is ``None``.
    """
    from numpy import nan
    from glimix_core.glmm import GLMMExpFam, GLMMNormal

    expfam = GLMMExpFam(y, lik, M, QS)
    expfam.fit(verbose=verbose)

    v0 = nan if QS is None else expfam.v0
    v1 = expfam.v1
    sys.stdout.flush()

    # The second model is built from the first model's site eta/tau.
    site = expfam.site
    gnormal = GLMMNormal(site.eta, site.tau, M, QS)
    gnormal.fit(verbose=verbose)

    return gnormal.get_fast_scanner(), v0, v1
def test_glmmexpfam_bernoulli_problematic():
    """Fit a bernoulli model with delta fixed at 0 and check the estimates."""
    rng = RandomState(1)
    size = 30
    tol = {"atol": ATOL, "rtol": RTOL}

    G = rng.randn(size, size + 50)
    y = bernoulli_sample(0.0, G, random_state=rng)

    G = ascontiguousarray(G, dtype=float)
    _stdnorm(G, 0, out=G)
    G /= sqrt(G.shape[1])

    QS = economic_qs_linear(G)
    # In-place rescaling: QS[1] itself is divided by its mean.
    s0 = QS[1]
    s0 /= s0.mean()

    X = ones((len(y), 1))
    model = GLMMExpFam(y, "bernoulli", X, QS=(QS[0], QS[1]))
    model.delta = 0
    model.fix("delta")
    model.fit(verbose=False)

    assert_allclose(model.lml(), -20.727007958026853, **tol)
    assert_allclose(model.delta, 0, atol=1e-3)
    assert_allclose(model.scale, 0.879915823030081, **tol)
    assert_allclose(model.beta, [-0.00247856564728], **tol)
def test_glmmexpfam_copy():
    """A copy must start with the same lml and be independent of the original."""
    size = 10
    rng = RandomState(0)
    tol = {"atol": ATOL, "rtol": RTOL}

    X = rng.randn(size, 5)
    K = linear_eye_cov().value()
    z = rng.multivariate_normal(0.2 * ones(size), K)
    QS = economic_qs(K)

    ntri = rng.randint(1, 30, size)
    nsuc = zeros(size, dtype=int)
    for pos, trials in enumerate(ntri):
        nsuc[pos] += sum(z[pos] + 0.2 * rng.randn(trials) > 0)
    ntri = ascontiguousarray(ntri)

    original = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)
    assert_allclose(original.lml(), -29.10216812909928, **tol)

    original.fit(verbose=False)
    fitted_lml = -19.575736562427252
    assert_allclose(original.lml(), fitted_lml)

    clone = original.copy()
    assert_allclose(clone.lml(), fitted_lml)

    # Changing the clone's scale must leave the original's lml untouched.
    clone.scale = 0.92
    assert_allclose(original.lml(), fitted_lml, **tol)
    assert_allclose(clone.lml(), -30.832831740038056, **tol)

    # Refitting both must yield the same lml for each.
    original.fit(verbose=False)
    clone.fit(verbose=False)
    refitted_lml = -19.575736562378573
    assert_allclose(original.lml(), refitted_lml)
    assert_allclose(clone.lml(), refitted_lml)
def estimate(pheno, lik, K, covs=None, verbose=True):
    r"""Estimate the so-called narrow-sense heritability.

    It supports Normal, Bernoulli, Binomial, and Poisson phenotypes.
    Let :math:`N` be the sample size and :math:`S` the number of covariates.

    Parameters
    ----------
    pheno : tuple, array_like
        Phenotype. For the ``'binomial'`` likelihood the sample size is
        taken from ``len(pheno[0])``; otherwise from ``len(pheno)``.
    lik : {'normal', 'bernoulli', 'binomial', 'poisson'}
        Likelihood name. It is lower-cased before use.
    K : array_like
        Kinship matrix. Dimensions :math:`N \times N`.
    covs : array_like
        Covariates. Default is an offset (a column of ones).
        Dimensions :math:`N \times S`.
    verbose : bool
        Forwarded to the optimiser's ``maximize`` call.

    Returns
    -------
    float
        Estimated heritability, ``g / (var(mean) + g + e)`` with
        ``g = scale * (1 - delta)`` and ``e = scale * delta``.

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt
        >>> from numpy.random import RandomState
        >>> from limix.heritability import estimate
        >>>
        >>> random = RandomState(0)
        >>>
        >>> G = random.randn(50, 100)
        >>> K = dot(G, G.T)
        >>> z = dot(G, random.randn(100)) / sqrt(100)
        >>> y = random.poisson(exp(z))
        >>>
        >>> print('%.2f' % estimate(y, 'poisson', K, verbose=False))
        0.70
    """
    QS = economic_qs(_background_standardize(K))

    lik = lik.lower()
    nsamples = len(pheno[0]) if lik == "binomial" else len(pheno)

    if covs is None:
        covs = ones((nsamples, 1))

    glmm = GLMMExpFam(pheno, lik, covs, QS)
    glmm.feed().maximize(verbose=verbose)

    scale, delta = glmm.scale, glmm.delta
    genetic = scale * (1 - delta)
    residual = scale * delta
    return genetic / (var(glmm.mean()) + genetic + residual)
import numpy as np
import numpy_sugar as ns
from glimix_core.glmm import GLMMExpFam
from time import time

# Benchmark script: times a binomial GLMMExpFam fit on data read from .npy
# files and stores the elapsed wall-clock time under out/.

G = np.load('null_G.npy')
# Trial and success counts are converted to float right after loading.
ntri = np.asarray(np.load('null_ntri.npy'), float)
nsuc = np.asarray(np.load('null_nsuc.npy'), float)

N, P = G.shape
X = np.ones((N, 1))
QS = ns.linalg.economic_qs(G.dot(G.T))

# Only model construction and fitting are timed.
start = time()
glmm = GLMMExpFam((nsuc, ntri), "binomial", X, QS)
glmm.fit(verbose=True)
stop = time()

elapsed = stop - start
print("Elapsed: {}".format(elapsed))
np.save("out/fastglmm_N{}".format(N), elapsed)
def estimate(y, lik, K, M=None, verbose=True):
    r"""Estimate the so-called narrow-sense heritability.

    It supports Normal, Bernoulli, Probit, Binomial, and Poisson phenotypes.
    Let :math:`N` be the sample size and :math:`S` the number of covariates.

    Parameters
    ----------
    y : array_like
        Either a tuple of two arrays of `N` individuals each (Binomial
        phenotypes) or an array of `N` individuals (Normal, Poisson, or
        Bernoulli phenotypes). If a continuous phenotype is provided (i.e., a
        Normal one), make sure they have been normalised in such a way that its
        values are not extremely large; it might cause numerical errors
        otherwise. For example, by using :func:`limix.qc.mean_standardize` or
        :func:`limix.qc.quantile_gaussianize`.
    lik : "normal", "bernoulli", "probit", "binomial", "poisson"
        Sample likelihood describing the residual distribution. A tuple or
        list is also accepted, in which case its first element is taken as
        the likelihood name.
    K : array_like
        :math:`N`-by-:math:`N` covariance matrix. It might be, for example, the
        estimated kinship relationship between the individuals. The provided
        matrix will be normalised via the function
        :func:`limix.qc.normalise_covariance`.
    M : array_like, optional
        :math:`N` individuals by :math:`S` covariates.
        It will create a :math:`N`-by-:math:`1` matrix ``M`` of ones
        representing the offset covariate if ``None`` is passed. If an array is
        passed, it will be used as is. Defaults to ``None``.
    verbose : bool, optional
        ``True`` to display progress and summary; ``False`` otherwise.

    Returns
    -------
    float
        Estimated heritability.

    Raises
    ------
    ValueError
        If the outcome, the covariates or the covariance matrix contain
        non-finite values.

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt
        >>> from numpy.random import RandomState
        >>> from limix.her import estimate
        >>>
        >>> random = RandomState(0)
        >>>
        >>> G = random.randn(150, 200) / sqrt(200)
        >>> K = dot(G, G.T)
        >>> z = dot(G, random.randn(200)) + random.randn(150)
        >>> y = random.poisson(exp(z))
        >>>
        >>> print('%.3f' % estimate(y, 'poisson', K, verbose=False))  # doctest: +FLOAT_CMP
        0.183

    Notes
    -----
    It will raise a ``ValueError`` exception if non-finite values are passed.
    Please, refer to the :func:`limix.qc.mean_impute` function for missing
    value imputation.
    """
    from numpy_sugar import is_all_finite
    from numpy_sugar.linalg import economic_qs
    from numpy import ones, pi, var
    from glimix_core.glmm import GLMMExpFam
    from glimix_core.lmm import LMM

    if not isinstance(lik, (tuple, list)):
        lik = (lik,)

    lik_name = lik[0].lower()
    check_likelihood_name(lik_name)

    with session_block("heritability analysis", disable=not verbose):

        if M is None:
            M = ones((len(y), 1))

        with session_line("Normalising input...", disable=not verbose):
            data = conform_dataset(y, M=M, K=K)

        y = data["y"]
        M = data["M"]
        K = data["K"]

        if not is_all_finite(y):
            raise ValueError("Outcome must have finite values only.")

        if not is_all_finite(M):
            raise ValueError("Covariates must have finite values only.")

        if K is not None:
            if not is_all_finite(K):
                # K is the covariance matrix; the message used to say
                # "Covariate matrix", which pointed at the wrong argument.
                raise ValueError("Covariance matrix must have finite values only.")

            K = normalise_covariance(K)

        y = normalise_extreme_values(y, lik)

        if K is not None:
            QS = economic_qs(K)
        else:
            QS = None

        if lik_name == "normal":
            method = LMM(y.values, M.values, QS)
            method.fit(verbose=verbose)
        else:
            method = GLMMExpFam(y, lik, M.values, QS, n_int=500)
            method.fit(verbose=verbose, factr=1e6, pgtol=1e-3)

        g = method.scale * (1 - method.delta)
        e = method.scale * method.delta
        if lik_name == "bernoulli":
            e += pi * pi / 3

        # The fixed-effects variance comes from the LMM attribute for the
        # normal likelihood and from the variance of the fitted mean otherwise.
        if lik_name == "normal":
            v = method.fixed_effects_variance
        else:
            v = var(method.mean())

        return g / (v + g + e)
def qtl_test_glmm(
    snps,
    pheno,
    lik,
    K,
    covs=None,
    test="lrt",
    NumIntervalsDeltaAlt=100,
    searchDelta=False,
    verbose=True,
):
    """
    Wrapper function for univariate single-variant association testing
    using a generalised linear mixed model.

    Args:
        snps (array_like):
            `N` individuals by `S` SNPs.
        pheno (tuple, array_like):
            Either a tuple of two arrays of `N` individuals each (Binomial
            phenotypes) or an array of `N` individuals (Poisson or Bernoulli
            phenotypes). It does not support missing values yet.
        lik ({'bernoulli', 'binomial', 'poisson'}):
            Sample likelihood describing the residual distribution.
        K (array_like):
            `N` by `N` covariance matrix (e.g., kinship coefficients).
        covs (array_like, optional):
            `N` individuals by `D` covariates.
            By default, ``covs`` is a (`N`, `1`) array of ones.
        test ({'lrt'}, optional):
            Likelihood ratio test (default).
            NOTE(review): not referenced in this function body.
        NumIntervalsDeltaAlt (int, optional):
            number of steps for delta optimization on the alternative model.
            Requires ``searchDelta=True`` to have an effect.
            NOTE(review): not referenced in this function body.
        searchDelta (bool, optional):
            if ``True``, delta optimization on the alternative
            model is carried out. By default ``searchDelta`` is ``False``.
            NOTE(review): not referenced in this function body.
        verbose (bool, optional):
            if ``True``, details such as runtime are displayed.

    Returns:
        :class:`limix.qtl.LMM`: LIMIX LMM object

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt
        >>> from numpy.random import RandomState
        >>> from limix.qtl import qtl_test_glmm
        >>>
        >>> random = RandomState(0)
        >>>
        >>> G = random.randn(250, 500) / sqrt(500)
        >>> beta = 0.01 * random.randn(500)
        >>>
        >>> z = dot(G, beta) + 0.1 * random.randn(250)
        >>> z += dot(G[:, 0], 1) # causal SNP
        >>>
        >>> y = random.poisson(exp(z))
        >>>
        >>> candidates = G[:, :5]
        >>> K = dot(G[:, 5:], G[:, 5:].T)
        >>> lm = qtl_test_glmm(candidates, y, 'poisson', K, verbose=False)
        >>>
        >>> print(lm.getPv())
        [[0.0694 0.3336 0.5899 0.7388 0.7796]]
    """
    snps = _asarray(snps)

    if covs is None:
        covs = ones((snps.shape[0], 1))
    else:
        covs = _asarray(covs)

    K = _asarray(K)

    if isinstance(pheno, (tuple, list)):
        y = tuple(asarray(p, float) for p in pheno)
    else:
        y = asarray(pheno, float)

    # Fit the GLMM on the covariates only.
    QS = economic_qs(K)
    glmm = GLMMExpFam(y, lik, covs, QS)
    glmm.feed().maximize(verbose=verbose)

    # Site parameters and variance parameters of the fitted model.
    eta = glmm.site.eta
    tau = glmm.site.tau
    scale = float(glmm.scale)
    delta = float(glmm.delta)

    # Pseudo-phenotype and per-sample variance derived from the site terms.
    mu = eta / tau
    site_var = 1. / tau
    s2_g = scale * (1 - delta)
    # Covariance handed to the LMM: scaled K plus a diagonal of the site
    # variances shifted so that its smallest entry is 1e-4.
    tR = s2_g * K + diag(site_var - site_var.min() + 1e-4)

    lmm = LMM(snps=snps, pheno=mu, K=tR, covs=covs, verbose=verbose)

    return lmm