def estimate(y_phe, lik, kin, marker_mat=None, verbose=True):
    """Estimate variance components."""
    from numpy_sugar.linalg import economic_qs
    from numpy import pi, var
    from glimix_core.glmm import GLMMExpFam
    from glimix_core.lmm import LMM
    from limix._data import normalize_likelihood, conform_dataset
    from limix.qtl._assert import assert_finite
    from limix._display import session_block, session_line

    lik = normalize_likelihood(lik)
    lik_name = lik[0]

    with session_block("Heritability analysis", disable=not verbose):
        with session_line("Normalising input...", disable=not verbose):
            data = conform_dataset(y_phe, M=marker_mat, K=kin)

        y_phe = data["y"]
        marker_mat = data["M"]
        kin = data["K"]
        assert_finite(y_phe, marker_mat, kin)

        if kin is not None:
            # K = K / diag(K).mean()
            q_s = economic_qs(kin)
        else:
            q_s = None

        if lik_name == "normal":
            method = LMM(y_phe.values, marker_mat.values, q_s, restricted=True)
            method.fit(verbose=verbose)
        else:
            method = GLMMExpFam(y_phe, lik, marker_mat.values, q_s, n_int=500)
            method.fit(verbose=verbose, factr=1e6, pgtol=1e-3)

        v_g = method.scale * (1 - method.delta)
        v_e = method.scale * method.delta
        if lik_name == "bernoulli":
            v_e += pi * pi / 3
        v_v = var(method.mean())

        return v_g, v_v, v_e

def estimate(y, lik, K, M=None, verbose=True):
    from numpy_sugar.linalg import economic_qs
    from numpy import pi, var, diag
    from glimix_core.glmm import GLMMExpFam
    from glimix_core.lmm import LMM
    from limix._data._assert import assert_likelihood
    from limix._data import normalize_likelihood, conform_dataset
    from limix.qtl._assert import assert_finite
    from limix._display import session_block, session_line

    lik = normalize_likelihood(lik)
    lik_name = lik[0]

    with session_block("Heritability analysis", disable=not verbose):
        with session_line("Normalising input...", disable=not verbose):
            data = conform_dataset(y, M=M, K=K)

        y = data["y"]
        M = data["M"]
        K = data["K"]
        assert_finite(y, M, K)

        if K is not None:
            # K = K / diag(K).mean()
            QS = economic_qs(K)
        else:
            QS = None

        if lik_name == "normal":
            method = LMM(y.values, M.values, QS, restricted=True)
            method.fit(verbose=verbose)
        else:
            method = GLMMExpFam(y, lik, M.values, QS, n_int=500)
            method.fit(verbose=verbose, factr=1e6, pgtol=1e-3)

        g = method.scale * (1 - method.delta)
        e = method.scale * method.delta
        if lik_name == "bernoulli":
            e += pi * pi / 3
        v = var(method.mean())

        return g, v, e

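# Both estimate() variants above return the raw variance components rather
# than a heritability ratio. A minimal usage sketch against the function
# defined directly above (the simulated data, seed, and sizes are
# illustrative, not taken from the original code):
from numpy import dot, sqrt
from numpy.random import RandomState

random = RandomState(0)
n, p = 200, 500
G = random.randn(n, p) / sqrt(p)               # normalised genotype-like matrix
K = dot(G, G.T)                                # kinship-style covariance
y = dot(G, random.randn(p)) + random.randn(n)  # phenotype with genetic signal

g, v, e = estimate(y, "normal", K, verbose=False)
# Narrow-sense heritability can be formed from the components, as the
# public limix version at the end of this section does:
h2 = g / (g + v + e)
print("%.3f" % h2)
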
def test_lmm_interface():
    import pytest
    from numpy.random import RandomState
    from numpy.testing import assert_allclose, assert_equal
    from numpy_sugar.linalg import economic_qs_linear
    from glimix_core.lmm import LMM

    random = RandomState(1)
    n = 3
    G = random.randn(n, n + 1)
    X = random.randn(n, 2)
    y = X @ random.randn(2) + G @ random.randn(G.shape[1]) + random.randn(n)
    y -= y.mean(0)
    y /= y.std(0)

    QS = economic_qs_linear(G)
    lmm = LMM(y, X, QS, restricted=False)
    lmm.name = "lmm"
    lmm.fit(verbose=False)

    assert_allclose(
        lmm.covariance(),
        [
            [0.436311031439718, 2.6243891396439837e-16, 2.0432156171727483e-16],
            [2.6243891396439837e-16, 0.4363110314397185, 4.814313140426306e-16],
            [2.0432156171727483e-16, 4.814313140426305e-16, 0.43631103143971817],
        ],
        atol=1e-7,
    )
    assert_allclose(
        lmm.mean(),
        [0.6398184791042468, -0.8738254794097052, 0.7198112606871158],
        atol=1e-7,
    )
    assert_allclose(lmm.lml(), -3.012715726960625, atol=1e-7)
    assert_allclose(lmm.value(), lmm.lml(), atol=1e-7)
    assert_allclose(
        lmm.X,
        [
            [-0.3224172040135075, -0.38405435466841564],
            [1.1337694423354374, -1.0998912673140309],
            [-0.17242820755043575, -0.8778584179213718],
        ],
        atol=1e-7,
    )
    assert_allclose(lmm.beta, [-1.3155159120000266, -0.5615702941530938], atol=1e-7)
    assert_allclose(
        lmm.beta_covariance,
        [
            [0.44737305797088345, 0.20431961864892412],
            [0.20431961864892412, 0.29835835133251526],
        ],
        atol=1e-7,
    )
    assert_allclose(lmm.delta, 0.9999999999999998, atol=1e-7)
    assert_equal(lmm.ncovariates, 2)
    assert_equal(lmm.nsamples, 3)
    assert_allclose(lmm.scale, 0.43631103143971767, atol=1e-7)
    assert_allclose(lmm.v0, 9.688051060046502e-17, atol=1e-7)
    assert_allclose(lmm.v1, 0.43631103143971756, atol=1e-7)
    assert_equal(lmm.name, "lmm")

    with pytest.raises(NotImplementedError):
        lmm.gradient()

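# The scale/delta/v0/v1 values asserted above are tied together by the LMM
# variance decomposition: the covariance is scale * ((1 - delta) * K + delta * I),
# so v0 = scale * (1 - delta) and v1 = scale * delta. A short sketch making
# that identity explicit (arbitrary data; not one of the original tests):
from numpy.random import RandomState
from numpy.testing import assert_allclose
from numpy_sugar.linalg import economic_qs_linear
from glimix_core.lmm import LMM

random = RandomState(0)
G = random.randn(10, 5)
X = random.randn(10, 2)
y = random.randn(10)

lmm = LMM(y, X, economic_qs_linear(G))
lmm.fit(verbose=False)

assert_allclose(lmm.v0, lmm.scale * (1 - lmm.delta))  # genetic component
assert_allclose(lmm.v1, lmm.scale * lmm.delta)        # residual component
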
def test_lmm_predict():
    from numpy import corrcoef, dot, ones, sqrt
    from numpy.random import RandomState
    from numpy.testing import assert_allclose
    from numpy_sugar.linalg import economic_qs_linear
    from glimix_core.cov import EyeCov, LinearCov, SumCov
    from glimix_core.lik import DeltaProdLik
    from glimix_core.lmm import LMM, LMMPredict
    from glimix_core.mean import OffsetMean
    from glimix_core.random import GGPSampler

    random = RandomState(9458)
    n = 30

    X = random.randn(n, n + 1)
    X -= X.mean(0)
    X /= X.std(0)
    X /= sqrt(X.shape[1])

    offset = 1.0
    mean = OffsetMean(n)
    mean.offset = offset

    cov_left = LinearCov(X)
    cov_left.scale = 1.5
    cov_right = EyeCov(n)
    cov_right.scale = 1.5
    cov = SumCov([cov_left, cov_right])

    lik = DeltaProdLik()
    y = GGPSampler(lik, mean, cov).sample(random)

    QS = economic_qs_linear(X)
    lmm = LMM(y, ones((n, 1)), QS)
    lmm.fit(verbose=False)

    plmm = LMMPredict(y, lmm.beta, lmm.v0, lmm.v1, lmm.mean(), lmm.covariance())

    K = dot(X, X.T)
    pm = plmm.predictive_mean(ones((n, 1)), K, K.diagonal())
    assert_allclose(corrcoef(y, pm)[0, 1], 0.8358820971891354)

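# test_lmm_predict() evaluates the predictive mean in-sample. A hedged
# out-of-sample sketch under the same predictive_mean(Xstar, ks, kss) call
# form used above; the train/test split and data are illustrative, and the
# test-by-train orientation of ks is an assumption inferred from the
# in-sample call:
from numpy import dot, ones, sqrt
from numpy.random import RandomState
from numpy_sugar.linalg import economic_qs_linear
from glimix_core.lmm import LMM, LMMPredict

random = RandomState(0)
n, ntest = 40, 10
X = random.randn(n, n + 1) / sqrt(n + 1)
y = dot(X, random.randn(n + 1)) + random.randn(n)

Xtrain, Xtest = X[:-ntest], X[-ntest:]    # hold out the last ntest samples
ytrain = y[:-ntest]

lmm = LMM(ytrain, ones((n - ntest, 1)), economic_qs_linear(Xtrain))
lmm.fit(verbose=False)

plmm = LMMPredict(ytrain, lmm.beta, lmm.v0, lmm.v1, lmm.mean(), lmm.covariance())

ks = dot(Xtest, Xtrain.T)                 # covariance between test and train
kss = dot(Xtest, Xtest.T).diagonal()      # marginal test variances
pm = plmm.predictive_mean(ones((ntest, 1)), ks, kss)
print(pm.shape)
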
def estimate(y, lik, K, M=None, verbose=True):
    r"""Estimate the so-called narrow-sense heritability.

    It supports Normal, Bernoulli, Probit, Binomial, and Poisson phenotypes.
    Let :math:`N` be the sample size and :math:`S` the number of covariates.

    Parameters
    ----------
    y : array_like
        Either a tuple of two arrays of `N` individuals each (Binomial
        phenotypes) or an array of `N` individuals (Normal, Poisson, or
        Bernoulli phenotypes). If a continuous phenotype is provided (i.e., a
        Normal one), make sure it has been normalised so that its values are
        not extremely large; otherwise numerical errors might occur. For
        example, use :func:`limix.qc.mean_standardize` or
        :func:`limix.qc.quantile_gaussianize`.
    lik : "normal", "bernoulli", "probit", "binomial", "poisson"
        Sample likelihood describing the residual distribution.
    K : array_like
        :math:`N`-by-:math:`N` covariance matrix. It might be, for example,
        the estimated kinship relationship between the individuals. The
        provided matrix will be normalised via the function
        :func:`limix.qc.normalise_covariance`.
    M : array_like, optional
        :math:`N` individuals by :math:`S` covariates. If ``None`` is passed,
        a :math:`N`-by-:math:`1` matrix ``M`` of ones representing the offset
        covariate will be created. If an array is passed, it will be used as
        is. Defaults to ``None``.
    verbose : bool, optional
        ``True`` to display progress and summary; ``False`` otherwise.

    Returns
    -------
    float
        Estimated heritability.

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt
        >>> from numpy.random import RandomState
        >>> from limix.her import estimate
        >>>
        >>> random = RandomState(0)
        >>>
        >>> G = random.randn(150, 200) / sqrt(200)
        >>> K = dot(G, G.T)
        >>> z = dot(G, random.randn(200)) + random.randn(150)
        >>> y = random.poisson(exp(z))
        >>>
        >>> print('%.3f' % estimate(y, 'poisson', K, verbose=False)) # doctest: +FLOAT_CMP
        0.183

    Notes
    -----
    It will raise a ``ValueError`` exception if non-finite values are passed.
    Please refer to the :func:`limix.qc.mean_impute` function for missing
    value imputation.
    """
    from numpy_sugar import is_all_finite
    from numpy_sugar.linalg import economic_qs
    from numpy import ones, pi, var
    from glimix_core.glmm import GLMMExpFam
    from glimix_core.lmm import LMM

    # The helpers below are limix internals used but not imported in the
    # original snippet; the paths for check_likelihood_name and
    # normalise_extreme_values are assumptions.
    from limix._data import conform_dataset, check_likelihood_name
    from limix._display import session_block, session_line
    from limix.qc import normalise_covariance, normalise_extreme_values

    if not isinstance(lik, (tuple, list)):
        lik = (lik,)

    lik_name = lik[0].lower()
    check_likelihood_name(lik_name)

    with session_block("heritability analysis", disable=not verbose):
        if M is None:
            M = ones((len(y), 1))

        with session_line("Normalising input...", disable=not verbose):
            data = conform_dataset(y, M=M, K=K)

        y = data["y"]
        M = data["M"]
        K = data["K"]

        if not is_all_finite(y):
            raise ValueError("Outcome must have finite values only.")

        if not is_all_finite(M):
            raise ValueError("Covariates must have finite values only.")

        if K is not None:
            if not is_all_finite(K):
                raise ValueError("Covariate matrix must have finite values only.")
            K = normalise_covariance(K)

        y = normalise_extreme_values(y, lik)

        if K is not None:
            QS = economic_qs(K)
        else:
            QS = None

        if lik_name == "normal":
            method = LMM(y.values, M.values, QS)
            method.fit(verbose=verbose)
        else:
            method = GLMMExpFam(y, lik, M.values, QS, n_int=500)
            method.fit(verbose=verbose, factr=1e6, pgtol=1e-3)

        g = method.scale * (1 - method.delta)
        e = method.scale * method.delta
        if lik_name == "bernoulli":
            e += pi * pi / 3

        if lik_name == "normal":
            v = method.fixed_effects_variance
        else:
            v = var(method.mean())

        return g / (v + g + e)

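# The docstring above notes that Binomial phenotypes are passed as a tuple of
# two arrays. A hedged sketch of that call form with simulated counts; the
# (successes, trials) tuple layout follows the docstring, and the data, seed,
# and logistic link are illustrative:
from numpy import dot, exp, full, sqrt
from numpy.random import RandomState
from limix.her import estimate

random = RandomState(1)
n, p = 150, 200
G = random.randn(n, p) / sqrt(p)
K = dot(G, G.T)

ntrials = full(n, 10)                                     # ten trials per sample
z = dot(G, random.randn(p)) + random.randn(n)
successes = random.binomial(ntrials, 1 / (1 + exp(-z)))   # simulation only

h2 = estimate((successes, ntrials), "binomial", K, verbose=False)
print("%.3f" % h2)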