예제 #1
0
파일: estimate.py 프로젝트: noahpieta/limix
def estimate(pheno, lik, K, covs=None, verbose=True):
    r"""Estimate the narrow-sense heritability of a phenotype.

    It supports Normal, Bernoulli, Binomial, and Poisson phenotypes.
    The kinship matrix is passed through ``_background_standardize`` and
    decomposed with ``economic_qs``; a ``GLMMExpFam`` model is then maximised
    and the heritability is reported as ``g / (var(mean) + g + e)``, where
    ``g = scale * (1 - delta)`` and ``e = scale * delta`` are taken from the
    fitted model.

    Below, :math:`N` is the sample size and :math:`S` the number of
    covariates.

    Parameters
    ----------
    pheno : tuple, array_like
        Phenotype with :math:`N` entries. For the binomial likelihood the
        sample size is read from ``pheno[0]``, so a tuple of arrays is
        expected in that case.
    lik : {'normal', 'bernoulli', 'binomial', 'poisson'}
        Likelihood name. It is lower-cased before use.
    K : array_like
        Kinship matrix. Dimensions :math:`N\times N`.
    covs : array_like, optional
        Covariates. Dimensions :math:`N\times S`. When omitted, a single
        column of ones (an offset) is used.
    verbose : bool, optional
        Forwarded to the ``maximize`` call of the fitted model.
        Defaults to ``True``.

    Returns
    -------
    float
        Estimated heritability.

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt
        >>> from numpy.random import RandomState
        >>> from limix.heritability import estimate
        >>>
        >>> random = RandomState(0)
        >>>
        >>> G = random.randn(50, 100)
        >>> K = dot(G, G.T)
        >>> z = dot(G, random.randn(100)) / sqrt(100)
        >>> y = random.poisson(exp(z))
        >>>
        >>> print('%.2f' % estimate(y, 'poisson', K, verbose=False))
        0.70
    """

    # Standardise the kinship first, then decompose the standardised matrix.
    QS = economic_qs(_background_standardize(K))

    lik = lik.lower()

    # Binomial phenotypes are tuples, so the sample count lives one level down.
    n_samples = len(pheno[0]) if lik == "binomial" else len(pheno)

    if covs is None:
        # Default design: intercept only.
        covs = ones((n_samples, 1))

    model = GLMMExpFam(pheno, lik, covs, QS)
    model.feed().maximize(verbose=verbose)

    # Split the fitted scale into its two components via delta.
    genetic_var = model.scale * (1 - model.delta)
    residual_var = model.scale * model.delta

    # The variance of the fitted mean enters the denominator as well.
    return genetic_var / (var(model.mean()) + genetic_var + residual_var)
예제 #2
0
def qtl_test_glmm(
    snps,
    pheno,
    lik,
    K,
    covs=None,
    test="lrt",
    NumIntervalsDeltaAlt=100,
    searchDelta=False,
    verbose=True,
):
    """
    Wrapper function for univariate single-variant association testing
    using a generalised linear mixed model.

    A ``GLMMExpFam`` model is fitted to the phenotype first. Its site
    parameters (``eta``, ``tau``) are then turned into a pseudo-phenotype
    ``eta / tau`` and a covariance matrix, which are handed to ``LMM``
    together with the candidate SNPs.

    Args:
        snps (array_like):
            `N` individuals by `S` SNPs.
        pheno (tuple, array_like):
            Either a tuple of two arrays of `N` individuals each (Binomial
            phenotypes) or an array of `N` individuals (Poisson or Bernoulli
            phenotypes). It does not support missing values yet.
            Any ``tuple`` or ``list`` is converted element by element, so a
            single-array phenotype should not be passed as a flat list.
        lik ({'bernoulli', 'binomial', 'poisson'}):
            Sample likelihood describing the residual distribution.
        K (array_like):
            `N` by `N` covariance matrix (e.g., kinship coefficients).
        covs (array_like, optional):
            `N` individuals by `D` covariates.
            By default, ``covs`` is a (`N`, `1`) array of ones.
        test ({'lrt'}, optional):
            Likelihood ratio test (default). Accepted for interface
            compatibility; this function body does not read it.
        NumIntervalsDeltaAlt (int, optional):
            number of steps for delta optimization on the alternative model.
            Accepted for interface compatibility; this function body does
            not read it.
        searchDelta (bool, optional):
            whether delta optimization on the alternative model is carried
            out. By default ``searchDelta`` is ``False``. Accepted for
            interface compatibility; this function body does not read it.
        verbose (bool, optional):
            forwarded to the GLMM ``maximize`` call and to ``LMM``.

    Returns:
        :class:`limix.qtl.LMM`: LIMIX LMM object

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt
        >>> from numpy.random import RandomState
        >>> from limix.qtl import qtl_test_glmm
        >>>
        >>> random = RandomState(0)
        >>>
        >>> G = random.randn(250, 500) / sqrt(500)
        >>> beta = 0.01 * random.randn(500)
        >>>
        >>> z = dot(G, beta) + 0.1 * random.randn(250)
        >>> z += dot(G[:, 0], 1) # causal SNP
        >>>
        >>> y = random.poisson(exp(z))
        >>>
        >>> candidates = G[:, :5]
        >>> K = dot(G[:, 5:], G[:, 5:].T)
        >>> lm = qtl_test_glmm(candidates, y, 'poisson', K, verbose=False)
        >>>
        >>> print(lm.getPv())
        [[0.0694 0.3336 0.5899 0.7388 0.7796]]
    """

    snps = _asarray(snps)

    if covs is None:
        # Default design: intercept only, one row per individual.
        covs = ones((snps.shape[0], 1))
    else:
        covs = _asarray(covs)

    K = _asarray(K)

    # NOTE(review): a flat list of N values also takes the first branch and
    # becomes a tuple of N scalars; callers should pass an array instead.
    if isinstance(pheno, (tuple, list)):
        y = tuple(asarray(p, float) for p in pheno)
    else:
        y = asarray(pheno, float)

    # Fit the GLMM on the background covariance.
    QS = economic_qs(K)
    glmm = GLMMExpFam(y, lik, covs, QS)
    glmm.feed().maximize(verbose=verbose)

    # Extract the fitted quantities from the GLMM.
    eta = glmm.site.eta
    tau = glmm.site.tau
    scale = float(glmm.scale)
    delta = float(glmm.delta)

    # Pseudo-phenotype and per-sample variance derived from the site terms.
    mu = eta / tau
    site_var = 1. / tau
    s2_g = scale * (1 - delta)

    # Covariance for the LMM: scaled K plus a diagonal built from the site
    # variances shifted so that the smallest diagonal term equals 1e-4.
    tR = s2_g * K + diag(site_var - site_var.min() + 1e-4)

    return LMM(snps=snps, pheno=mu, K=tR, covs=covs, verbose=verbose)