Example #1
def _hinv(A00, A01, A11):
    from numpy import (abs as npy_abs, atleast_1d, divide, errstate, full_like,
                       isfinite, logical_and, logical_not, maximum, nan_to_num)
    from numpy_sugar import is_all_finite

    rcond = 1e-15
    b = atleast_1d(A01)
    d = atleast_1d(A11)
    a = full_like(d, A00)
    m = maximum(maximum(npy_abs(b), npy_abs(d)), npy_abs(a))

    a /= m
    b = b / m
    c = b
    d = d / m

    bc = b * c
    ad = a * d
    with errstate(invalid="ignore", divide="ignore"):
        ai = a / (a * a - nan_to_num((bc * a) / d))
        bi = b / (b * b - nan_to_num(ad))
        di = d / (d * d - nan_to_num((bc * d) / a))

    ai /= m
    bi /= m
    di /= m

    ok = is_all_finite(ai) and is_all_finite(bi) and is_all_finite(di)
    if not ok:
        ok = logical_and.reduce([isfinite(ai), isfinite(bi), isfinite(di)])
        nok = logical_not(ok)
        U, S, VT = hsvd(a[nok], b[nok], d[nok])

        maxi = maximum(npy_abs(S[0]), npy_abs(S[1]))
        cutoff = rcond * maxi

        large = S[0] > cutoff
        S[0] = divide(1, S[0], where=large, out=S[0])
        S[0][~large] = 0

        large = S[1] > cutoff
        S[1] = divide(1, S[1], where=large, out=S[1])
        S[1][~large] = 0

        SiVT = [[VT[0][0] * S[0], VT[0][1] * S[0]],
                [VT[1][0] * S[1], VT[1][1] * S[1]]]
        Ai = [
            [
                U[0][0] * SiVT[0][0] + U[0][1] * SiVT[1][0],
                U[0][0] * SiVT[0][1] + U[0][1] * SiVT[1][1],
            ],
            [
                U[1][0] * SiVT[0][0] + U[1][1] * SiVT[1][0],
                U[1][0] * SiVT[0][1] + U[1][1] * SiVT[1][1],
            ],
        ]
        ai[nok] = Ai[0][0] / m
        bi[nok] = Ai[0][1] / m
        di[nok] = Ai[1][1] / m

    return ai, bi, di
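For reference, `_hinv` inverts, entry by entry, the 2-by-2 symmetric matrices `[[a, b], [b, d]]`, falling back to an SVD-based pseudo-inverse (through the module-local helper `hsvd`, not shown in this excerpt) for ill-conditioned entries. A minimal usage sketch, assuming the imports above are in scope, comparing a well-conditioned entry against `numpy.linalg.inv`:

import numpy as np

ai, bi, di = _hinv(2.0, np.array([0.5]), np.array([3.0]))
expected = np.linalg.inv(np.array([[2.0, 0.5], [0.5, 3.0]]))
# ai, bi, di hold the (0, 0), (0, 1), and (1, 1) entries of the elementwise inverse.
assert np.allclose([ai[0], bi[0], di[0]],
                   [expected[0, 0], expected[0, 1], expected[1, 1]])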
Example #2
    def __init__(self, M, Q, S, overdispersion):
        self._cache_SQt = LRUCache(maxsize=1)
        self._cache_m = LRUCache(maxsize=1)
        self._cache_K = LRUCache(maxsize=1)
        self._cache_diagK = LRUCache(maxsize=1)
        self._cache_update = LRUCache(maxsize=1)
        self._cache_lml_components = LRUCache(maxsize=1)
        self._cache_L = LRUCache(maxsize=1)
        self._cache_A = LRUCache(maxsize=1)
        self._cache_C = LRUCache(maxsize=1)
        self._cache_BiQt = LRUCache(maxsize=1)
        self._cache_QBiQtAm = LRUCache(maxsize=1)
        self._cache_QBiQtCteta = LRUCache(maxsize=1)

        self._logger = logging.getLogger(__name__)

        if not is_all_finite(Q) or not is_all_finite(S):
            raise ValueError("There are non-finite numbers in the provided" +
                             " eigen decomposition.")

        if S.min() <= 0:
            raise ValueError("The provided covariance matrix is not" +
                             " positive-definite because the minimum" +
                             " eigvalue is %f." % S.min())

        make_sure_reasonable_conditioning(S)

        self._S = S
        self._Q = Q
        self.__QSQt = None

        nsamples = M.shape[0]
        self._previous_sitelik_tau = zeros(nsamples)
        self._previous_sitelik_eta = zeros(nsamples)

        self._sitelik_tau = zeros(nsamples)
        self._sitelik_eta = zeros(nsamples)

        self._cav_tau = zeros(nsamples)
        self._cav_eta = zeros(nsamples)

        self._joint_tau = zeros(nsamples)
        self._joint_eta = zeros(nsamples)

        self._v = None
        self._delta = 0
        self._overdispersion = overdispersion
        self._tM = None
        self.__tbeta = None
        self._covariate_setup(M)

        self._loghz = empty(nsamples)
        self._hmu = empty(nsamples)
        self._hvar = empty(nsamples)
        self._ep_params_initialized = False
Example #3
def assert_finite(Y, M, K):
    from numpy_sugar import is_all_finite

    if not is_all_finite(Y):
        raise ValueError("Outcome must have finite values only.")

    if not is_all_finite(M):
        raise ValueError("Covariates must have finite values only.")

    if K is not None:
        if not is_all_finite(K):
            raise ValueError("Covariate matrix must have finite values only.")
Example #4
    def __init__(self, y, Q0, Q1, S0, covariates=None):
        super(FastLMM, self).__init__(logistic=Scalar(0.0))

        if not is_all_finite(y):
            raise ValueError("There are non-finite values in the phenotype.")

        self._flmmc = FastLMMCore(y, covariates, Q0, Q1, S0)
        self.set_nodata()
Example #5
    def _optimal_beta_nom(self):
        A = self._A()
        C = self._C()
        teta = self._sitelik_eta
        Cteta = C * teta
        v = Cteta - A * self._QBiQtCteta()
        if not is_all_finite(v):
            raise ValueError("beta_nom should not be %s." % str(v))
        return v
Example #6
    def _optimal_tbeta_denom(self):
        L = self._L()
        Q = self._Q
        AM = ddot(self._A(), self._tM, left=True)
        QBiQtAM = dot(Q, cho_solve(L, dot(Q.T, AM)))
        v = dot(self._tM.T, AM) - dot(AM.T, QBiQtAM)
        if not is_all_finite(v):
            raise ValueError("tbeta_denom should not be %s." % str(v))
        return v
Example #7
    def __init__(self, nsuccesses, ntrials):
        self.nsuccesses = ascontiguousarray(nsuccesses, dtype=float)
        self.ntrials = ascontiguousarray(ntrials, dtype=float)
        self.likelihood_name = 'Binomial'

        if is_all_equal(nsuccesses):
            raise ValueError("The phenotype array has a single unique value" +
                             " only.")

        if not is_all_finite(nsuccesses):
            raise ValueError("There are non-finite numbers in phenotype.")
Example #8
    def __init__(self, y, mean, cov):
        from numpy_sugar import is_all_finite

        super(GP, self).__init__("GP", composite=[mean, cov])

        if not is_all_finite(y):
            raise ValueError("There are non-finite values in the phenotype.")

        self._y = y
        self._cov = cov
        self._mean = mean
Example #9
    def _gradient_over_both(self):
        self._update()

        v = self.v
        delta = self.delta
        Q = self._Q
        S = self._S
        A = self._A()
        AQ = ddot(A, Q, left=True)
        SQt = ddot(S, Q.T, left=True)
        BiQt = self._BiQt()
        uBiQtAK0, uBiQtAK1 = self._uBiQtAK()

        C = self._C()
        m = self.m()
        teta = self._sitelik_eta
        Q = self._Q
        As = A.sum()

        Am = A * m
        Em = Am - A * self._QBiQtAm()

        Cteta = C * teta
        Eu = Cteta - A * self._QBiQtCteta()

        u = Em - Eu
        uKu = dot(u, self._Kdot(u))
        tr1 = trace2(AQ, uBiQtAK0)
        tr2 = trace2(AQ, uBiQtAK1)

        dv = uKu / v
        dv -= (1 - delta) * trace2(AQ, SQt)
        dv -= delta * As
        dv += (1 - delta) * tr1
        dv += delta * tr2
        dv /= 2

        dd = delta / (1 - delta)
        ddelta = -tr1
        ddelta -= dd * tr2
        ddelta += trace2(AQ, ddot(BiQt, A, left=False)) * (dd + 1)
        ddelta += (dd + 1) * dot(u, u)
        ddelta += trace2(AQ, SQt)
        ddelta -= As
        ddelta *= v
        ddelta -= uKu / (1 - delta)
        ddelta /= 2

        v = asarray([dv, ddelta])

        if not is_all_finite(v):
            raise ValueError("LML gradient should not be %s." % str(v))

        return v
Example #10
    def _tbeta(self, value):
        self._cache_lml_components.clear()
        self._cache_QBiQtAm.clear()
        self._cache_m.clear()
        self._cache_update.clear()
        if not is_all_finite(value):
            raise ValueError("tbeta should not be %s." % str(value))
        if self.__tbeta is None:
            self.__tbeta = asarray(value, float).copy()
        else:
            self.__tbeta[:] = value
Example #11
    def scan(self, M):
        """
        LML, fixed-effect sizes, and scale of the candidate set.

        Parameters
        ----------
        M : array_like
            Fixed-effects set.

        Returns
        -------
        lml : float
            Log of the marginal likelihood.
        effsizes0 : ndarray
            Covariates fixed-effect sizes.
        effsizes0_se : ndarray
            Covariates fixed-effect size standard errors.
        effsizes1 : ndarray
            Candidate set fixed-effect sizes.
        effsizes1_se : ndarray
            Candidate fixed-effect size standard errors.
        scale : ndarray
            Optimal scale.
        """
        from numpy_sugar.linalg import ddot
        from numpy_sugar import is_all_finite

        M = asarray(M, float)

        if M.shape[1] == 0:
            return {
                "lml": self.null_lml(),
                "effsizes0": self.null_beta,
                "effsizes0_se": self.null_beta_se,
                "effsizes1": empty((0)),
                "effsizes1_se": empty((0)),
                "scale": self.null_scale,
            }

        if not is_all_finite(M):
            raise ValueError("M parameter has non-finite elements.")

        MTQ = [dot(M.T, Q) for Q in self._QS[0] if Q.size > 0]
        yTBM = [dot(i, j.T) for (i, j) in zip(self._yTQDi, MTQ)]
        XTBM = [dot(i, j.T) for (i, j) in zip(self._XTQDi, MTQ)]
        D = self._D
        MTBM = [ddot(i, 1 / j) @ i.T for i, j in zip(MTQ, D) if j.min() > 0]

        return self._multicovariate_set(yTBM, XTBM, MTBM)
Example #12
    def append(self, K, name=None):
        from numpy_sugar import is_all_finite
        from numpy import asarray
        from glimix_core.cov import GivenCov

        data = conform_dataset(self._y, K=K)
        K = asarray(data["K"], float)
        if not is_all_finite(K):
            raise ValueError("Covariance-matrix values must be finite.")
        K = K / K.diagonal().mean()
        cov = GivenCov(K)
        if name is None:
            name = "unnamed-{}".format(self._unnamed)
            self._unnamed += 1
        cov.name = name
        self._covariance.append(cov)
Example #13
    def scan(self, M):
        """
        LML, fixed-effect sizes, and scale of the candidate set.

        Parameters
        ----------
        M : array_like
            Fixed-effects set.

        Returns
        -------
        lml : float
            Log of the marginal likelihood.
        effsizes0 : ndarray
            Covariates fixed-effect sizes.
        effsizes0_se : ndarray
            Covariates fixed-effect size standard errors.
        effsizes1 : ndarray
            Candidate set fixed-effect sizes.
        effsizes1_se : ndarray
            Candidate fixed-effect size standard errors.
        scale : ndarray
            Optimal scale.
        """
        from numpy_sugar import is_all_finite

        M = asarray(M, float)

        if M.shape[1] == 0:
            return {
                "lml": self._null_lml,
                "effsizes0": self.null_beta,
                "effsizes0_se": self.null_beta_se,
                "effsizes1": empty((0)),
                "effsizes1_se": empty((0)),
                "scale": self.null_scale,
            }

        if not is_all_finite(M):
            raise ValueError("M parameter has non-finite elements.")

        BM = self._B.dot(M)
        yTBM = self._y.T @ BM
        XTBM = self._X.T @ BM
        MTBM = M.T @ BM

        return self._multicovariate_set(yTBM, XTBM, MTBM)
Example #14
    def scan(self, A1, X1):
        """
        LML, fixed-effect sizes, and scale of the candidate set.

        Parameters
        ----------
        A1 : (p, e) array_like
            Trait-by-environments design matrix.
        X1 : (n, m) array_like
            Variants set matrix.

        Returns
        -------
        lml : float
            Log of the marginal likelihood for the set.
        effsizes0 : (c, p) ndarray
            Fixed-effect sizes for the covariates.
        effsizes0_se : (c, p) ndarray
            Fixed-effect size standard errors for the covariates.
        effsizes1 : (m, e) ndarray
            Fixed-effect sizes for the candidates.
        effsizes1_se : (m, e) ndarray
            Fixed-effect size standard errors for the candidates.
        scale : float
            Optimal scale.
        """
        from numpy import empty
        from numpy.linalg import multi_dot
        from numpy_sugar import epsilon, is_all_finite
        from scipy.linalg import cho_solve

        A1 = asarray(A1, float)
        X1 = asarray(X1, float)

        if not is_all_finite(A1):
            raise ValueError("A1 parameter has non-finite elements.")

        if not is_all_finite(X1):
            raise ValueError("X1 parameter has non-finite elements.")

        if A1.shape[1] == 0:
            beta_se = sqrt(self.null_beta_covariance.diagonal())
            return {
                "lml": self.null_lml(),
                "effsizes0": unvec(self.null_beta, (self._ncovariates, -1)),
                "effsizes0_se": unvec(beta_se, (self._ncovariates, -1)),
                "effsizes1": empty((0, )),
                "effsizes1_se": empty((0, )),
                "scale": self.null_scale,
            }

        X1X1 = X1.T @ X1
        XX1 = self._X.T @ X1
        AWA1 = self._WA.T @ A1
        A1W = A1.T @ self._W
        GX1 = self._G.T @ X1

        MRiM1 = kron(AWA1, XX1)
        M1RiM1 = kron(A1W @ A1, X1X1)

        M1Riy = vec(multi_dot([X1.T, self._Y, A1W.T]))
        XRiM1 = kron(self._WL0.T @ A1, GX1)
        ZiXRiM1 = cho_solve(self._Lz, XRiM1)

        MRiXZiXRiM1 = self._XRiM.T @ ZiXRiM1
        M1RiXZiXRiM1 = XRiM1.T @ ZiXRiM1
        M1RiXZiXRiy = XRiM1.T @ self._ZiXRiy

        T0 = [[self._MRiM, MRiM1], [MRiM1.T, M1RiM1]]
        T1 = [[self._MRiXZiXRiM, MRiXZiXRiM1], [MRiXZiXRiM1.T, M1RiXZiXRiM1]]
        T2 = [self._MRiy, M1Riy]
        T3 = [self._MRiXZiXRiy, M1RiXZiXRiy]

        MKiM = block(T0) - block(T1)
        MKiy = block(T2) - block(T3)
        beta = rsolve(MKiM, MKiy)

        mKiy = beta.T @ MKiy
        cp = self._ntraits * self._ncovariates
        effsizes0 = unvec(beta[:cp], (self._ncovariates, self._ntraits))
        effsizes1 = unvec(beta[cp:], (X1.shape[1], A1.shape[1]))

        np = self._nsamples * self._ntraits
        sqrtdot = self._yKiy - mKiy
        scale = clip(sqrtdot / np, epsilon.tiny, inf)
        lml = self._static_lml() / 2 - np * safe_log(scale) / 2 - np / 2

        effsizes_se = sqrt(
            clip(scale * pinv(MKiM).diagonal(), epsilon.tiny, inf))
        effsizes0_se = unvec(effsizes_se[:cp],
                             (self._ncovariates, self._ntraits))
        effsizes1_se = unvec(effsizes_se[cp:], (X1.shape[1], A1.shape[1]))

        return {
            "lml": lml,
            "effsizes0": effsizes0,
            "effsizes1": effsizes1,
            "scale": scale,
            "effsizes0_se": effsizes0_se,
            "effsizes1_se": effsizes1_se,
        }
Example #15
    def __init__(self, y, X, QS=None, restricted=False):
        """
        Constructor.

        Parameters
        ----------
        y : array_like
            Outcome.
        X : array_like
            Covariates as a two-dimensional array.
        QS : tuple
            Economic eigendecomposition in the form of ``((Q0, ), S0)`` of a
            covariance matrix ``K``.
        restricted : bool
            ``True`` for restricted maximum likelihood optimization; ``False``
            otherwise. Defaults to ``False``.
        """
        from numpy_sugar import is_all_finite

        logistic = Scalar(0.0)
        logistic.listen(self._delta_update)
        logistic.bounds = (-numbers.logmax, +numbers.logmax)
        Function.__init__(self, "LMM", logistic=logistic)
        self._logistic = logistic

        y = asarray(y, float).ravel()
        if not is_all_finite(y):
            raise ValueError("There are non-finite values in the outcome.")

        if len(y) == 0:
            raise ValueError("The outcome array is empty.")

        X = atleast_2d(asarray(X, float).T).T
        if not is_all_finite(X):
            raise ValueError("There are non-finite values in the covariates matrix.")

        self._optimal = {"beta": False, "scale": False}
        if QS is None:
            QS = economic_qs_zeros(len(y))
            self._B = B(QS[0][0], QS[1], 0.0, 1.0)
            self.delta = 1.0
            logistic.fix()
        else:
            self._B = B(QS[0][0], QS[1], 0.5, 0.5)
            self.delta = 0.5

        if QS[0][0].shape[0] != len(y):
            msg = "Sample size differs between outcome and covariance decomposition."
            raise ValueError(msg)

        if y.shape[0] != X.shape[0]:
            msg = "Sample size differs between outcome and covariates."
            raise ValueError(msg)

        self._y = y
        self._Q0 = QS[0][0]
        self._S0 = QS[1]
        self._Xsvd = SVD(X)
        self._tbeta = zeros(self._Xsvd.rank)
        self._scale = 1.0
        self._fix = {"beta": False, "scale": False}
        self._restricted = restricted
Example #16
    def __init__(self, Y, A, X, G, rank=1, restricted=False):
        """
        Constructor.

        Parameters
        ----------
        Y : (n, p) array_like
            Outcome matrix.
        A : (p, p) array_like
            Trait-by-trait design matrix.
        X : (n, c) array_like
            Covariates design matrix.
        G : (n, r) array_like
            Matrix G from the GGᵀ term.
        rank : optional, int
            Maximum rank of matrix C₀. Defaults to ``1``.
        """
        from numpy_sugar import is_all_finite

        Y = asfortranarray(Y, float)
        yrank = matrix_rank(Y)
        if Y.shape[1] > yrank:
            warnings.warn(
                f"Y is not full column rank: rank(Y)={yrank}. " +
                "Convergence might be problematic.",
                UserWarning,
            )

        A = asarray(A, float)
        X = asarray(X, float)
        Xrank = matrix_rank(X)
        if X.shape[1] > Xrank:
            warnings.warn(
                f"X is not full column rank: rank(X)={Xrank}. " +
                "Convergence might be problematic.",
                UserWarning,
            )
        G = asarray(G, float).copy()
        self._G_norm = max(G.min(), G.max())
        G /= self._G_norm

        if not is_all_finite(Y):
            raise ValueError(
                "There are non-finite values in the outcome matrix.")

        if not is_all_finite(A):
            msg = "There are non-finite values in the trait-by-trait design matrix."
            raise ValueError(msg)

        if not is_all_finite(X):
            raise ValueError(
                "There are non-finite values in the covariates matrix.")

        if not is_all_finite(G):
            raise ValueError("There are non-finite values in the G matrix.")

        self._Y = Y
        self._cov = Kron2SumCov(G, Y.shape[1], rank)
        self._cov.listen(self._parameters_update)
        self._mean = KronMean(A, X)
        self._cache = {"terms": None}
        self._restricted = restricted
        composite = [("C0", self._cov.C0), ("C1", self._cov.C1)]
        Function.__init__(self, "Kron2Sum", composite=composite)

        nparams = self._mean.nparams + self._cov.nparams
        if nparams > Y.size:
            msg = "The number of parameters is larger than the outcome size."
            msg += " Convergence is expected to be problematic."
            warnings.warn(msg, UserWarning)
Example #17
def estimate(y, lik, K, M=None, verbose=True):
    r"""Estimate the so-called narrow-sense heritability.

    It supports Normal, Bernoulli, Probit, Binomial, and Poisson phenotypes.
    Let :math:`N` be the sample size and :math:`S` the number of covariates.

    Parameters
    ----------
    y : array_like
        Either a tuple of two arrays of `N` individuals each (Binomial
        phenotypes) or an array of `N` individuals (Normal, Poisson, or
        Bernoulli phenotypes). If a continuous phenotype is provided (i.e., a Normal
        one), make sure it has been normalised in such a way that its values are
        not extremely large; otherwise, numerical errors might occur. For example,
        by using :func:`limix.qc.mean_standardize` or
        :func:`limix.qc.quantile_gaussianize`.
    lik : "normal", "bernoulli", "probit", binomial", "poisson"
        Sample likelihood describing the residual distribution.
    K : array_like
        :math:`N`-by-:math:`N` covariance matrix. It might be, for example, the
        estimated kinship relationship between the individuals. The provided matrix will
        be normalised via the function :func:`limix.qc.normalise_covariance`.
    M : array_like, optional
        :math:`N` individuals by :math:`S` covariates.
        It will create an :math:`N`-by-:math:`1` matrix ``M`` of ones representing the offset
        covariate if ``None`` is passed. If an array is passed, it will be used as is.
        Defaults to ``None``.
    verbose : bool, optional
        ``True`` to display progress and summary; ``False`` otherwise.

    Returns
    -------
    float
        Estimated heritability.

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt
        >>> from numpy.random import RandomState
        >>> from limix.her import estimate
        >>>
        >>> random = RandomState(0)
        >>>
        >>> G = random.randn(150, 200) / sqrt(200)
        >>> K = dot(G, G.T)
        >>> z = dot(G, random.randn(200)) + random.randn(150)
        >>> y = random.poisson(exp(z))
        >>>
        >>> print('%.3f' % estimate(y, 'poisson', K, verbose=False))  # doctest: +FLOAT_CMP
        0.183

    Notes
    -----
    It will raise a ``ValueError`` exception if non-finite values are passed. Please
    refer to the :func:`limix.qc.mean_impute` function for missing value imputation.
    """
    from numpy_sugar import is_all_finite
    from numpy_sugar.linalg import economic_qs
    from numpy import ones, pi, var
    from glimix_core.glmm import GLMMExpFam
    from glimix_core.lmm import LMM

    if not isinstance(lik, (tuple, list)):
        lik = (lik,)

    lik_name = lik[0].lower()
    check_likelihood_name(lik_name)

    with session_block("heritability analysis", disable=not verbose):

        if M is None:
            M = ones((len(y), 1))

        with session_line("Normalising input...", disable=not verbose):
            data = conform_dataset(y, M=M, K=K)

        y = data["y"]
        M = data["M"]
        K = data["K"]

        if not is_all_finite(y):
            raise ValueError("Outcome must have finite values only.")

        if not is_all_finite(M):
            raise ValueError("Covariates must have finite values only.")

        if K is not None:
            if not is_all_finite(K):
                raise ValueError("Covariate matrix must have finite values only.")

            K = normalise_covariance(K)

        y = normalise_extreme_values(y, lik)

        if K is not None:
            QS = economic_qs(K)
        else:
            QS = None

        if lik_name == "normal":
            method = LMM(y.values, M.values, QS)
            method.fit(verbose=verbose)
        else:
            method = GLMMExpFam(y, lik, M.values, QS, n_int=500)
            method.fit(verbose=verbose, factr=1e6, pgtol=1e-3)

        g = method.scale * (1 - method.delta)
        e = method.scale * method.delta
        if lik_name == "bernoulli":
            e += pi * pi / 3

        if lik_name == "normal":
            v = method.fixed_effects_variance
        else:
            v = var(method.mean())

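        # Narrow-sense heritability: genetic variance over total variance
        # (fixed-effect variance + genetic variance + residual variance).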
        return g / (v + g + e)
Example #18
def test_is_all_finite():
    from numpy import inf, nan
    from numpy.testing import assert_equal
    from numpy_sugar import is_all_finite

    assert_equal(is_all_finite([1, -1, 2393.0]), True)
    assert_equal(is_all_finite([1, -1, nan, 2393.0]), False)
    assert_equal(is_all_finite([1, -1, inf, 2393.0]), False)
Example #19
def st_scan(G, y, lik, K=None, M=None, verbose=True):
    r""" Single-variant association testing via generalised linear mixed models.

    It supports Normal (linear mixed model), Bernoulli, Probit, Binomial, and Poisson
    residual errors, defined by ``lik``.
    The columns of ``G`` define the candidates to be tested for association
    with the phenotype ``y``.
    The covariance matrix is set by ``K``.
    If not provided, or set to ``None``, the generalised linear model
    without random effects is assumed.
    The covariates can be set via the parameter ``M``.
    We recommend always providing a column of ones whenever covariates are
    provided.

    Parameters
    ----------
    G : array_like
        :math:`N` individuals by :math:`S` candidate markers.
    y : array_like
        An outcome array of :math:`N` individuals.
    lik : tuple, "normal", "bernoulli", "probit", binomial", "poisson"
        Sample likelihood describing the residual distribution.
        Either a tuple or a string specifying the likelihood is required. The Normal,
        Bernoulli, Probit, and Poisson likelihoods can be selected by providing a
        string. Binomial likelihood on the other hand requires a tuple because of the
        number of trials: ``("binomial", array_like)``.
    K : array_like, optional
        :math:`N`-by-:math:`N` covariance matrix (e.g., kinship coefficients).
        Set to ``None`` for a generalised linear model without random effects.
        Defaults to ``None``.
    M : array_like, optional
        `N` individuals by `S` covariates.
        It will create an :math:`N`-by-:math:`1` matrix ``M`` of ones representing the
        offset covariate if ``None`` is passed. If an array is passed, it will be used as
        is. Defaults to ``None``.
    verbose : bool, optional
        ``True`` to display progress and summary; ``False`` otherwise.

    Returns
    -------
    :class:`limix.qtl.QTLModel`
        QTL representation.

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt, ones
        >>> from numpy.random import RandomState
        >>> from pandas import DataFrame
        >>> import pandas as pd
        >>> from limix.qtl import st_scan
        >>>
        >>> random = RandomState(1)
        >>> pd.options.display.float_format = "{:9.6f}".format
        >>>
        >>> n = 30
        >>> p = 3
        >>> samples_index = range(n)
        >>>
        >>> M = DataFrame(dict(offset=ones(n), age=random.randint(10, 60, n)))
        >>> M.index = samples_index
        >>>
        >>> X = random.randn(n, 100)
        >>> K = dot(X, X.T)
        >>>
        >>> candidates = random.randn(n, p)
        >>> candidates = DataFrame(candidates, index=samples_index,
        ...                                    columns=['rs0', 'rs1', 'rs2'])
        >>>
        >>> y = random.poisson(exp(random.randn(n)))
        >>>
        >>> model = st_scan(candidates, y, 'poisson', K, M=M, verbose=False)
        >>>
        >>> model.variant_pvalues.to_dataframe()  # doctest: +FLOAT_CMP
                         pv
        candidate
        rs0        0.554444
        rs1        0.218996
        rs2        0.552200
        >>> model.variant_effsizes.to_dataframe()  # doctest: +FLOAT_CMP
                   effsizes
        candidate
        rs0       -0.130867
        rs1       -0.315078
        rs2       -0.143869
        >>> model.variant_effsizes_se.to_dataframe()  # doctest: +FLOAT_CMP
                   effsizes std
        candidate
        rs0            0.221390
        rs1            0.256327
        rs2            0.242013
        >>> model  # doctest: +FLOAT_CMP
        Variants
        --------
               effsizes  effsizes_se   pvalues
        count         3            3         3
        mean  -0.196604     0.239910  0.441880
        std    0.102807     0.017563  0.193027
        min   -0.315077     0.221389  0.218996
        25%   -0.229473     0.231701  0.385598
        50%   -0.143869     0.242013  0.552200
        75%   -0.137367     0.249170  0.553322
        max   -0.130866     0.256326  0.554443
        <BLANKLINE>
        Covariate effect sizes for H0
        -----------------------------
              age    offset
        -0.005568  0.395287

    >>> from numpy import zeros
    >>>
    >>> nsamples = 50
    >>>
    >>> X = random.randn(nsamples, 2)
    >>> G = random.randn(nsamples, 100)
    >>> K = dot(G, G.T)
    >>> ntrials = random.randint(1, 100, nsamples)
    >>> z = dot(G, random.randn(100)) / sqrt(100)
    >>>
    >>> successes = zeros(len(ntrials), int)
    >>> for i, nt in enumerate(ntrials):
    ...     for _ in range(nt):
    ...         successes[i] += int(z[i] + 0.5 * random.randn() > 0)
    >>>
    >>> result = st_scan(X, successes, ("binomial", ntrials), K, verbose=False)
    >>> print(result)  # doctest: +FLOAT_CMP
    Variants
    --------
           effsizes  effsizes_se   pvalues
    count         2            2         2
    mean   0.227116     0.509575  0.478677
    std    0.567975     0.031268  0.341791
    min   -0.174503     0.487466  0.236994
    25%    0.026307     0.498520  0.357835
    50%    0.227116     0.509575  0.478677
    75%    0.427925     0.520630  0.599518
    max    0.628735     0.531685  0.720359
    <BLANKLINE>
    Covariate effect sizes for H0
    -----------------------------
       offset
     0.409570


    Notes
    -----
    It will raise a ``ValueError`` exception if non-finite values are passed. Please
    refer to the :func:`limix.qc.mean_impute` function for missing value imputation.
    """
    from numpy_sugar import is_all_finite
    from numpy_sugar.linalg import economic_qs

    if not isinstance(lik, (tuple, list)):
        lik = (lik,)

    lik_name = lik[0].lower()
    lik = (lik_name,) + lik[1:]
    check_likelihood_name(lik_name)

    with session_block("qtl analysis", disable=not verbose):

        with session_line("Normalising input... ", disable=not verbose):
            data = conform_dataset(y, M, G=G, K=K)

        y = data["y"]
        M = data["M"]
        G = data["G"]
        K = data["K"]

        if not is_all_finite(y):
            raise ValueError("Outcome must have finite values only.")

        if not is_all_finite(M):
            raise ValueError("Covariates must have finite values only.")

        if K is not None:
            if not is_all_finite(K):
                raise ValueError("Covariate matrix must have finite values only.")
            QS = economic_qs(K)
        else:
            QS = None

        y = normalise_extreme_values(data["y"], lik)

        if lik_name == "normal":
            model = _perform_lmm(y.values, M, QS, G, verbose)
        else:
            model = _perform_glmm(y.values, lik, M, K, QS, G, verbose)

        if verbose:
            print(model)

        return model
Example #20
    def beta(self, value):
        if not is_all_finite(value):
            raise ValueError("beta should not be %s." % str(value))
        self._tbeta = self._svd_S12 * dot(self._svd_V.T, value)
Example #21
def scan(phenotype, X, G=None, K=None, covariates=None, progress=True,
         options=None):
    """Association between genetic variants and phenotype.

    Matrix `X` shall contain the genetic markers (e.g., number of minor
    alleles) with rows and columns representing samples and genetic markers,
    respectively.

    The user must specify only one of the parameters `G` and `K` for defining
    the genetic background.

    Let :math:`N` be the sample size, :math:`S` the number of covariates,
    :math:`P_c` the number of genetic markers to be tested, and :math:`P_b`
    the number of genetic markers used for Kinship estimation.

    Args:
        phenotype  (array_like): Phenotype. Dimension (:math:`N\\times 1`).
        X          (array_like): Candidate genetic markers (or any other
                                 type of explanatory variable) whose
                                 association with the phenotype will be
                                 tested. Dimension (:math:`N\\times P_c`).
        G          (array_like): Genetic markers matrix used internally for
                                 kinship estimation. Dimension
                                 (:math:`N\\times P_b`).
        K          (array_like): Kinship matrix. Dimension
                                 (:math:`N\\times N`).
        covariates (array_like): Covariates. Default is an offset.
                                 Dimension (:math:`N\\times S`).
        progress    (bool)     : Shows progress. Defaults to `True`.

    Returns:
        A :class:`lim.genetics.qtl._canonical.CanonicalLRTScan` instance.
    """
    logger = logging.getLogger(__name__)
    logger.info('%s association scan has started.', phenotype.likelihood_name)

    if options is None:
        options = dict()

    if 'fast' not in options:
        options['fast'] = True

    if 'rank_norm' not in options:
        options['rank_norm'] = True

    n = phenotype.sample_size
    covariates = ones((n, 1)) if covariates is None else covariates

    X = _clone(X)
    G = _clone(G)
    K = _clone(K)

    if not is_all_finite(X):
        raise ValueError("The candidate matrix X has non-finite values.")

    if G is not None and not is_all_finite(G):
        raise ValueError("The genetic markers matrix G has non-finite values.")

    if K is not None and not is_all_finite(K):
        raise ValueError("The Kinship matrix K has non-finite values.")

    background = Background()

    (Q0, Q1, S0) = _genetic_preprocess(X, G, K, background)
    qtl = QTLScan(phenotype, covariates, X, Q0, Q1, S0, options)
    qtl.progress = progress
    qtl.compute_statistics()

    return qtl