예제 #1
0
    def setUp(self):
        np.random.seed(1)

        # define phenotype
        N = 200
        P = 2
        Y = sp.randn(N,P)
        # define row caoriance
        f = 10
        G = 1.*(sp.rand(N, f)<0.2)
        X = 1.*(sp.rand(N, f)<0.2)
        R = covar_rescale(sp.dot(X,X.T))
        R+= 1e-4 * sp.eye(N)
        # define col covariances
        Cg = FreeFormCov(P)
        self._Cg = Cg
        Cn = FreeFormCov(P)
        Cg.setCovariance(0.5 * sp.cov(Y.T))
        Cn.setCovariance(0.5 * sp.cov(Y.T))
        # define gp
        self.gp = GP3KronSumLR(Y = Y, Cg = Cg, Cn = Cn, R = R, G = G, rank = 1)
예제 #2
0
class TestGPBase(unittest.TestCase):
    def setUp(self):
        np.random.seed(1)

        # define phenotype
        N = 200
        P = 2
        self.Y = sp.randn(N, P)
        # define fixed effects
        self.F = []
        self.A = []
        self.F.append(1. * (sp.rand(N, 2) < 0.5))
        self.A.append(sp.eye(P))
        # define row covariance
        f = 10
        X = 1. * (sp.rand(N, f) < 0.2)
        self.R = covar_rescale(sp.dot(X, X.T))
        self.R += 1e-4 * sp.eye(N)
        # define col covariances
        self.Cg = FreeFormCov(P)
        self.Cn = FreeFormCov(P)
        self.Cg.setCovariance(0.5 * sp.cov(self.Y.T))
        self.Cn.setCovariance(0.5 * sp.cov(self.Y.T))
        # define gp
        self.gp = GP2KronSum(Y=self.Y,
                             F=self.F,
                             A=self.A,
                             Cg=self.Cg,
                             Cn=self.Cn,
                             R=self.R)

    @unittest.skip("someone has to fix it")
    def test_grad(self):

        gp = self.gp

        def func(x, i):
            params = gp.getParams()
            params['covar'] = x
            gp.setParams(params)
            return gp.LML()

        def grad(x, i):
            params = gp.getParams()
            params['covar'] = x
            gp.setParams(params)
            grad = gp.LML_grad()
            return grad['covar'][i]

        x0 = gp.getParams()['covar']
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0., decimal=4)

    def test_grad_activation(self):

        gp = self.gp

        self.Cg._K_act = False

        def func(x, i):
            params = gp.getParams()
            params['covar'] = x
            gp.setParams(params)
            return gp.LML()

        def grad(x, i):
            params = gp.getParams()
            params['covar'] = x
            gp.setParams(params)
            grad = gp.LML_grad()
            return grad['covar'][i]

        x0 = gp.getParams()['covar']
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0., decimal=4)

        self.Cg._K_act = True
        self.Cn._K_act = False

        def func(x, i):
            params = gp.getParams()
            params['covar'] = x
            gp.setParams(params)
            return gp.LML()

        def grad(x, i):
            params = gp.getParams()
            params['covar'] = x
            gp.setParams(params)
            grad = gp.LML_grad()
            return grad['covar'][i]

        x0 = gp.getParams()['covar']
        err = mcheck_grad(func, grad, x0)
        np.testing.assert_almost_equal(err, 0., decimal=4)

    def test_correct_inputs(self):
        np.asarray(None, dtype=float)
예제 #3
0
def run_lmm(reader,
            pheno,
            R=None,
            S_R=None,
            U_R=None,
            W=None,
            covs=None,
            batch_size=1000,
            unique_variants=False):
    """
    Utility function to run StructLMM

    Parameters
    ----------
    reader : :class:`limix.data.BedReader`
        limix bed reader instance.
    pheno : (`N`, 1) ndarray
        phenotype vector
    R : (`N`, `N`) ndarray
        covariance of the random effect.
        Typically this is the genetic relatedness matrix.
        If set, ``W``, ``S_R`` and ``U_R`` are ignored.
    S_R : (`N`, ) ndarray
        eigenvalues of ``R``. If available together with the eigenvectors
        ``U_R``, they can be provided instead of ``R`` to avoid
        repeated computations.
        Only used when ``R`` is not set.
        If set, ``U_R`` should also be specified.
    U_R : (`N`, `N`) ndarray
        eigenvectors of ``R``. If available together with the eigenvalues
        ``S_R``, they can be provided instead of ``R`` to avoid
        repeated computations.
        Only used when ``R`` is not set.
        If set, ``S_R`` should also be specified.
    W : (`N`, `K`) ndarray
        this defines the covariance of a lowrank random effect.
        Setting ``W`` is equivalent to setting ``R = dot(W, W.T)``
        but ``R`` is never computed to minimize memory usage.
        Only used when ``R``, ``U_R`` and ``S_R`` are not set.
    covs : (`N`, L) ndarray
        fixed effect design for covariates `N` samples and `L` covariates.
        If None (dafault value), an intercept only is considered.
    batch_size : int
        to minimize memory usage the analysis is run in batches.
        The number of variants loaded in a batch
        (loaded into memory at the same time).
    no_interaction_test : bool
        if True the interaction test is not consdered.
        Teh default value is True.
    unique_variants : bool
        if True, only non-repeated genotypes are considered
        The default value is False.

    Returns
    -------
    res : *:class:`pandas.DataFrame`*
        contains pv, effect size, standard error on effect size,
        and test statistcs as well as variant info.
    """
    if covs is None:
        covs = sp.ones((pheno.shape[0], 1))

    # calc S_R, U_R if R is specified
    if R is not None:
        S_R, U_R = la.eigh(R)

    # assert that S_R and U_R are both specified
    S_is = S_R is not None
    U_is = U_R is not None
    if S_is or U_is:
        assert S_is and U_is, 'Both U_R and S_R should be specified'

    # assert semidefinite positiveness
    if S_R is not None:
        if S_R.min() < 1e-4:
            offset = S_R.min() + 1e-4
            S_R += offset
            warnings.warn("Added %.2e jitter to make R a SDP cov" % offset)

    # fit null
    if R is not None:
        from limix_core.gp import GP2KronSum
        from limix_core.covar import FreeFormCov
        Cg = FreeFormCov(1)
        Cn = FreeFormCov(1)
        gp = GP2KronSum(
            Y=pheno, Cg=Cg, Cn=Cn, F=covs, A=sp.eye(1), S_R=S_R, U_R=U_R)
        Cg.setCovariance(0.5 * sp.ones(1, 1))
        Cn.setCovariance(0.5 * sp.ones(1, 1))
        info_opt = gp.optimize(verbose=False)
        covar = gp.covar
    elif W is not None:
        from limix_core.gp import GP2KronSumLR
        from limix_core.covar import FreeFormCov
        gp = GP2KronSumLR(Y=pheno, Cn=FreeFormCov(1), G=W, F=covs, A=sp.eye(1))
        gp.covar.Cr.setCovariance(0.5 * sp.ones((1, 1)))
        gp.covar.Cn.setCovariance(0.5 * sp.ones((1, 1)))
        info_opt = gp.optimize(verbose=False)
        covar = gp.covar
    else:
        covar = None

    # define lmm
    lmm = LMM(pheno, covs, covar)

    n_batches = reader.getSnpInfo().shape[0] / batch_size

    t0 = time.time()

    res = []
    for i, gr in enumerate(GIter(reader, batch_size=batch_size)):
        print('.. batch %d/%d' % (i, n_batches))

        X, _res = gr.getGenotypes(standardize=True, return_snpinfo=True)

        if unique_variants:
            X, idxs = f_univar(X, return_idxs=True)
            Isnp = sp.in1d(sp.arange(_res.shape[0]), idxs)
            _res = _res[Isnp]

        # run lmm
        lmm.process(X)
        pv = lmm.getPv()
        beta = lmm.getBetaSNP()
        beta_ste = lmm.getBetaSNPste()
        lrt = lmm.getLRT()

        # add pvalues, beta, etc to res
        _res = _res.assign(pv=pd.Series(pv, index=_res.index))
        _res = _res.assign(beta=pd.Series(beta, index=_res.index))
        _res = _res.assign(beta_ste=pd.Series(beta_ste, index=_res.index))
        _res = _res.assign(lrt=pd.Series(lrt, index=_res.index))
        res.append(_res)

    res = pd.concat(res)
    res.reset_index(inplace=True, drop=True)

    t = time.time() - t0
    print('%.2f s elapsed' % t)

    return res