Ejemplo n.º 1
0
def testLassoBackgroundModel(estimator, Xfg, Xbg, y, use_Kgeno=True, **kwargs):
    """
    Test the foreground SNPs (Xfg) with a linear mixed model whose covariance
    matrix is built from SNPs selected by the Lasso estimator.

    estimator   : (Lmm-)Lasso model; must expose fit() and coef_
    Xfg         : SNP data for the foreground model
    Xbg         : SNP data for the background model
    y           : phenotype vector
    use_Kgeno   : if True (default), the kernel of all background SNPs is
                  passed to the estimator and, when any coefficient is
                  non-zero, combined with the kernel of the selected SNPs;
                  if False, only the kernel of the selected SNPs is used.
    kwargs      : forwarded unchanged to qtl.test_lmm

    Returns the p-values reported by the fitted LMM (getPv()).
    """
    if not use_Kgeno:
        # Plain Lasso on the background SNPs; the selected ones define K.
        estimator.fit(Xbg, y)
        selected = estimator.coef_ != 0
        covariance = compute_linear_kernel(Xbg, selected)
    else:
        background_kernel = compute_linear_kernel(Xbg)
        estimator.fit(Xfg, y, background_kernel)
        selected = estimator.coef_ != 0

        if not selected.any():
            # Nothing selected: fall back to the plain background kernel.
            covariance = background_kernel
        else:
            # NOTE(review): the mask comes from a fit on Xfg but is applied to
            # Xbg here; presumably this requires Xfg and Xbg to have the same
            # number of SNP columns -- confirm the intended matrix.
            selected_kernel = compute_linear_kernel(Xbg, selected)
            decomposition = VarianceDecomposition(y)
            decomposition.addRandomEffect(is_noise=True)
            decomposition.addRandomEffect(background_kernel)
            decomposition.addRandomEffect(selected_kernel)
            decomposition.optimize()
            covariance = decomposition.gp.getCovar().K()

    return qtl.test_lmm(Xfg, y, K=covariance, **kwargs).getPv()
Ejemplo n.º 2
0
    def fit(self, X, y, K, standardize=False, verbose=False, **lasso_args):
        """
        Fit the model: train a null variance-decomposition model, rotate the
        data with the eigendecomposition of K, and fit the parent Lasso on the
        rotated data.

        X: SNP data, 2-D with one row per sample
        y: phenotype data, 1-D or 2-D with a single column
        K: background covariance matrix, square with one row per sample
        standardize: if True, genotypes and phenotypes are centred and scaled
            column-wise; this is done on copies, so the caller's arrays are
            left unchanged
        verbose: if True, print the elapsed fitting time
        lasso_args: forwarded unchanged to the parent class's fit()

        Sets self.varComps and self.w_ridge and returns self.
        """
        if y.ndim == 2:
            assert y.shape[1] == 1, 'Only one phenotype can be processed at at time.'
            y = y.flatten()
        time_start = time.time()
        n_s, _ = X.shape  # unpacking also enforces that X is 2-D
        assert X.shape[0] == y.shape[0], 'dimensions do not match'
        assert K.shape[0] == K.shape[1], 'dimensions do not match'
        assert K.shape[0] == X.shape[0], 'dimensions do not match'

        # standardizing genotypes and phenotypes
        if standardize:
            # The subtraction allocates new arrays, so the caller's data is
            # never overwritten and integer-typed inputs are promoted to float
            # instead of failing on an in-place update. The copies are ours,
            # so scaling them in place is safe.
            X = X - X.mean(axis=0)
            X /= X.std(axis=0)
            y = y - y.mean(axis=0)
            y /= y.std(axis=0)

        # training null model
        vd = VarianceDecomposition(y)
        vd.addRandomEffect(is_noise=True)
        vd.addRandomEffect(K)
        vd.optimize()
        varComps = vd.getVarianceComps()
        # Share of the first variance component (the noise term was added
        # first) in the total variance.
        delta0 = varComps[0, 0] / varComps.sum()
        self.varComps = varComps

        S, U = LA.eigh(K)

        # rotating data: project onto the eigenvectors of K and scale row i
        # by 1 / sqrt(S[i] + delta0); broadcasting avoids materialising a
        # full (n_samples, n_features) scaling matrix.
        Sdi_sqrt = SP.sqrt(1. / (S + delta0))
        SUX = SP.dot(U.T, X) * Sdi_sqrt[:, None]
        SUy = Sdi_sqrt * SP.dot(U.T, y)

        # fitting lasso on the rotated data; the residual of the Lasso
        # prediction on the unrotated X is then used to solve for w_ridge.
        super(LmmLasso, self).fit(SUX, SUy, **lasso_args)
        yhat = super(LmmLasso, self).predict(X)
        self.w_ridge = LA.solve(K + delta0 * SP.eye(n_s), y - yhat)

        time_diff = time.time() - time_start
        if verbose:
            # Parenthesised single-argument form prints identically under
            # Python 2 and is valid syntax under Python 3.
            print('... finished in %.2fs' % (time_diff))

        return self
Ejemplo n.º 3
0
def testLassoBackgroundModel(estimator, Xfg, Xbg, y, use_Kgeno=True, **kwargs):
    """
    Run a linear mixed model on the foreground SNPs (Xfg), using a covariance
    matrix derived from the SNPs that the Lasso estimator selects.

    estimator   : (Lmm-)Lasso model; its fit() and coef_ are used
    Xfg         : SNP data for the foreground model
    Xbg         : SNP data for the background model
    y           : phenotype vector
    use_Kgeno   : if True (default), the full background kernel is given to
                  the estimator and merged with the kernel of the selected
                  SNPs via a variance decomposition; if False, the estimator
                  is fitted without a kernel and only the selected SNPs
                  define the covariance.
    kwargs      : extra keyword arguments handed to qtl.test_lmm

    Returns the p-values obtained from the LMM's getPv().
    """
    if use_Kgeno:
        # Start from the kernel over all background SNPs; it is replaced
        # below only when the estimator selects at least one SNP.
        K = compute_linear_kernel(Xbg)
        estimator.fit(Xfg, y, K)
        nonzero = estimator.coef_ != 0

        if nonzero.any():
            # NOTE(review): `nonzero` is derived from a fit on Xfg yet is used
            # to pick columns of Xbg; presumably both matrices must share the
            # same SNP columns -- confirm which matrix was intended.
            K_active = compute_linear_kernel(Xbg, nonzero)
            var_decomp = VarianceDecomposition(y)
            var_decomp.addRandomEffect(is_noise=True)
            var_decomp.addRandomEffect(K)
            var_decomp.addRandomEffect(K_active)
            var_decomp.optimize()
            K = var_decomp.gp.getCovar().K()
    else:
        estimator.fit(Xbg, y)
        nonzero = estimator.coef_ != 0
        K = compute_linear_kernel(Xbg, nonzero)

    fitted_lmm = qtl.test_lmm(Xfg, y, K=K, **kwargs)
    return fitted_lmm.getPv()
Ejemplo n.º 4
0
    def fit(self, X, y, K, standardize=False, verbose=False, **lasso_args):
        """
        Fit the model: train a null variance-decomposition model, rotate the
        data with the eigendecomposition of K, and fit the parent Lasso on the
        rotated data.

        X: SNP data, 2-D with one row per sample
        y: phenotype data, 1-D or 2-D with a single column
        K: background covariance matrix, square with one row per sample
        standardize: if True, genotypes and phenotypes are centred and scaled
            column-wise; this is done on copies, so the caller's arrays are
            left unchanged
        verbose: if True, print the elapsed fitting time
        lasso_args: forwarded unchanged to the parent class's fit()

        Sets self.varComps and self.w_ridge and returns self.
        """
        if y.ndim == 2:
            assert y.shape[1] == 1, 'Only one phenotype can be processed at at time.'
            y = y.flatten()
        time_start = time.time()
        n_s, _ = X.shape  # unpacking also enforces that X is 2-D
        assert X.shape[0] == y.shape[0], 'dimensions do not match'
        assert K.shape[0] == K.shape[1], 'dimensions do not match'
        assert K.shape[0] == X.shape[0], 'dimensions do not match'

        # standardizing genotypes and phenotypes
        if standardize:
            # The subtraction allocates new arrays, so the caller's data is
            # never overwritten and integer-typed inputs are promoted to float
            # instead of failing on an in-place update. The copies are ours,
            # so scaling them in place is safe.
            X = X - X.mean(axis=0)
            X /= X.std(axis=0)
            y = y - y.mean(axis=0)
            y /= y.std(axis=0)

        # training null model
        vd = VarianceDecomposition(y)
        vd.addRandomEffect(is_noise=True)
        vd.addRandomEffect(K)
        vd.optimize()
        varComps = vd.getVarianceComps()
        # Share of the first variance component (the noise term was added
        # first) in the total variance.
        delta0 = varComps[0, 0] / varComps.sum()
        self.varComps = varComps

        S, U = LA.eigh(K)

        # rotating data: project onto the eigenvectors of K and scale row i
        # by 1 / sqrt(S[i] + delta0); broadcasting avoids materialising a
        # full (n_samples, n_features) scaling matrix.
        Sdi_sqrt = SP.sqrt(1. / (S + delta0))
        SUX = SP.dot(U.T, X) * Sdi_sqrt[:, None]
        SUy = Sdi_sqrt * SP.dot(U.T, y)

        # fitting lasso on the rotated data; the residual of the Lasso
        # prediction on the unrotated X is then used to solve for w_ridge.
        super(LmmLasso, self).fit(SUX, SUy, **lasso_args)
        yhat = super(LmmLasso, self).predict(X)
        self.w_ridge = LA.solve(K + delta0 * SP.eye(n_s), y - yhat)

        time_diff = time.time() - time_start
        if verbose:
            # Parenthesised single-argument form prints identically under
            # Python 2 and is valid syntax under Python 3.
            print('... finished in %.2fs' % (time_diff))

        return self