Ejemplo n.º 1
0
def forward_lmm(snps,pheno,K=None,covs=None,qvalues=False,threshold = 5e-8, maxiter = 2,test='lrt',**kw_args):
    """
    Univariate fixed-effects association scan with forward selection.

    An initial scan is run through simple_lmm. While the best score is below
    ``threshold`` and fewer than ``maxiter`` scans have been performed, the SNP
    with the smallest P-value is appended to the covariates and the scan is
    repeated.

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        pheno:  [N x 1] SP.array of 1 phenotype for N individuals
        K:      [N x N] SP.array of LMM-covariance/kinship coefficients (optional).
                        If not provided, the identity matrix is used.
        covs:   [N x D] SP.array of D covariates for N individuals (optional).
                        If not provided, a single column of ones is used.
        qvalues:        (bool) if True, q-values are computed with FDR.qvalues after
                        every scan, the threshold is applied to the smallest q-value
                        instead of the smallest P-value, and the q-values are
                        returned as well (default False)
        threshold:      (float) score threshold for inclusion in forward selection
                        (default 5e-8)
        maxiter:        (int) maximum number of scans. The first scan is without
                        inclusion, so at most maxiter-1 inclusions are performed
                        (default 2)
        test:           'lrt' for likelihood ratio test (default) or 'f' for F-test;
                        passed on to simple_lmm
        **kw_args:      further keyword arguments passed on to simple_lmm

    Returns:
        lm:     the LMM object returned by simple_lmm
        RV:     dict with the entries
            'iadded':   list of indices of the included SNPs, in order of inclusion
            'pvadded':  list of the P-values the included SNPs had in the scan
                        before their inclusion
            'pvall':    [maxiter x S] SP.array of P-values, one row per scan; rows
                        of scans that were not run stay zero
            'qvall':    (only if qvalues) [maxiter x S] SP.array of q-values
            'qvadded':  (only if qvalues) list of q-values of the included SNPs
    """

    if K is None:
        K = SP.eye(snps.shape[0])
    if covs is None:
        covs = SP.ones((snps.shape[0], 1))

    lm = simple_lmm(snps, pheno, K=K, covs=covs, test=test, **kw_args)

    # initial scan (no SNP included yet) fills the first row
    pvall = SP.zeros((maxiter, snps.shape[1]))
    pv = lm.getPv()
    pvall[:1, :] = pv
    best = pv.argmin()

    added_idx, added_pv, added_qv = [], [], []
    if not qvalues:
        score = pv.min()
    else:
        assert pv.shape[0]==1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = SP.zeros((maxiter, snps.shape[1]))
        qv = FDR.qvalues(pv)
        qvall[:1, :] = qv
        score = qv.min()

    # scans 1 .. maxiter-1: each one conditions on the best SNP of the previous scan
    for scan in range(1, maxiter):
        if not score < threshold:
            break
        t_start = time.time()

        added_idx.append(best)
        added_pv.append(pv[0, best])
        if qvalues:
            added_qv.append(qv[0, best])

        # append the selected SNP column to the covariates and rescan
        covs = SP.concatenate((covs, snps[:, best:best + 1]), 1)
        lm.setCovs(covs)
        lm.process()
        pv = lm.getPv()
        pvall[scan:scan + 1, :] = pv
        best = pv.argmin()

        if qvalues:
            qv = FDR.qvalues(pv)
            qvall[scan:scan + 1, :] = qv
            score = qv.min()
        else:
            score = pv.min()

        elapsed = time.time() - t_start
        print ("finished GWAS testing in %.2f seconds" % elapsed)

    RV = {'iadded': added_idx, 'pvadded': added_pv, 'pvall': pvall}
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = added_qv
    return lm, RV
Ejemplo n.º 2
0
def forward_lmm(snps,
                pheno,
                K=None,
                covs=None,
                qvalues=False,
                threshold=5e-8,
                maxiter=2,
                test='lrt',
                **kw_args):
    """
    Univariate fixed-effects association scan with forward selection.

    An initial scan is run through simple_lmm. While the best score is below
    ``threshold`` and fewer than ``maxiter`` scans have been performed, the SNP
    with the smallest P-value is appended to the covariates and the scan is
    repeated.

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        pheno:  [N x 1] SP.array of 1 phenotype for N individuals
        K:      [N x N] SP.array of LMM-covariance/kinship coefficients (optional).
                        If not provided, the identity matrix is used.
        covs:   [N x D] SP.array of D covariates for N individuals (optional).
                        If not provided, a single column of ones is used.
        qvalues:        (bool) if True, q-values are computed with FDR.qvalues after
                        every scan, the threshold is applied to the smallest q-value
                        instead of the smallest P-value, and the q-values are
                        returned as well (default False)
        threshold:      (float) score threshold for inclusion in forward selection
                        (default 5e-8)
        maxiter:        (int) maximum number of scans. The first scan is without
                        inclusion, so at most maxiter-1 inclusions are performed
                        (default 2)
        test:           'lrt' for likelihood ratio test (default) or 'f' for F-test;
                        passed on to simple_lmm
        **kw_args:      further keyword arguments passed on to simple_lmm

    Returns:
        lm:     the LMM object returned by simple_lmm
        RV:     dict with the entries
            'iadded':   list of indices of the included SNPs, in order of inclusion
            'pvadded':  list of the P-values the included SNPs had in the scan
                        before their inclusion
            'pvall':    [maxiter x S] SP.array of P-values, one row per scan; rows
                        of scans that were not run stay zero
            'qvall':    (only if qvalues) [maxiter x S] SP.array of q-values
            'qvadded':  (only if qvalues) list of q-values of the included SNPs
    """

    if K is None:
        K = SP.eye(snps.shape[0])
    if covs is None:
        covs = SP.ones((snps.shape[0], 1))

    lm = simple_lmm(snps, pheno, K=K, covs=covs, test=test, **kw_args)

    # initial scan (no SNP included yet) fills the first row
    pvall = SP.zeros((maxiter, snps.shape[1]))
    pv = lm.getPv()
    pvall[:1, :] = pv
    best = pv.argmin()

    selected = []
    selected_pv = []
    selected_qv = []
    if not qvalues:
        score = pv.min()
    else:
        assert pv.shape[
            0] == 1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = SP.zeros((maxiter, snps.shape[1]))
        qv = FDR.qvalues(pv)
        qvall[:1, :] = qv
        score = qv.min()

    # scans 1 .. maxiter-1: each one conditions on the best SNP of the previous scan
    for scan in range(1, maxiter):
        if not score < threshold:
            break
        started = time.time()

        selected.append(best)
        selected_pv.append(pv[0, best])
        if qvalues:
            selected_qv.append(qv[0, best])

        # append the selected SNP column to the covariates and rescan
        snp_column = snps[:, best:best + 1]
        covs = SP.concatenate((covs, snp_column), 1)
        lm.setCovs(covs)
        lm.process()
        pv = lm.getPv()
        pvall[scan:scan + 1, :] = pv
        best = pv.argmin()

        if qvalues:
            qv = FDR.qvalues(pv)
            qvall[scan:scan + 1, :] = qv
            score = qv.min()
        else:
            score = pv.min()

        finished = time.time()
        print("finished GWAS testing in %.2f seconds" % (finished - started))

    RV = {
        'iadded': selected,
        'pvadded': selected_pv,
        'pvall': pvall,
    }
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = selected_qv
    return lm, RV
Ejemplo n.º 3
0
def forward_lmm_kronecker(snps,phenos,Asnps=None,Acond=None,K1r=None,K1c=None,K2r=None,K2c=None,covs=None,Acovs=None,threshold = 5e-8, maxiter = 2,qvalues=False, update_covariances = False,**kw_args):
    """
    Kronecker fixed effects test with forward selection

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] SP.array of P phenotypes for N individuals
        Asnps:  SNP trait design matrix, or list of such matrices that are tested
                in turn (optional). If not provided, [SP.ones([1,P])] is used.
        Acond:  trait design matrix (or list) used when a selected SNP is added
                as covariate; indexed by the design index of the selected test
                (optional). If not provided, Asnps is used.
        K1r:    [N x N] SP.array, row covariance of the first term (optional).
                        If not provided, SP.dot(snps,snps.T) is used.
        K1c:    [P x P] SP.array, trait covariance of the first term (optional)
        K2r:    [N x N] SP.array, row covariance of the second term (optional).
                        If not provided, SP.eye(N) is used.
        K2c:    [P x P] SP.array, trait covariance of the second term (optional)
                        If K1c or K2c is missing, both are estimated with
                        estimateKronCovariances.
        covs:   covariates, normalised together with Acovs by updateKronCovs
        Acovs:  trait designs of the covariates, see covs
        threshold:      (float) score threshold for inclusion in forward selection (default 5e-8)
        maxiter:        (int) maximum number of scans. First scan is
                        without inclusion, so maxiter-1 inclusions can be performed. (default 2)
        qvalues:        Use q-value threshold and return q-values in addition (default False)
        update_covariances: Boolean indicator if the trait covariances should be
                        re-estimated after each forward step (default False).
                        Only has an effect if the covariances were estimated
                        here, i.e. K1c or K2c was not provided.
        **kw_args:      passed on to estimateKronCovariances

    Returns:
        lm:             LMM object returned by kronecker_lmm
        RV:             dict with elements:
            iadded:         list of index tuples (design index, SNP index) of the
                            included tests in order of inclusion
            pvadded:        list of Pvalues obtained by the included SNPs in the
                            scan before inclusion
            pvall:   [pv.shape[0]*maxiter x pv.shape[1]] SP.array of Pvalues, one
                            block of pv.shape[0] rows per scan (pv being the
                            P-value array returned by kronecker_lmm); blocks of
                            scans that were not run stay zero
            time_el:        list of elapsed seconds, one entry per scan
        Only if qvalues:   corresponding q-values
            qvadded
            qvall
    """

    #0. checks
    N  = phenos.shape[0]
    P  = phenos.shape[1]

    # NOTE: the optional matrices are tested with `is None`. `== None` on a
    # NumPy array is evaluated elementwise, and using that result in an `if`
    # fails as soon as a real matrix is passed in.
    if K1r is None:
        K1r = SP.dot(snps,snps.T)
    else:
        assert K1r.shape[0]==N, 'K1r: dimensions dismatch'
        assert K1r.shape[1]==N, 'K1r: dimensions dismatch'

    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0]==N, 'K2r: dimensions dismatch'
        assert K2r.shape[1]==N, 'K2r: dimensions dismatch'

    covs,Acovs = updateKronCovs(covs,Acovs,N,P)

    if Asnps is None:
        Asnps = [SP.ones([1,P])]
    if not isinstance(Asnps,list):
        Asnps = [Asnps]
    assert len(Asnps)>0, "need at least one Snp design matrix"

    if Acond is None:
        Acond = Asnps
    if not isinstance(Acond,list):
        Acond = [Acond]
    assert len(Acond)>0, "need at least one Snp design matrix"

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos, K1r=K1r, K2r=K2r, K1c=K1c, K2c=K2c, covs=covs, Acovs=Acovs, **kw_args)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        # no variance-component model available, so update_covariances is a no-op
        vc = None
        assert K1c.shape[0]==P, 'K1c: dimensions dismatch'
        assert K1c.shape[1]==P, 'K1c: dimensions dismatch'
        assert K2c.shape[0]==P, 'K2c: dimensions dismatch'
        assert K2c.shape[1]==P, 'K2c: dimensions dismatch'

    #2. initial scan without any included SNP
    t0 = time.time()
    lm,pv = kronecker_lmm(snps=snps,phenos=phenos,Asnps=Asnps,K1r=K1r,K2r=K2r,K1c=K1c,K2c=K2c,covs=covs,Acovs=Acovs)

    iadded = []
    pvadded = []
    qvadded = []
    time_el = []
    pvall = SP.zeros((pv.shape[0]*maxiter,pv.shape[1]))
    qvall = None
    t1=time.time()
    print ("finished GWAS testing in %.2f seconds" %(t1-t0))
    time_el.append(t1-t0)
    pvall[0:pv.shape[0],:]=pv
    # imin is a (row, column) tuple: row selects the design, column the SNP
    imin= SP.unravel_index(pv.argmin(),pv.shape)
    score=pv[imin].min()
    niter = 1
    if qvalues:
        assert pv.shape[0]==1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = SP.zeros((maxiter,snps.shape[1]))
        qv  = FDR.qvalues(pv)
        qvall[0:1,:] = qv
        score=qv[imin]

    #3. forward selection loop
    while (score<threshold) and niter<maxiter:
        t0=time.time()
        pvadded.append(pv[imin])
        iadded.append(imin)
        if qvalues:
            qvadded.append(qv[imin])
        # column of the selected SNP, kept 2-dimensional ([N x 1])
        snp_sel = snps[:,imin[1]:(imin[1]+1)]
        if update_covariances and vc is not None:
            vc.addFixedTerm(snp_sel,Acond[imin[0]])
            vc.setScales()#CL: don't know what this does, but findLocalOptima crashes because vc.noisPos=None
            vc.findLocalOptima(fast=True)
            K1c = vc.getEstTraitCovar(0)
            K2c = vc.getEstTraitCovar(1)
            lm.setK1c(K1c)
            lm.setK2c(K2c)
        lm.addCovariates(snp_sel,Acond[imin[0]])
        # rescan with every SNP design; pv is overwritten row by row
        for i, Asnp in enumerate(Asnps):
            lm.setSNPcoldesign(Asnp)
            lm.process()
            pv[i,:] = lm.getPv()[0]
        pvall[niter*pv.shape[0]:(niter+1)*pv.shape[0]]=pv
        imin= SP.unravel_index(pv.argmin(),pv.shape)
        if qvalues:
            qv = FDR.qvalues(pv)
            qvall[niter:niter+1,:] = qv
            score = qv[imin].min()
        else:
            score = pv[imin].min()
        t1=time.time()
        print ("finished GWAS testing in %.2f seconds" %(t1-t0))
        time_el.append(t1-t0)
        niter=niter+1

    RV = {}
    RV['iadded']  = iadded
    RV['pvadded'] = pvadded
    RV['pvall']   = pvall
    RV['time_el'] = time_el
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm,RV
Ejemplo n.º 4
0
def forward_lmm_kronecker(snps,
                          phenos,
                          Asnps=None,
                          Acond=None,
                          K1r=None,
                          K1c=None,
                          K2r=None,
                          K2c=None,
                          covs=None,
                          Acovs=None,
                          threshold=5e-8,
                          maxiter=2,
                          qvalues=False,
                          update_covariances=False,
                          **kw_args):
    """
    Kronecker fixed effects test with forward selection

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] SP.array of P phenotypes for N individuals
        Asnps:  SNP trait design matrix, or list of such matrices that are tested
                in turn (optional). If not provided, [SP.ones([1, P])] is used.
        Acond:  trait design matrix (or list) used when a selected SNP is added
                as covariate; indexed by the design index of the selected test
                (optional). If not provided, Asnps is used.
        K1r:    [N x N] SP.array, row covariance of the first term (optional).
                        If not provided, SP.dot(snps, snps.T) is used.
        K1c:    [P x P] SP.array, trait covariance of the first term (optional)
        K2r:    [N x N] SP.array, row covariance of the second term (optional).
                        If not provided, SP.eye(N) is used.
        K2c:    [P x P] SP.array, trait covariance of the second term (optional)
                        If K1c or K2c is missing, both are estimated with
                        estimateKronCovariances.
        covs:   covariates, normalised together with Acovs by updateKronCovs
        Acovs:  trait designs of the covariates, see covs
        threshold:      (float) score threshold for inclusion in forward selection (default 5e-8)
        maxiter:        (int) maximum number of scans. First scan is
                        without inclusion, so maxiter-1 inclusions can be performed. (default 2)
        qvalues:        Use q-value threshold and return q-values in addition (default False)
        update_covariances: Boolean indicator if the trait covariances should be
                        re-estimated after each forward step (default False).
                        Only has an effect if the covariances were estimated
                        here, i.e. K1c or K2c was not provided.
        **kw_args:      passed on to estimateKronCovariances

    Returns:
        lm:             LMM object returned by kronecker_lmm
        RV:             dict with elements:
            iadded:         list of index tuples (design index, SNP index) of the
                            included tests in order of inclusion
            pvadded:        list of Pvalues obtained by the included SNPs in the
                            scan before inclusion
            pvall:   [pv.shape[0]*maxiter x pv.shape[1]] SP.array of Pvalues, one
                            block of pv.shape[0] rows per scan (pv being the
                            P-value array returned by kronecker_lmm); blocks of
                            scans that were not run stay zero
            time_el:        list of elapsed seconds, one entry per scan
        Only if qvalues:   corresponding q-values
            qvadded
            qvall
    """

    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    # NOTE: the optional matrices are tested with `is None`. `== None` on a
    # NumPy array is evaluated elementwise, and using that result in an `if`
    # fails as soon as a real matrix is passed in.
    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimensions dismatch'
        assert K1r.shape[1] == N, 'K1r: dimensions dismatch'

    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimensions dismatch'
        assert K2r.shape[1] == N, 'K2r: dimensions dismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    if Asnps is None:
        Asnps = [SP.ones([1, P])]
    if not isinstance(Asnps, list):
        Asnps = [Asnps]
    assert len(Asnps) > 0, "need at least one Snp design matrix"

    if Acond is None:
        Acond = Asnps
    if not isinstance(Acond, list):
        Acond = [Acond]
    assert len(Acond) > 0, "need at least one Snp design matrix"

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos,
                                     K1r=K1r,
                                     K2r=K2r,
                                     K1c=K1c,
                                     K2c=K2c,
                                     covs=covs,
                                     Acovs=Acovs,
                                     **kw_args)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        # no variance-component model available, so update_covariances is a no-op
        vc = None
        assert K1c.shape[0] == P, 'K1c: dimensions dismatch'
        assert K1c.shape[1] == P, 'K1c: dimensions dismatch'
        assert K2c.shape[0] == P, 'K2c: dimensions dismatch'
        assert K2c.shape[1] == P, 'K2c: dimensions dismatch'

    #2. initial scan without any included SNP
    t0 = time.time()
    lm, pv = kronecker_lmm(snps=snps,
                           phenos=phenos,
                           Asnps=Asnps,
                           K1r=K1r,
                           K2r=K2r,
                           K1c=K1c,
                           K2c=K2c,
                           covs=covs,
                           Acovs=Acovs)

    iadded = []
    pvadded = []
    qvadded = []
    time_el = []
    pvall = SP.zeros((pv.shape[0] * maxiter, pv.shape[1]))
    qvall = None
    t1 = time.time()
    print("finished GWAS testing in %.2f seconds" % (t1 - t0))
    time_el.append(t1 - t0)
    pvall[0:pv.shape[0], :] = pv
    # imin is a (row, column) tuple: row selects the design, column the SNP
    imin = SP.unravel_index(pv.argmin(), pv.shape)
    score = pv[imin].min()
    niter = 1
    if qvalues:
        assert pv.shape[
            0] == 1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = SP.zeros((maxiter, snps.shape[1]))
        qv = FDR.qvalues(pv)
        qvall[0:1, :] = qv
        score = qv[imin]

    #3. forward selection loop
    while (score < threshold) and niter < maxiter:
        t0 = time.time()
        pvadded.append(pv[imin])
        iadded.append(imin)
        if qvalues:
            qvadded.append(qv[imin])
        # column of the selected SNP, kept 2-dimensional ([N x 1])
        snp_sel = snps[:, imin[1]:(imin[1] + 1)]
        if update_covariances and vc is not None:
            vc.addFixedTerm(snp_sel, Acond[imin[0]])
            #CL: don't know what this does, but findLocalOptima crashes because vc.noisPos=None
            vc.setScales()
            vc.findLocalOptima(fast=True)
            K1c = vc.getEstTraitCovar(0)
            K2c = vc.getEstTraitCovar(1)
            lm.setK1c(K1c)
            lm.setK2c(K2c)
        lm.addCovariates(snp_sel, Acond[imin[0]])
        # rescan with every SNP design; pv is overwritten row by row
        for i, Asnp in enumerate(Asnps):
            lm.setSNPcoldesign(Asnp)
            lm.process()
            pv[i, :] = lm.getPv()[0]
        pvall[niter * pv.shape[0]:(niter + 1) * pv.shape[0]] = pv
        imin = SP.unravel_index(pv.argmin(), pv.shape)
        if qvalues:
            qv = FDR.qvalues(pv)
            qvall[niter:niter + 1, :] = qv
            score = qv[imin].min()
        else:
            score = pv[imin].min()
        t1 = time.time()
        print("finished GWAS testing in %.2f seconds" % (t1 - t0))
        time_el.append(t1 - t0)
        niter = niter + 1

    RV = {}
    RV['iadded'] = iadded
    RV['pvadded'] = pvadded
    RV['pvall'] = pvall
    RV['time_el'] = time_el
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm, RV