Esempio n. 1
0
File: qtl.py Progetto: PMBio/limix
def forward_lmm_kronecker(snps,phenos,Asnps=None,Acond=None,K1r=None,K1c=None,K2r=None,K2c=None,covs=None,Acovs=None,threshold=5e-8,maxiter=2,qvalues=False, update_covariances = False,verbose=None,**kw_args):
    """
    Kronecker fixed effects test with forward selection

    Args:
        snps:   [N x S] np.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] np.array of P phenotypes for N individuals
        Asnps:  SNP column design matrix, or list of such matrices.
                Defaults to a single [1 x P] matrix of ones.
        Acond:  design matrix (or list of matrices) used when a selected SNP
                is added as a covariate. Defaults to Asnps.
        K1r:    [N x N] row covariance of the first term.
                Defaults to np.dot(snps, snps.T).
        K1c:    [P x P] column covariance of the first term (optional)
        K2r:    [N x N] row covariance of the second term.
                Defaults to the identity matrix.
        K2c:    [P x P] column covariance of the second term (optional)
                If K1c or K2c is missing, both are taken from the model
                fitted by _estimateKronCovariances.
        covs:   covariates, passed through _updateKronCovs
        Acovs:  covariate designs, passed through _updateKronCovs
        threshold:      (float) P-value threshold for inclusion in forward selection (default 5e-8)
        maxiter:        (int) maximum number of scans. First scan is
                        without inclusion, so maxiter-1 inclusions can be performed. (default 2)
        qvalues:        Use q-value threshold and return q-values in addition (default False)
        update_covariances:
                        Boolean indicator if covariances should be re-estimated after each
                        forward step (default False). Only has an effect when the covariances
                        were estimated inside this function.
        verbose:        verbosity flag, resolved through dlimix.getVerbose
        kw_args:        forwarded to _estimateKronCovariances

    Returns:
        lm:             object returned by test_lmm_kronecker
        RV:             dictionary with elements:
            iadded:         list of (design index, SNP index) tuples in order of inclusion
            pvadded:        list of P-values obtained by the included SNPs in the scan
                            before inclusion
            pvall:          np.array built from the P-values of all scans
            time_el:        list of elapsed seconds per scan
        Optional (only if qvalues is True): corresponding q-values
            qvadded
            qvall:          list with one q-value array per scan
    """
    verbose = dlimix.getVerbose(verbose)
    #0. checks
    N  = phenos.shape[0]
    P  = phenos.shape[1]

    # Identity checks: `array == None` compares element-wise, and the
    # resulting array has no unambiguous truth value inside an `if`.
    if K1r is None:
        K1r = np.dot(snps,snps.T)
    else:
        assert K1r.shape[0]==N, 'K1r: dimensions dismatch'
        assert K1r.shape[1]==N, 'K1r: dimensions dismatch'

    if K2r is None:
        K2r = np.eye(N)
    else:
        assert K2r.shape[0]==N, 'K2r: dimensions dismatch'
        assert K2r.shape[1]==N, 'K2r: dimensions dismatch'

    covs,Acovs = _updateKronCovs(covs,Acovs,N,P)

    if Asnps is None:
        Asnps = [np.ones([1,P])]
    if not isinstance(Asnps,list):
        Asnps = [Asnps]
    assert len(Asnps)>0, "need at least one Snp design matrix"

    if Acond is None:
        Acond = Asnps
    if not isinstance(Acond,list):
        Acond = [Acond]
    assert len(Acond)>0, "need at least one Snp design matrix"

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = _estimateKronCovariances(phenos=phenos, K1r=K1r, K2r=K2r, K1c=K1c, K2c=K2c, covs=covs, Acovs=Acovs, **kw_args)
        K1c = vc.getTraitCovar(0)
        K2c = vc.getTraitCovar(1)
    else:
        vc = None
        assert K1c.shape[0]==P, 'K1c: dimensions dismatch'
        assert K1c.shape[1]==P, 'K1c: dimensions dismatch'
        assert K2c.shape[0]==P, 'K2c: dimensions dismatch'
        assert K2c.shape[1]==P, 'K2c: dimensions dismatch'
    t0 = time.time()
    lm,pv = test_lmm_kronecker(snps=snps,phenos=phenos,Asnps=Asnps,K1r=K1r,K2r=K2r,K1c=K1c,K2c=K2c,covs=covs,Acovs=Acovs)

    #2. bookkeeping for the initial scan
    iadded = []
    pvadded = []
    qvadded = []
    time_el = []
    pvall = []
    qvall = None
    t1=time.time()
    if verbose:
        print(("finished GWAS testing in %.2f seconds" %(t1-t0)))
    time_el.append(t1-t0)
    # pv is overwritten in place by every later scan, so keep a snapshot.
    # NOTE(review): this first snapshot keeps pv's shape while later ones are
    # flattened, so np.array(pvall) may be ragged once a SNP has been added
    # -- confirm the intended layout.
    pvall.append(pv.copy())
    imin= np.unravel_index(pv.argmin(),pv.shape)
    score=pv[imin].min()
    niter = 1
    if qvalues:
        assert pv.shape[0]==1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = []
        qv  = FDR.qvalues(pv)
        qvall.append(qv)
        score=qv[imin]
    #3. forward selection loop
    while (score<threshold) and niter<maxiter:
        t0=time.time()
        pvadded.append(pv[imin])
        iadded.append(imin)
        if qvalues:
            qvadded.append(qv[imin])
        # imin is (design index, SNP index); keep the SNP as an [N x 1] column
        best_snp = snps[:,imin[1]:(imin[1]+1)]
        best_design = Acond[imin[0]]
        if update_covariances and vc is not None:
            vc.addFixedTerm(best_snp,best_design)
            vc.setScales()#CL: don't know what this does, but findLocalOptima crashes becahuse vc.noisPos=None
            vc.findLocalOptima(fast=True)
            K1c = vc.getTraitCovar(0)
            K2c = vc.getTraitCovar(1)
            lm.setK1c(K1c)
            lm.setK2c(K2c)
        lm.addCovariates(best_snp,best_design)
        for i,Asnp in enumerate(Asnps):
            #add SNP design
            lm.setSNPcoldesign(Asnp)
            lm.process()
            pv[i,:] = lm.getPv()[0]
        # flatten() copies; ravel() may return a view onto the reused pv buffer
        pvall.append(pv.flatten())
        imin= np.unravel_index(pv.argmin(),pv.shape)
        if qvalues:
            qv = FDR.qvalues(pv)
            # qvall is a list: append instead of 2-D slice assignment
            qvall.append(qv)
            score = qv[imin].min()
        else:
            score = pv[imin].min()
        t1=time.time()
        if verbose:
            print(("finished GWAS testing in %.2f seconds" %(t1-t0)))
        time_el.append(t1-t0)
        niter=niter+1
    RV = {}
    RV['iadded']  = iadded
    RV['pvadded'] = pvadded
    RV['pvall']   = np.array(pvall)
    RV['time_el'] = time_el
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm,RV
Esempio n. 2
0
def forward_lmm_kronecker(snps,phenos,Asnps=None,Acond=None,K1r=None,K1c=None,K2r=None,K2c=None,covs=None,Acovs=None,threshold=5e-8,maxiter=2,qvalues=False, update_covariances = False,verbose=None,**kw_args):
    """
    Kronecker fixed effects test with forward selection

    Args:
        snps:   [N x S] np.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] np.array of P phenotypes for N individuals
        Asnps:  SNP column design matrix, or list of such matrices.
                Defaults to a single [1 x P] matrix of ones.
        Acond:  design matrix (or list of matrices) used when a selected SNP
                is added as a covariate. Defaults to Asnps.
        K1r:    [N x N] row covariance of the first term.
                Defaults to np.dot(snps, snps.T).
        K1c:    [P x P] column covariance of the first term (optional)
        K2r:    [N x N] row covariance of the second term.
                Defaults to the identity matrix.
        K2c:    [P x P] column covariance of the second term (optional)
                If K1c or K2c is missing, both are taken from the model
                fitted by _estimateKronCovariances.
        covs:   covariates, passed through _updateKronCovs
        Acovs:  covariate designs, passed through _updateKronCovs
        threshold:      (float) P-value threshold for inclusion in forward selection (default 5e-8)
        maxiter:        (int) maximum number of scans. First scan is
                        without inclusion, so maxiter-1 inclusions can be performed. (default 2)
        qvalues:        Use q-value threshold and return q-values in addition (default False)
        update_covariances:
                        Boolean indicator if covariances should be re-estimated after each
                        forward step (default False). Only has an effect when the covariances
                        were estimated inside this function.
        verbose:        verbosity flag, resolved through limix.getVerbose
        kw_args:        forwarded to _estimateKronCovariances

    Returns:
        lm:             object returned by test_lmm_kronecker
        RV:             dictionary with elements:
            iadded:         list of (design index, SNP index) tuples in order of inclusion
            pvadded:        list of P-values obtained by the included SNPs in the scan
                            before inclusion
            pvall:          np.array built from the P-values of all scans
            time_el:        list of elapsed seconds per scan
        Optional (only if qvalues is True): corresponding q-values
            qvadded
            qvall:          list with one q-value array per scan
    """
    verbose = limix.getVerbose(verbose)
    #0. checks
    N  = phenos.shape[0]
    P  = phenos.shape[1]

    # Identity checks: `array == None` compares element-wise, and the
    # resulting array has no unambiguous truth value inside an `if`.
    if K1r is None:
        K1r = np.dot(snps,snps.T)
    else:
        assert K1r.shape[0]==N, 'K1r: dimensions dismatch'
        assert K1r.shape[1]==N, 'K1r: dimensions dismatch'

    if K2r is None:
        K2r = np.eye(N)
    else:
        assert K2r.shape[0]==N, 'K2r: dimensions dismatch'
        assert K2r.shape[1]==N, 'K2r: dimensions dismatch'

    covs,Acovs = _updateKronCovs(covs,Acovs,N,P)

    if Asnps is None:
        Asnps = [np.ones([1,P])]
    if not isinstance(Asnps,list):
        Asnps = [Asnps]
    assert len(Asnps)>0, "need at least one Snp design matrix"

    if Acond is None:
        Acond = Asnps
    if not isinstance(Acond,list):
        Acond = [Acond]
    assert len(Acond)>0, "need at least one Snp design matrix"

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = _estimateKronCovariances(phenos=phenos, K1r=K1r, K2r=K2r, K1c=K1c, K2c=K2c, covs=covs, Acovs=Acovs, **kw_args)
        K1c = vc.getTraitCovar(0)
        K2c = vc.getTraitCovar(1)
    else:
        vc = None
        assert K1c.shape[0]==P, 'K1c: dimensions dismatch'
        assert K1c.shape[1]==P, 'K1c: dimensions dismatch'
        assert K2c.shape[0]==P, 'K2c: dimensions dismatch'
        assert K2c.shape[1]==P, 'K2c: dimensions dismatch'
    t0 = time.time()
    lm,pv = test_lmm_kronecker(snps=snps,phenos=phenos,Asnps=Asnps,K1r=K1r,K2r=K2r,K1c=K1c,K2c=K2c,covs=covs,Acovs=Acovs)

    #2. bookkeeping for the initial scan
    iadded = []
    pvadded = []
    qvadded = []
    time_el = []
    pvall = []
    qvall = None
    t1=time.time()
    if verbose:
        print ("finished GWAS testing in %.2f seconds" %(t1-t0))
    time_el.append(t1-t0)
    # pv is overwritten in place by every later scan, so keep a snapshot.
    # NOTE(review): this first snapshot keeps pv's shape while later ones are
    # flattened, so np.array(pvall) may be ragged once a SNP has been added
    # -- confirm the intended layout.
    pvall.append(pv.copy())
    imin= np.unravel_index(pv.argmin(),pv.shape)
    score=pv[imin].min()
    niter = 1
    if qvalues:
        assert pv.shape[0]==1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = []
        qv  = FDR.qvalues(pv)
        qvall.append(qv)
        score=qv[imin]
    #3. forward selection loop
    while (score<threshold) and niter<maxiter:
        t0=time.time()
        pvadded.append(pv[imin])
        iadded.append(imin)
        if qvalues:
            qvadded.append(qv[imin])
        # imin is (design index, SNP index); keep the SNP as an [N x 1] column
        best_snp = snps[:,imin[1]:(imin[1]+1)]
        best_design = Acond[imin[0]]
        if update_covariances and vc is not None:
            vc.addFixedTerm(best_snp,best_design)
            vc.setScales()#CL: don't know what this does, but findLocalOptima crashes becahuse vc.noisPos=None
            vc.findLocalOptima(fast=True)
            K1c = vc.getTraitCovar(0)
            K2c = vc.getTraitCovar(1)
            lm.setK1c(K1c)
            lm.setK2c(K2c)
        lm.addCovariates(best_snp,best_design)
        # enumerate replaces the xrange index loop, which only exists on Python 2
        for i,Asnp in enumerate(Asnps):
            #add SNP design
            lm.setSNPcoldesign(Asnp)
            lm.process()
            pv[i,:] = lm.getPv()[0]
        # flatten() copies; ravel() may return a view onto the reused pv buffer
        pvall.append(pv.flatten())
        imin= np.unravel_index(pv.argmin(),pv.shape)
        if qvalues:
            qv = FDR.qvalues(pv)
            # qvall is a list: append instead of 2-D slice assignment
            qvall.append(qv)
            score = qv[imin].min()
        else:
            score = pv[imin].min()
        t1=time.time()
        if verbose:
            print ("finished GWAS testing in %.2f seconds" %(t1-t0))
        time_el.append(t1-t0)
        niter=niter+1
    RV = {}
    RV['iadded']  = iadded
    RV['pvadded'] = pvadded
    RV['pvall']   = np.array(pvall)
    RV['time_el'] = time_el
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm,RV
Esempio n. 3
0
File: qtl.py Progetto: PMBio/limix
def forward_lmm(snps,pheno,K=None,covs=None,qvalues=False,threshold=5e-8,maxiter=2,test='lrt',verbose=None,**kw_args):
    """
    univariate fixed effects test with forward selection

    Args:
        snps:   [N x S] np.array of S SNPs for N individuals (test SNPs)
        pheno:  [N x 1] np.array of 1 phenotype for N individuals
        K:      [N x N] np.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, the identity matrix is used
        covs:   [N x D] np.array of D covariates for N individuals
                        If not provided, a single column of ones is used
        qvalues:        Use q-value threshold and return q-values in addition (default False)
        threshold:      (float) P-value threshold for inclusion in forward selection (default 5e-8)
        maxiter:        (int) maximum number of scans. First scan is
                        without inclusion, so maxiter-1 inclusions can be performed. (default 2)
        test:           'lrt' for likelihood ratio test (default) or 'f' for F-test;
                        forwarded to test_lmm
        verbose: print verbose output? Resolved through dlimix.getVerbose
        kw_args:        forwarded to test_lmm

    Returns:
        lm:     object returned by test_lmm
        RV:     dictionary
                RV['iadded']:   list of indices of SNPs included in order of inclusion
                RV['pvadded']:  list of P-values obtained by the included SNPs in the scan
                                before inclusion
                RV['pvall']:    np.array with one row of S P-values per scan
                RV['qvall'], RV['qvadded']: corresponding q-values (only if qvalues is True)
    """
    verbose = dlimix.getVerbose(verbose)

    if K is None:
        K=np.eye(snps.shape[0])
    if covs is None:
        covs = np.ones((snps.shape[0],1))
    #assert single trait
    assert pheno.shape[1]==1, 'forward_lmm only supports single phenotypes'

    lm = test_lmm(snps,pheno,K=K,covs=covs,test=test,**kw_args)
    pvall = []
    pv = lm.getPv().ravel()
    #hack to avoid issues with degenerate pv
    # np.isnan rather than sp.isnan: the NumPy aliases in the scipy
    # namespace are deprecated.
    pv[np.isnan(pv)] = 1
    pvall.append(pv)
    imin= pv.argmin()
    niter = 1
    #start stuff
    iadded = []
    pvadded = []
    qvadded = []
    if qvalues:
        # NOTE(review): pv was flattened above, so shape[0] is the number of
        # SNPs and this only passes for a single SNP -- confirm intent.
        assert pv.shape[0]==1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = []
        qv  = FDR.qvalues(pv)
        qvall.append(qv)
        score=qv.min()
    else:
        score=pv.min()
    while (score<threshold) and niter<maxiter:
        t0=time.time()
        iadded.append(imin)
        pvadded.append(pv[imin])
        if qvalues:
            # works whether qv is 1-D like pv or a [1 x S] row
            qvadded.append(np.ravel(qv)[imin])
        # condition on the selected SNP by appending it as an extra covariate column
        covs=np.concatenate((covs,snps[:,imin:(imin+1)]),1)
        lm.setCovs(covs)
        lm.process()
        pv = lm.getPv().ravel()
        pv[np.isnan(pv)] = 1
        pvall.append(pv)
        imin= pv.argmin()
        if qvalues:
            qv = FDR.qvalues(pv)
            # qvall is a list: append instead of 2-D slice assignment
            qvall.append(qv)
            score = qv.min()
        else:
            score = pv.min()
        t1=time.time()
        if verbose:
            print(("finished GWAS testing in %.2f seconds" %(t1-t0)))
        niter=niter+1
    RV = {}
    RV['iadded']  = iadded
    RV['pvadded'] = pvadded
    RV['pvall']   = np.array(pvall)
    if qvalues:
        RV['qvall'] = np.array(qvall)
        RV['qvadded'] = qvadded
    return lm,RV
Esempio n. 4
0
def forward_lmm(snps,pheno,K=None,covs=None,qvalues=False,threshold=5e-8,maxiter=2,test='lrt',verbose=None,**kw_args):
    """
    univariate fixed effects test with forward selection

    Args:
        snps:   [N x S] np.array of S SNPs for N individuals (test SNPs)
        pheno:  [N x 1] np.array of 1 phenotype for N individuals
        K:      [N x N] np.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, the identity matrix is used
        covs:   [N x D] np.array of D covariates for N individuals
                        If not provided, a single column of ones is used
        qvalues:        Use q-value threshold and return q-values in addition (default False)
        threshold:      (float) P-value threshold for inclusion in forward selection (default 5e-8)
        maxiter:        (int) maximum number of scans. First scan is
                        without inclusion, so maxiter-1 inclusions can be performed. (default 2)
        test:           'lrt' for likelihood ratio test (default) or 'f' for F-test;
                        forwarded to test_lmm
        verbose: print verbose output? Resolved through limix.getVerbose
        kw_args:        forwarded to test_lmm

    Returns:
        lm:     object returned by test_lmm
        RV:     dictionary
                RV['iadded']:   list of indices of SNPs included in order of inclusion
                RV['pvadded']:  list of P-values obtained by the included SNPs in the scan
                                before inclusion
                RV['pvall']:    np.array with one row of S P-values per scan
                RV['qvall'], RV['qvadded']: corresponding q-values (only if qvalues is True)
    """
    verbose = limix.getVerbose(verbose)

    if K is None:
        K=np.eye(snps.shape[0])
    if covs is None:
        covs = np.ones((snps.shape[0],1))
    #assert single trait
    assert pheno.shape[1]==1, 'forward_lmm only supports single phenotypes'

    lm = test_lmm(snps,pheno,K=K,covs=covs,test=test,**kw_args)
    pvall = []
    pv = lm.getPv().ravel()
    #hack to avoid issues with degenerate pv
    # np.isnan rather than sp.isnan: the NumPy aliases in the scipy
    # namespace are deprecated.
    pv[np.isnan(pv)] = 1
    pvall.append(pv)
    imin= pv.argmin()
    niter = 1
    #start stuff
    iadded = []
    pvadded = []
    qvadded = []
    if qvalues:
        # NOTE(review): pv was flattened above, so shape[0] is the number of
        # SNPs and this only passes for a single SNP -- confirm intent.
        assert pv.shape[0]==1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = []
        qv  = FDR.qvalues(pv)
        qvall.append(qv)
        score=qv.min()
    else:
        score=pv.min()
    while (score<threshold) and niter<maxiter:
        t0=time.time()
        iadded.append(imin)
        pvadded.append(pv[imin])
        if qvalues:
            # works whether qv is 1-D like pv or a [1 x S] row
            qvadded.append(np.ravel(qv)[imin])
        # condition on the selected SNP by appending it as an extra covariate column
        covs=np.concatenate((covs,snps[:,imin:(imin+1)]),1)
        lm.setCovs(covs)
        lm.process()
        pv = lm.getPv().ravel()
        pv[np.isnan(pv)] = 1
        pvall.append(pv)
        imin= pv.argmin()
        if qvalues:
            qv = FDR.qvalues(pv)
            # qvall is a list: append instead of 2-D slice assignment
            qvall.append(qv)
            score = qv.min()
        else:
            score = pv.min()
        t1=time.time()
        if verbose:
            print ("finished GWAS testing in %.2f seconds" %(t1-t0))
        niter=niter+1
    RV = {}
    RV['iadded']  = iadded
    RV['pvadded'] = pvadded
    RV['pvall']   = np.array(pvall)
    if qvalues:
        RV['qvall'] = np.array(qvall)
        RV['qvadded'] = qvadded
    return lm,RV