def forward_lmm(snps, pheno, K=None, covs=None, qvalues=False, threshold=5e-8, maxiter=2, test='lrt', **kw_args):
    """
    Univariate fixed-effects association scan with forward selection.

    An initial scan is run with simple_lmm. While the best score is below
    ``threshold`` and fewer than ``maxiter`` scans have been run, the SNP
    with the smallest P-value is appended to the covariates and the scan
    is repeated.

    Args:
        snps:       [N x S] SP.array of S SNPs for N individuals (test SNPs)
        pheno:      [N x 1] SP.array of 1 phenotype for N individuals
        K:          [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                    If not provided, the identity matrix is used
                    (i.e. linear regression analysis is performed)
        covs:       [N x D] SP.array of D covariates for N individuals (optional)
                    If not provided, a single column of ones is used
        qvalues:    if True, q-values are computed with FDR.qvalues after every
                    scan, the smallest q-value (instead of the smallest P-value)
                    is compared against threshold, and the q-values are returned
                    as well (default False)
        threshold:  (float) score threshold for inclusion in forward selection
                    (default 5e-8)
        maxiter:    (int) maximum number of scans. The first scan is without
                    inclusion, so at most maxiter-1 inclusions are performed
                    (default 2)
        test:       'lrt' for likelihood ratio test (default) or 'f' for F-test;
                    forwarded to simple_lmm
        kw_args:    additional keyword arguments forwarded to simple_lmm

    Returns:
        lm:     LMM object returned by simple_lmm (after the last scan)
        RV:     dictionary with the entries
                    iadded:   list of indices of the included SNPs, in order of inclusion
                    pvadded:  list of P-values of the included SNPs, taken from the
                              scan before their inclusion
                    pvall:    [maxiter x S] SP.array of P-values of all scans;
                              rows of scans that were never run stay zero
                and, only if qvalues is True,
                    qvall:    [maxiter x S] SP.array of q-values of all scans
                    qvadded:  list of q-values of the included SNPs
    """
    if K is None:
        K = SP.eye(snps.shape[0])
    if covs is None:
        covs = SP.ones((snps.shape[0], 1))

    # initial scan, nothing conditioned on yet
    lm = simple_lmm(snps, pheno, K=K, covs=covs, test=test, **kw_args)
    n_snps = snps.shape[1]
    pvall = SP.zeros((maxiter, n_snps))
    pv = lm.getPv()
    pvall[0:1, :] = pv
    best = pv.argmin()
    scan = 1

    # bookkeeping of the forward selection
    iadded = []
    pvadded = []
    qvadded = []
    if qvalues:
        assert pv.shape[0] == 1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = SP.zeros((maxiter, n_snps))
        qv = FDR.qvalues(pv)
        qvall[0:1, :] = qv
        score = qv.min()
    else:
        score = pv.min()

    while (score < threshold) and scan < maxiter:
        started = time.time()

        # remember the winner of the previous scan
        iadded.append(best)
        pvadded.append(pv[0, best])
        if qvalues:
            qvadded.append(qv[0, best])

        # condition on the winner and scan again
        best_column = snps[:, best:(best + 1)]
        covs = SP.concatenate((covs, best_column), 1)
        lm.setCovs(covs)
        lm.process()
        pv = lm.getPv()
        pvall[scan:scan + 1, :] = pv
        best = pv.argmin()

        if qvalues:
            qv = FDR.qvalues(pv)
            qvall[scan:scan + 1, :] = qv
            score = qv.min()
        else:
            score = pv.min()

        finished = time.time()
        print("finished GWAS testing in %.2f seconds" % (finished - started))
        scan += 1

    RV = {'iadded': iadded, 'pvadded': pvadded, 'pvall': pvall}
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm, RV
def forward_lmm(snps, pheno, K=None, covs=None, qvalues=False, threshold=5e-8, maxiter=2, test='lrt', **kw_args):
    """
    Univariate fixed-effects association scan with forward selection.

    An initial scan is run with simple_lmm. While the best score is below
    ``threshold`` and fewer than ``maxiter`` scans have been run, the SNP
    with the smallest P-value is appended to the covariates and the scan
    is repeated.

    Args:
        snps:       [N x S] SP.array of S SNPs for N individuals (test SNPs)
        pheno:      [N x 1] SP.array of 1 phenotype for N individuals
        K:          [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                    If not provided, the identity matrix is used
                    (i.e. linear regression analysis is performed)
        covs:       [N x D] SP.array of D covariates for N individuals (optional)
                    If not provided, a single column of ones is used
        qvalues:    if True, q-values are computed with FDR.qvalues after every
                    scan, the smallest q-value (instead of the smallest P-value)
                    is compared against threshold, and the q-values are returned
                    as well (default False)
        threshold:  (float) score threshold for inclusion in forward selection
                    (default 5e-8)
        maxiter:    (int) maximum number of scans. The first scan is without
                    inclusion, so at most maxiter-1 inclusions are performed
                    (default 2)
        test:       'lrt' for likelihood ratio test (default) or 'f' for F-test;
                    forwarded to simple_lmm
        kw_args:    additional keyword arguments forwarded to simple_lmm

    Returns:
        lm:     LMM object returned by simple_lmm (after the last scan)
        RV:     dictionary with the entries
                    iadded:   list of indices of the included SNPs, in order of inclusion
                    pvadded:  list of P-values of the included SNPs, taken from the
                              scan before their inclusion
                    pvall:    [maxiter x S] SP.array of P-values of all scans;
                              rows of scans that were never run stay zero
                and, only if qvalues is True,
                    qvall:    [maxiter x S] SP.array of q-values of all scans
                    qvadded:  list of q-values of the included SNPs
    """
    if K is None:
        K = SP.eye(snps.shape[0])
    if covs is None:
        covs = SP.ones((snps.shape[0], 1))

    # initial scan, nothing conditioned on yet
    lm = simple_lmm(snps, pheno, K=K, covs=covs, test=test, **kw_args)
    n_snps = snps.shape[1]
    pvall = SP.zeros((maxiter, n_snps))
    pv = lm.getPv()
    pvall[0:1, :] = pv
    best = pv.argmin()
    scan = 1

    # bookkeeping of the forward selection
    iadded = []
    pvadded = []
    qvadded = []
    if qvalues:
        assert pv.shape[0] == 1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = SP.zeros((maxiter, n_snps))
        qv = FDR.qvalues(pv)
        qvall[0:1, :] = qv
        score = qv.min()
    else:
        score = pv.min()

    while (score < threshold) and scan < maxiter:
        started = time.time()

        # remember the winner of the previous scan
        iadded.append(best)
        pvadded.append(pv[0, best])
        if qvalues:
            qvadded.append(qv[0, best])

        # condition on the winner and scan again
        best_column = snps[:, best:(best + 1)]
        covs = SP.concatenate((covs, best_column), 1)
        lm.setCovs(covs)
        lm.process()
        pv = lm.getPv()
        pvall[scan:scan + 1, :] = pv
        best = pv.argmin()

        if qvalues:
            qv = FDR.qvalues(pv)
            qvall[scan:scan + 1, :] = qv
            score = qv.min()
        else:
            score = pv.min()

        finished = time.time()
        print("finished GWAS testing in %.2f seconds" % (finished - started))
        scan += 1

    RV = {'iadded': iadded, 'pvadded': pvadded, 'pvall': pvall}
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm, RV
def forward_lmm_kronecker(snps, phenos, Asnps=None, Acond=None, K1r=None, K1c=None, K2r=None, K2c=None, covs=None, Acovs=None, threshold=5e-8, maxiter=2, qvalues=False, update_covariances=False, **kw_args):
    """
    Kronecker fixed effects test with forward selection

    Args:
        snps:       [N x S] SP.array of S SNPs for N individuals (test SNPs)
        phenos:     [N x P] SP.array of P phenotypes for N individuals
        Asnps:      SNP design matrix, or list of SNP design matrices, to test (optional)
                    If not provided, a single [1 x P] matrix of ones is used
        Acond:      design matrix, or list of design matrices, used when a selected
                    SNP is added as covariate; entry i is used for a SNP selected
                    under Asnps[i] (optional)
                    If not provided, Asnps is used
        K1r:        [N x N] SP.array, row covariance of the first term (optional)
                    If not provided, SP.dot(snps, snps.T) is used
        K1c:        [P x P] SP.array, column covariance of the first term (optional)
        K2r:        [N x N] SP.array, row covariance of the second term (optional)
                    If not provided, the identity matrix is used
        K2c:        [P x P] SP.array, column covariance of the second term (optional)
                    If K1c or K2c is not provided, BOTH are (re-)estimated with
                    estimateKronCovariances; a provided one is passed into that call
        covs:       covariates; normalised together with Acovs by updateKronCovs
        Acovs:      covariate design matrices; normalised together with covs by updateKronCovs
        threshold:  (float) score threshold for inclusion in forward selection (default 5e-8)
        maxiter:    (int) maximum number of scans. The first scan is without
                    inclusion, so at most maxiter-1 inclusions are performed (default 2)
        qvalues:    Use q-value threshold and return q-values in addition (default False)
                    Only supported for a single SNP design (pv.shape[0]==1)
        update_covariances: Boolean indicator if covariances should be re-estimated
                    after each forward step (default False). Only has an effect if
                    the covariances were estimated by this function, i.e. K1c or
                    K2c was not provided
        kw_args:    additional keyword arguments forwarded to estimateKronCovariances

    Returns:
        lm:     LMM object returned by kronecker_lmm
        RV:     dictionary with the entries
                    iadded:   list of (design index, SNP index) tuples of the
                              included SNPs, in order of inclusion
                    pvadded:  list of P-values of the included SNPs, taken from
                              the scan before their inclusion
                    pvall:    [(maxiter * number of designs) x S] SP.array of
                              P-values; each scan fills one block of rows, blocks
                              of scans that were never run stay zero
                    time_el:  list of elapsed seconds, one entry per scan
                and, only if qvalues is True,
                    qvall:    [maxiter x S] SP.array of q-values of all scans
                    qvadded:  list of q-values of the included SNPs
    """
    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    # NOTE: identity tests on purpose. `array == None` is an elementwise
    # comparison in NumPy, so `if K == None` raises for a provided matrix.
    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimensions dismatch'
        assert K1r.shape[1] == N, 'K1r: dimensions dismatch'

    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimensions dismatch'
        assert K2r.shape[1] == N, 'K2r: dimensions dismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    if Asnps is None:
        Asnps = [SP.ones([1, P])]
    if not isinstance(Asnps, list):
        Asnps = [Asnps]
    assert len(Asnps) > 0, "need at least one Snp design matrix"

    if Acond is None:
        Acond = Asnps
    if not isinstance(Acond, list):
        Acond = [Acond]
    assert len(Acond) > 0, "need at least one Snp design matrix"

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos, K1r=K1r, K2r=K2r, K1c=K1c, K2c=K2c, covs=covs, Acovs=Acovs, **kw_args)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        # covariances are fixed by the caller; nothing to re-estimate later
        vc = None
        assert K1c.shape[0] == P, 'K1c: dimensions dismatch'
        assert K1c.shape[1] == P, 'K1c: dimensions dismatch'
        assert K2c.shape[0] == P, 'K2c: dimensions dismatch'
        assert K2c.shape[1] == P, 'K2c: dimensions dismatch'

    #2. initial scan
    t0 = time.time()
    lm, pv = kronecker_lmm(snps=snps, phenos=phenos, Asnps=Asnps, K1r=K1r, K2r=K2r, K1c=K1c, K2c=K2c, covs=covs, Acovs=Acovs)

    #start stuff
    iadded = []
    pvadded = []
    qvadded = []
    time_el = []
    # one block of pv.shape[0] rows per scan
    pvall = SP.zeros((pv.shape[0] * maxiter, pv.shape[1]))
    qvall = None
    t1 = time.time()
    print("finished GWAS testing in %.2f seconds" % (t1 - t0))
    time_el.append(t1 - t0)
    pvall[0:pv.shape[0], :] = pv
    # imin = (design index, SNP index) of the smallest P-value
    imin = SP.unravel_index(pv.argmin(), pv.shape)
    score = pv[imin].min()
    niter = 1
    if qvalues:
        assert pv.shape[0] == 1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = SP.zeros((maxiter, snps.shape[1]))
        qv = FDR.qvalues(pv)
        qvall[0:1, :] = qv
        score = qv[imin]

    #3. forward selection loop
    while (score < threshold) and niter < maxiter:
        t0 = time.time()
        pvadded.append(pv[imin])
        iadded.append(imin)
        if qvalues:
            qvadded.append(qv[imin])

        selected_snp = snps[:, imin[1]:(imin[1] + 1)]
        selected_design = Acond[imin[0]]

        if update_covariances and vc is not None:
            # re-estimate the trait covariances with the selected SNP as fixed effect
            vc.addFixedTerm(selected_snp, selected_design)
            vc.setScales()  #CL: don't know what this does, but findLocalOptima crashes because vc.noisPos=None
            vc.findLocalOptima(fast=True)
            K1c = vc.getEstTraitCovar(0)
            K2c = vc.getEstTraitCovar(1)
            lm.setK1c(K1c)
            lm.setK2c(K2c)

        # condition on the selected SNP and re-scan every SNP design
        lm.addCovariates(selected_snp, selected_design)
        for i, design in enumerate(Asnps):
            #add SNP design
            lm.setSNPcoldesign(design)
            lm.process()
            pv[i, :] = lm.getPv()[0]

        pvall[niter * pv.shape[0]:(niter + 1) * pv.shape[0]] = pv
        imin = SP.unravel_index(pv.argmin(), pv.shape)
        if qvalues:
            qv = FDR.qvalues(pv)
            qvall[niter:niter + 1, :] = qv
            score = qv[imin].min()
        else:
            score = pv[imin].min()
        t1 = time.time()
        print("finished GWAS testing in %.2f seconds" % (t1 - t0))
        time_el.append(t1 - t0)
        niter = niter + 1

    RV = {}
    RV['iadded'] = iadded
    RV['pvadded'] = pvadded
    RV['pvall'] = pvall
    RV['time_el'] = time_el
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm, RV
def forward_lmm_kronecker(snps, phenos, Asnps=None, Acond=None, K1r=None, K1c=None, K2r=None, K2c=None, covs=None, Acovs=None, threshold=5e-8, maxiter=2, qvalues=False, update_covariances=False, **kw_args):
    """
    Kronecker fixed effects test with forward selection

    Args:
        snps:       [N x S] SP.array of S SNPs for N individuals (test SNPs)
        phenos:     [N x P] SP.array of P phenotypes for N individuals
        Asnps:      SNP design matrix, or list of SNP design matrices, to test (optional)
                    If not provided, a single [1 x P] matrix of ones is used
        Acond:      design matrix, or list of design matrices, used when a selected
                    SNP is added as covariate; entry i is used for a SNP selected
                    under Asnps[i] (optional)
                    If not provided, Asnps is used
        K1r:        [N x N] SP.array, row covariance of the first term (optional)
                    If not provided, SP.dot(snps, snps.T) is used
        K1c:        [P x P] SP.array, column covariance of the first term (optional)
        K2r:        [N x N] SP.array, row covariance of the second term (optional)
                    If not provided, the identity matrix is used
        K2c:        [P x P] SP.array, column covariance of the second term (optional)
                    If K1c or K2c is not provided, BOTH are (re-)estimated with
                    estimateKronCovariances; a provided one is passed into that call
        covs:       covariates; normalised together with Acovs by updateKronCovs
        Acovs:      covariate design matrices; normalised together with covs by updateKronCovs
        threshold:  (float) score threshold for inclusion in forward selection (default 5e-8)
        maxiter:    (int) maximum number of scans. The first scan is without
                    inclusion, so at most maxiter-1 inclusions are performed (default 2)
        qvalues:    Use q-value threshold and return q-values in addition (default False)
                    Only supported for a single SNP design (pv.shape[0]==1)
        update_covariances: Boolean indicator if covariances should be re-estimated
                    after each forward step (default False). Only has an effect if
                    the covariances were estimated by this function, i.e. K1c or
                    K2c was not provided
        kw_args:    additional keyword arguments forwarded to estimateKronCovariances

    Returns:
        lm:     LMM object returned by kronecker_lmm
        RV:     dictionary with the entries
                    iadded:   list of (design index, SNP index) tuples of the
                              included SNPs, in order of inclusion
                    pvadded:  list of P-values of the included SNPs, taken from
                              the scan before their inclusion
                    pvall:    [(maxiter * number of designs) x S] SP.array of
                              P-values; each scan fills one block of rows, blocks
                              of scans that were never run stay zero
                    time_el:  list of elapsed seconds, one entry per scan
                and, only if qvalues is True,
                    qvall:    [maxiter x S] SP.array of q-values of all scans
                    qvadded:  list of q-values of the included SNPs
    """
    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    # NOTE: identity tests on purpose. `array == None` is an elementwise
    # comparison in NumPy, so `if K == None` raises for a provided matrix.
    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimensions dismatch'
        assert K1r.shape[1] == N, 'K1r: dimensions dismatch'

    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimensions dismatch'
        assert K2r.shape[1] == N, 'K2r: dimensions dismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    if Asnps is None:
        Asnps = [SP.ones([1, P])]
    if not isinstance(Asnps, list):
        Asnps = [Asnps]
    assert len(Asnps) > 0, "need at least one Snp design matrix"

    if Acond is None:
        Acond = Asnps
    if not isinstance(Acond, list):
        Acond = [Acond]
    assert len(Acond) > 0, "need at least one Snp design matrix"

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos, K1r=K1r, K2r=K2r, K1c=K1c, K2c=K2c, covs=covs, Acovs=Acovs, **kw_args)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        # covariances are fixed by the caller; nothing to re-estimate later
        vc = None
        assert K1c.shape[0] == P, 'K1c: dimensions dismatch'
        assert K1c.shape[1] == P, 'K1c: dimensions dismatch'
        assert K2c.shape[0] == P, 'K2c: dimensions dismatch'
        assert K2c.shape[1] == P, 'K2c: dimensions dismatch'

    #2. initial scan
    t0 = time.time()
    lm, pv = kronecker_lmm(snps=snps, phenos=phenos, Asnps=Asnps, K1r=K1r, K2r=K2r, K1c=K1c, K2c=K2c, covs=covs, Acovs=Acovs)

    #start stuff
    iadded = []
    pvadded = []
    qvadded = []
    time_el = []
    # one block of pv.shape[0] rows per scan
    pvall = SP.zeros((pv.shape[0] * maxiter, pv.shape[1]))
    qvall = None
    t1 = time.time()
    print("finished GWAS testing in %.2f seconds" % (t1 - t0))
    time_el.append(t1 - t0)
    pvall[0:pv.shape[0], :] = pv
    # imin = (design index, SNP index) of the smallest P-value
    imin = SP.unravel_index(pv.argmin(), pv.shape)
    score = pv[imin].min()
    niter = 1
    if qvalues:
        assert pv.shape[0] == 1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = SP.zeros((maxiter, snps.shape[1]))
        qv = FDR.qvalues(pv)
        qvall[0:1, :] = qv
        score = qv[imin]

    #3. forward selection loop
    while (score < threshold) and niter < maxiter:
        t0 = time.time()
        pvadded.append(pv[imin])
        iadded.append(imin)
        if qvalues:
            qvadded.append(qv[imin])

        selected_snp = snps[:, imin[1]:(imin[1] + 1)]
        selected_design = Acond[imin[0]]

        if update_covariances and vc is not None:
            # re-estimate the trait covariances with the selected SNP as fixed effect
            vc.addFixedTerm(selected_snp, selected_design)
            vc.setScales()  #CL: don't know what this does, but findLocalOptima crashes because vc.noisPos=None
            vc.findLocalOptima(fast=True)
            K1c = vc.getEstTraitCovar(0)
            K2c = vc.getEstTraitCovar(1)
            lm.setK1c(K1c)
            lm.setK2c(K2c)

        # condition on the selected SNP and re-scan every SNP design
        lm.addCovariates(selected_snp, selected_design)
        for i, design in enumerate(Asnps):
            #add SNP design
            lm.setSNPcoldesign(design)
            lm.process()
            pv[i, :] = lm.getPv()[0]

        pvall[niter * pv.shape[0]:(niter + 1) * pv.shape[0]] = pv
        imin = SP.unravel_index(pv.argmin(), pv.shape)
        if qvalues:
            qv = FDR.qvalues(pv)
            qvall[niter:niter + 1, :] = qv
            score = qv[imin].min()
        else:
            score = pv[imin].min()
        t1 = time.time()
        print("finished GWAS testing in %.2f seconds" % (t1 - t0))
        time_el.append(t1 - t0)
        niter = niter + 1

    RV = {}
    RV['iadded'] = iadded
    RV['pvadded'] = pvadded
    RV['pvall'] = pvall
    RV['time_el'] = time_el
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm, RV