def forward_lmm_kronecker(snps,phenos,Asnps=None,Acond=None,K1r=None,K1c=None,K2r=None,K2c=None,covs=None,Acovs=None,threshold=5e-8,maxiter=2,qvalues=False, update_covariances = False,verbose=None,**kw_args):
    """
    Kronecker fixed effects test with forward selection

    Args:
        snps:       [N x S] np.array of S SNPs for N individuals (test SNPs)
        phenos:     [N x P] np.array of P phenotypes for N individuals
        Asnps:      SNP design matrix, or list of such matrices, tested in
                    every scan (default: a single np.ones([1,P]) design)
        Acond:      design matrix, or list of such matrices, used when a
                    selected SNP is added as a covariate (default: Asnps)
        K1r:        [N x N] row covariance of the first term
                    (default: np.dot(snps,snps.T))
        K1c:        [P x P] column covariance of the first term
        K2r:        [N x N] row covariance of the second term
                    (default: np.eye(N))
        K2c:        [P x P] column covariance of the second term.
                    If K1c or K2c is None, both are taken from the model
                    fitted by _estimateKronCovariances.
        covs:       covariates, normalised by _updateKronCovs
        Acovs:      covariate designs, normalised by _updateKronCovs
        threshold:  (float) a scan's best score must be strictly below this
                    value for the SNP to be included (default 5e-8)
        maxiter:    (int) maximum number of scans. The first scan is run
                    without inclusion, so at most maxiter-1 inclusions can
                    be performed. (default 2)
        qvalues:    use q-values (FDR.qvalues) as score and return them in
                    addition (default False)
        update_covariances: re-estimate the trait covariances after each
                    forward step (default False). Only has an effect when
                    the covariances were estimated inside this function.
        verbose:    verbosity flag, resolved through dlimix.getVerbose
        **kw_args:  forwarded to _estimateKronCovariances

    Returns:
        lm:     the LMM object returned by test_lmm_kronecker
        RV:     dictionary
                RV['iadded']:   (design index, SNP index) tuples of the
                                included SNPs in order of inclusion
                RV['pvadded']:  P-values of the included SNPs in the scan
                                before their inclusion
                RV['pvall']:    np.array built from the P-values of all scans
                RV['time_el']:  elapsed seconds of every scan
                Only if qvalues is set:
                RV['qvall']:    list of q-value arrays, one per scan
                RV['qvadded']:  q-values of the included SNPs

    Raises:
        AssertionError: if a supplied covariance matrix has the wrong
                        dimensions or a design list is empty.
    """
    verbose = dlimix.getVerbose(verbose)

    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    # Identity tests: `== None` on an ndarray is evaluated element-wise and
    # cannot be used as a single truth value.
    if K1r is None:
        K1r = np.dot(snps,snps.T)
    else:
        assert K1r.shape[0]==N, 'K1r: dimensions dismatch'
        assert K1r.shape[1]==N, 'K1r: dimensions dismatch'

    if K2r is None:
        K2r = np.eye(N)
    else:
        assert K2r.shape[0]==N, 'K2r: dimensions dismatch'
        assert K2r.shape[1]==N, 'K2r: dimensions dismatch'

    covs,Acovs = _updateKronCovs(covs,Acovs,N,P)

    if Asnps is None:
        Asnps = [np.ones([1,P])]
    if not isinstance(Asnps,list):
        Asnps = [Asnps]
    assert len(Asnps)>0, "need at least one Snp design matrix"

    if Acond is None:
        Acond = Asnps
    if not isinstance(Acond,list):
        Acond = [Acond]
    assert len(Acond)>0, "need at least one Snp design matrix"

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = _estimateKronCovariances(phenos=phenos, K1r=K1r, K2r=K2r, K1c=K1c, K2c=K2c, covs=covs, Acovs=Acovs, **kw_args)
        K1c = vc.getTraitCovar(0)
        K2c = vc.getTraitCovar(1)
    else:
        vc = None
        assert K1c.shape[0]==P, 'K1c: dimensions dismatch'
        assert K1c.shape[1]==P, 'K1c: dimensions dismatch'
        assert K2c.shape[0]==P, 'K2c: dimensions dismatch'
        assert K2c.shape[1]==P, 'K2c: dimensions dismatch'

    #2. initial scan without any inclusion
    t0 = time.time()
    lm,pv = test_lmm_kronecker(snps=snps,phenos=phenos,Asnps=Asnps,K1r=K1r,K2r=K2r,K1c=K1c,K2c=K2c,covs=covs,Acovs=Acovs)

    iadded = []
    pvadded = []
    qvadded = []
    time_el = []
    pvall = []
    qvall = None
    t1 = time.time()
    if verbose:
        print("finished GWAS testing in %.2f seconds" % (t1-t0))
    time_el.append(t1-t0)
    # pv is overwritten in place by later scans, so keep a snapshot.
    # NOTE(review): this entry keeps pv's 2-D shape while later entries are
    # flattened; np.array(pvall) may not be rectangular after an inclusion.
    pvall.append(pv.copy())
    imin = np.unravel_index(pv.argmin(),pv.shape)
    score = pv[imin].min()
    niter = 1
    if qvalues:
        assert pv.shape[0]==1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = []
        qv = FDR.qvalues(pv)
        qvall.append(qv)
        score = qv[imin]

    #3. forward selection loop
    while (score<threshold) and niter<maxiter:
        t0 = time.time()
        pvadded.append(pv[imin])
        iadded.append(imin)
        if qvalues:
            qvadded.append(qv[imin])
        # column of the selected SNP, kept 2-D ([N x 1])
        snp_sel = snps[:,imin[1]:(imin[1]+1)]
        if update_covariances and vc is not None:
            vc.addFixedTerm(snp_sel,Acond[imin[0]])
            vc.setScales()#CL: don't know what this does, but findLocalOptima crashes because vc.noisPos=None
            vc.findLocalOptima(fast=True)
            K1c = vc.getTraitCovar(0)
            K2c = vc.getTraitCovar(1)
            lm.setK1c(K1c)
            lm.setK2c(K2c)
        lm.addCovariates(snp_sel,Acond[imin[0]])
        for i,Asnp in enumerate(Asnps):
            #add SNP design
            lm.setSNPcoldesign(Asnp)
            lm.process()
            pv[i,:] = lm.getPv()[0]
        # ravel() may return a view of pv, hence the explicit copy
        pvall.append(pv.ravel().copy())
        imin = np.unravel_index(pv.argmin(),pv.shape)
        if qvalues:
            qv = FDR.qvalues(pv)
            # qvall is a plain list: append instead of slice assignment
            qvall.append(qv)
            score = qv[imin].min()
        else:
            score = pv[imin].min()
        t1 = time.time()
        if verbose:
            print("finished GWAS testing in %.2f seconds" % (t1-t0))
        time_el.append(t1-t0)
        niter = niter+1

    RV = {}
    RV['iadded'] = iadded
    RV['pvadded'] = pvadded
    RV['pvall'] = np.array(pvall)
    RV['time_el'] = time_el
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm,RV
def forward_lmm_kronecker(snps,phenos,Asnps=None,Acond=None,K1r=None,K1c=None,K2r=None,K2c=None,covs=None,Acovs=None,threshold=5e-8,maxiter=2,qvalues=False, update_covariances = False,verbose=None,**kw_args):
    """
    Kronecker fixed effects test with forward selection

    Args:
        snps:       [N x S] np.array of S SNPs for N individuals (test SNPs)
        phenos:     [N x P] np.array of P phenotypes for N individuals
        Asnps:      SNP design matrix, or list of such matrices, tested in
                    every scan (default: a single np.ones([1,P]) design)
        Acond:      design matrix, or list of such matrices, used when a
                    selected SNP is added as a covariate (default: Asnps)
        K1r:        [N x N] row covariance of the first term
                    (default: np.dot(snps,snps.T))
        K1c:        [P x P] column covariance of the first term
        K2r:        [N x N] row covariance of the second term
                    (default: np.eye(N))
        K2c:        [P x P] column covariance of the second term.
                    If K1c or K2c is None, both are taken from the model
                    fitted by _estimateKronCovariances.
        covs:       covariates, normalised by _updateKronCovs
        Acovs:      covariate designs, normalised by _updateKronCovs
        threshold:  (float) a scan's best score must be strictly below this
                    value for the SNP to be included (default 5e-8)
        maxiter:    (int) maximum number of scans. The first scan is run
                    without inclusion, so at most maxiter-1 inclusions can
                    be performed. (default 2)
        qvalues:    use q-values (FDR.qvalues) as score and return them in
                    addition (default False)
        update_covariances: re-estimate the trait covariances after each
                    forward step (default False). Only has an effect when
                    the covariances were estimated inside this function.
        verbose:    verbosity flag, resolved through limix.getVerbose
        **kw_args:  forwarded to _estimateKronCovariances

    Returns:
        lm:     the LMM object returned by test_lmm_kronecker
        RV:     dictionary
                RV['iadded']:   (design index, SNP index) tuples of the
                                included SNPs in order of inclusion
                RV['pvadded']:  P-values of the included SNPs in the scan
                                before their inclusion
                RV['pvall']:    np.array built from the P-values of all scans
                RV['time_el']:  elapsed seconds of every scan
                Only if qvalues is set:
                RV['qvall']:    list of q-value arrays, one per scan
                RV['qvadded']:  q-values of the included SNPs

    Raises:
        AssertionError: if a supplied covariance matrix has the wrong
                        dimensions or a design list is empty.
    """
    verbose = limix.getVerbose(verbose)

    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    # Identity tests: `== None` on an ndarray is evaluated element-wise and
    # cannot be used as a single truth value.
    if K1r is None:
        K1r = np.dot(snps,snps.T)
    else:
        assert K1r.shape[0]==N, 'K1r: dimensions dismatch'
        assert K1r.shape[1]==N, 'K1r: dimensions dismatch'

    if K2r is None:
        K2r = np.eye(N)
    else:
        assert K2r.shape[0]==N, 'K2r: dimensions dismatch'
        assert K2r.shape[1]==N, 'K2r: dimensions dismatch'

    covs,Acovs = _updateKronCovs(covs,Acovs,N,P)

    if Asnps is None:
        Asnps = [np.ones([1,P])]
    if not isinstance(Asnps,list):
        Asnps = [Asnps]
    assert len(Asnps)>0, "need at least one Snp design matrix"

    if Acond is None:
        Acond = Asnps
    if not isinstance(Acond,list):
        Acond = [Acond]
    assert len(Acond)>0, "need at least one Snp design matrix"

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = _estimateKronCovariances(phenos=phenos, K1r=K1r, K2r=K2r, K1c=K1c, K2c=K2c, covs=covs, Acovs=Acovs, **kw_args)
        K1c = vc.getTraitCovar(0)
        K2c = vc.getTraitCovar(1)
    else:
        vc = None
        assert K1c.shape[0]==P, 'K1c: dimensions dismatch'
        assert K1c.shape[1]==P, 'K1c: dimensions dismatch'
        assert K2c.shape[0]==P, 'K2c: dimensions dismatch'
        assert K2c.shape[1]==P, 'K2c: dimensions dismatch'

    #2. initial scan without any inclusion
    t0 = time.time()
    lm,pv = test_lmm_kronecker(snps=snps,phenos=phenos,Asnps=Asnps,K1r=K1r,K2r=K2r,K1c=K1c,K2c=K2c,covs=covs,Acovs=Acovs)

    iadded = []
    pvadded = []
    qvadded = []
    time_el = []
    pvall = []
    qvall = None
    t1 = time.time()
    if verbose:
        print("finished GWAS testing in %.2f seconds" % (t1-t0))
    time_el.append(t1-t0)
    # pv is overwritten in place by later scans, so keep a snapshot.
    # NOTE(review): this entry keeps pv's 2-D shape while later entries are
    # flattened; np.array(pvall) may not be rectangular after an inclusion.
    pvall.append(pv.copy())
    imin = np.unravel_index(pv.argmin(),pv.shape)
    score = pv[imin].min()
    niter = 1
    if qvalues:
        assert pv.shape[0]==1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = []
        qv = FDR.qvalues(pv)
        qvall.append(qv)
        score = qv[imin]

    #3. forward selection loop
    while (score<threshold) and niter<maxiter:
        t0 = time.time()
        pvadded.append(pv[imin])
        iadded.append(imin)
        if qvalues:
            qvadded.append(qv[imin])
        # column of the selected SNP, kept 2-D ([N x 1])
        snp_sel = snps[:,imin[1]:(imin[1]+1)]
        if update_covariances and vc is not None:
            vc.addFixedTerm(snp_sel,Acond[imin[0]])
            vc.setScales()#CL: don't know what this does, but findLocalOptima crashes because vc.noisPos=None
            vc.findLocalOptima(fast=True)
            K1c = vc.getTraitCovar(0)
            K2c = vc.getTraitCovar(1)
            lm.setK1c(K1c)
            lm.setK2c(K2c)
        lm.addCovariates(snp_sel,Acond[imin[0]])
        for i,Asnp in enumerate(Asnps):
            #add SNP design
            lm.setSNPcoldesign(Asnp)
            lm.process()
            pv[i,:] = lm.getPv()[0]
        # ravel() may return a view of pv, hence the explicit copy
        pvall.append(pv.ravel().copy())
        imin = np.unravel_index(pv.argmin(),pv.shape)
        if qvalues:
            qv = FDR.qvalues(pv)
            # qvall is a plain list: append instead of slice assignment
            qvall.append(qv)
            score = qv[imin].min()
        else:
            score = pv[imin].min()
        t1 = time.time()
        if verbose:
            print("finished GWAS testing in %.2f seconds" % (t1-t0))
        time_el.append(t1-t0)
        niter = niter+1

    RV = {}
    RV['iadded'] = iadded
    RV['pvadded'] = pvadded
    RV['pvall'] = np.array(pvall)
    RV['time_el'] = time_el
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm,RV
def forward_lmm(snps,pheno,K=None,covs=None,qvalues=False,threshold=5e-8,maxiter=2,test='lrt',verbose=None,**kw_args):
    """
    univariate fixed effects test with forward selection

    Args:
        snps:       [N x S] np.array of S SNPs for N individuals (test SNPs)
        pheno:      [N x 1] np.array of 1 phenotype for N individuals
        K:          [N x N] np.array of LMM-covariance/kinship coefficients
                    (optional). Defaults to the [N x N] identity matrix.
        covs:       [N x D] np.array of D covariates for N individuals.
                    Defaults to a single column of ones.
        qvalues:    use q-values (FDR.qvalues) as score and return them in
                    addition (default False)
        threshold:  (float) a scan's best score must be strictly below this
                    value for the SNP to be included (default 5e-8)
        maxiter:    (int) maximum number of scans. The first scan is run
                    without inclusion, so at most maxiter-1 inclusions can
                    be performed. (default 2)
        test:       test name passed on to test_lmm (default 'lrt')
        verbose:    verbosity flag, resolved through dlimix.getVerbose
        **kw_args:  forwarded to test_lmm

    Returns:
        lm:     the LMM object returned by test_lmm
        RV:     dictionary
                RV['iadded']:   indices of SNPs included in order of inclusion
                RV['pvadded']:  P-values of the included SNPs in the scan
                                before their inclusion
                RV['pvall']:    np.array with one row of P-values per scan
                Only if qvalues is set:
                RV['qvall']:    np.array with the q-values of every scan
                RV['qvadded']:  q-values of the included SNPs

    Raises:
        AssertionError: if pheno has more than one column, or if qvalues is
                        set and the flattened P-value vector does not have
                        exactly one entry.
    """
    verbose = dlimix.getVerbose(verbose)

    if K is None:
        K = np.eye(snps.shape[0])
    if covs is None:
        covs = np.ones((snps.shape[0],1))

    #assert single trait
    assert pheno.shape[1]==1, 'forward_lmm only supports single phenotypes'

    lm = test_lmm(snps,pheno,K=K,covs=covs,test=test,**kw_args)
    pvall = []
    pv = lm.getPv().ravel()
    #hack to avoid issues with degenerate pv
    pv[np.isnan(pv)] = 1
    pvall.append(pv)
    imin = pv.argmin()
    niter = 1

    #start stuff
    iadded = []
    pvadded = []
    qvadded = []
    if qvalues:
        # NOTE(review): pv was flattened above, so this only holds when
        # there is a single P-value - confirm whether that is intended.
        assert pv.shape[0]==1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = []
        qv = FDR.qvalues(pv)
        qvall.append(qv)
        score = qv.min()
    else:
        score = pv.min()

    while (score<threshold) and niter<maxiter:
        t0 = time.time()
        iadded.append(imin)
        pvadded.append(pv[imin])
        if qvalues:
            # index the flattened q-values so that both a 1-D and a
            # [1 x S] return value of FDR.qvalues are handled
            qvadded.append(np.ravel(qv)[imin])
        # condition on the selected SNP by adding it as a covariate
        covs = np.concatenate((covs,snps[:,imin:(imin+1)]),1)
        lm.setCovs(covs)
        lm.process()
        pv = lm.getPv().ravel()
        pv[np.isnan(pv)] = 1
        pvall.append(pv)
        imin = pv.argmin()
        if qvalues:
            qv = FDR.qvalues(pv)
            # qvall is a plain list: append instead of slice assignment
            qvall.append(qv)
            score = qv.min()
        else:
            score = pv.min()
        t1 = time.time()
        if verbose:
            print("finished GWAS testing in %.2f seconds" % (t1-t0))
        niter = niter+1

    RV = {}
    RV['iadded'] = iadded
    RV['pvadded'] = pvadded
    RV['pvall'] = np.array(pvall)
    if qvalues:
        RV['qvall'] = np.array(qvall)
        RV['qvadded'] = qvadded
    return lm,RV
def forward_lmm(snps,pheno,K=None,covs=None,qvalues=False,threshold=5e-8,maxiter=2,test='lrt',verbose=None,**kw_args):
    """
    univariate fixed effects test with forward selection

    Args:
        snps:       [N x S] np.array of S SNPs for N individuals (test SNPs)
        pheno:      [N x 1] np.array of 1 phenotype for N individuals
        K:          [N x N] np.array of LMM-covariance/kinship coefficients
                    (optional). Defaults to the [N x N] identity matrix.
        covs:       [N x D] np.array of D covariates for N individuals.
                    Defaults to a single column of ones.
        qvalues:    use q-values (FDR.qvalues) as score and return them in
                    addition (default False)
        threshold:  (float) a scan's best score must be strictly below this
                    value for the SNP to be included (default 5e-8)
        maxiter:    (int) maximum number of scans. The first scan is run
                    without inclusion, so at most maxiter-1 inclusions can
                    be performed. (default 2)
        test:       test name passed on to test_lmm (default 'lrt')
        verbose:    verbosity flag, resolved through limix.getVerbose
        **kw_args:  forwarded to test_lmm

    Returns:
        lm:     the LMM object returned by test_lmm
        RV:     dictionary
                RV['iadded']:   indices of SNPs included in order of inclusion
                RV['pvadded']:  P-values of the included SNPs in the scan
                                before their inclusion
                RV['pvall']:    np.array with one row of P-values per scan
                Only if qvalues is set:
                RV['qvall']:    np.array with the q-values of every scan
                RV['qvadded']:  q-values of the included SNPs

    Raises:
        AssertionError: if pheno has more than one column, or if qvalues is
                        set and the flattened P-value vector does not have
                        exactly one entry.
    """
    verbose = limix.getVerbose(verbose)

    if K is None:
        K = np.eye(snps.shape[0])
    if covs is None:
        covs = np.ones((snps.shape[0],1))

    #assert single trait
    assert pheno.shape[1]==1, 'forward_lmm only supports single phenotypes'

    lm = test_lmm(snps,pheno,K=K,covs=covs,test=test,**kw_args)
    pvall = []
    pv = lm.getPv().ravel()
    #hack to avoid issues with degenerate pv
    pv[np.isnan(pv)] = 1
    pvall.append(pv)
    imin = pv.argmin()
    niter = 1

    #start stuff
    iadded = []
    pvadded = []
    qvadded = []
    if qvalues:
        # NOTE(review): pv was flattened above, so this only holds when
        # there is a single P-value - confirm whether that is intended.
        assert pv.shape[0]==1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = []
        qv = FDR.qvalues(pv)
        qvall.append(qv)
        score = qv.min()
    else:
        score = pv.min()

    while (score<threshold) and niter<maxiter:
        t0 = time.time()
        iadded.append(imin)
        pvadded.append(pv[imin])
        if qvalues:
            # index the flattened q-values so that both a 1-D and a
            # [1 x S] return value of FDR.qvalues are handled
            qvadded.append(np.ravel(qv)[imin])
        # condition on the selected SNP by adding it as a covariate
        covs = np.concatenate((covs,snps[:,imin:(imin+1)]),1)
        lm.setCovs(covs)
        lm.process()
        pv = lm.getPv().ravel()
        pv[np.isnan(pv)] = 1
        pvall.append(pv)
        imin = pv.argmin()
        if qvalues:
            qv = FDR.qvalues(pv)
            # qvall is a plain list: append instead of slice assignment
            qvall.append(qv)
            score = qv.min()
        else:
            score = pv.min()
        t1 = time.time()
        if verbose:
            print("finished GWAS testing in %.2f seconds" % (t1-t0))
        niter = niter+1

    RV = {}
    RV['iadded'] = iadded
    RV['pvadded'] = pvadded
    RV['pvall'] = np.array(pvall)
    if qvalues:
        RV['qvall'] = np.array(qvall)
        RV['qvadded'] = qvadded
    return lm,RV