# Shared imports for the functions below. Old scipy re-exports numpy names
# (sp.randn, sp.isnan, ...), which the code relies on. The FDR import path is
# an assumption; adjust it to your limix version. Helpers such as test_lmm,
# test_lmm_kronecker, test_interaction_lmm, _updateKronCovs,
# _estimateKronCovariances and the VarianceDecomposition class are defined
# elsewhere in the package.
import copy
import time
import warnings

import numpy as np
import scipy as sp

import limix
import limix.stats.fdr as FDR  # assumed path
def __init__(self, snps, pheno, K=None, covs=None, test='lrt',
             NumIntervalsDelta0=100, NumIntervalsDeltaAlt=100,
             searchDelta=False, verbose=None):
    """
    Univariate fixed effects linear mixed model test for all SNPs.

    If phenotypes have missing values, the subset of individuals used for
    each phenotype column is subsetted accordingly.

    Args:
        snps:   [N x S] np.array of S SNPs for N individuals
        pheno:  [N x P] np.array of P phenotypes for N individuals
        K:      [N x N] np.array of LMM-covariance/kinship coefficients (optional)
                If not provided, linear regression analysis is performed.
        covs:   [N x D] np.array of D covariates for N individuals
        test:   'lrt' for likelihood ratio test (default) or 'f' for F-test
        NumIntervalsDelta0:   number of steps for delta optimization on the
                              null model (default 100)
        NumIntervalsDeltaAlt: number of steps for delta optimization on the
                              alternative model (default 100); requires
                              searchDelta=True to have an effect
        searchDelta: Carry out delta optimization on the alternative model?
                     If yes, NumIntervalsDeltaAlt steps are used.
        verbose: print verbose output? (default False)
    """
    # ensure the phenotype is a 2D [N x P] array
    if len(pheno.shape) == 1:
        pheno = pheno[:, sp.newaxis]

    self.verbose = limix.getVerbose(verbose)
    self.snps = snps
    self.pheno = pheno
    self.K = K
    self.covs = covs
    self.test = test
    self.NumIntervalsDelta0 = NumIntervalsDelta0
    self.NumIntervalsDeltaAlt = NumIntervalsDeltaAlt
    self.searchDelta = searchDelta
    self.N = self.pheno.shape[0]
    self.P = self.pheno.shape[1]
    self.Iok = ~(np.isnan(self.pheno).any(axis=1))
    if self.K is None:
        self.searchDelta = False
        self.K = np.eye(self.snps.shape[0])
    if self.covs is None:
        # create a column of ones as intercept if no covariates are provided
        self.covs = np.ones((self.snps.shape[0], 1))
    self._lmm = None
    # run
    self.process()
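# Usage sketch (hypothetical helper, not part of the original module). In
# limix this constructor appears to belong to the single-trait LMM class
# behind the qtl.test_lmm convenience wrapper; `lmm_class` below stands in
# for whatever name it has in your installation, and getPv() is the assumed
# accessor for the resulting P-values. Data are synthetic.
def _example_lmm_scan(lmm_class):
    np.random.seed(1)
    N, S = 100, 50
    snps = np.random.binomial(2, 0.3, size=(N, S)).astype(float)
    pheno = np.random.randn(N, 1)
    K = np.dot(snps, snps.T) / S        # simple realized relationship matrix
    covs = np.ones((N, 1))              # intercept only
    model = lmm_class(snps=snps, pheno=pheno, K=K, covs=covs,
                      test='lrt', verbose=False)
    return model.getPv()                # assumed accessor: [1 x S] P-values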
def _getH2singleTrait(self, K, verbose=None):
    """
    Internal function for parameter initialization:
    estimates variance components and fixed effects using a linear mixed
    model with an intercept and two random effects (one of which is noise).

    Args:
        K: covariance matrix of the non-noise random effect term
    """
    verbose = limix.getVerbose(verbose)
    # fit a single-trait model for each phenotype column
    varg = sp.zeros(self.P)
    varn = sp.zeros(self.P)
    fixed = sp.zeros((1, self.P))
    for p in range(self.P):
        y = self.Y[:, p:p + 1]
        # check for missing (NaN) values and drop those samples
        I = sp.isnan(y[:, 0])
        if I.sum() > 0:
            y = y[~I, :]
            _K = K[~I, :][:, ~I]
        else:
            _K = copy.copy(K)
        lmm = limix.CLMM()
        lmm.setK(_K)
        lmm.setSNPs(sp.ones((y.shape[0], 1)))
        lmm.setPheno(y)
        lmm.setCovs(sp.zeros((y.shape[0], 1)))
        lmm.setVarcompApprox0(-20, 20, 1000)
        lmm.process()
        delta = sp.exp(lmm.getLdelta0()[0, 0])
        Vtot = sp.exp(lmm.getLSigma()[0, 0])
        varg[p] = Vtot
        varn[p] = delta * Vtot
        fixed[:, p] = lmm.getBetaSNP()
        if verbose: print p
    sth = {}
    sth['varg'] = varg
    sth['varn'] = varn
    sth['fixed'] = fixed
    return sth
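# A minimal, self-contained illustration (pure numpy, hypothetical helper) of
# the missing-value handling used inside _getH2singleTrait: samples with NaN
# phenotypes are dropped and K is subsetted to the remaining rows and columns.
def _example_nan_subsetting():
    y = np.array([[0.5], [np.nan], [1.2], [np.nan]])
    K = np.arange(16, dtype=float).reshape(4, 4)
    I = np.isnan(y[:, 0])               # boolean mask of missing samples
    y_sub = y[~I, :]                    # [2 x 1] phenotype without NaNs
    K_sub = K[~I, :][:, ~I]             # [2 x 2] covariance over kept samples
    assert K_sub.shape == (y_sub.shape[0], y_sub.shape[0])
    return y_sub, K_sub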
def test_interaction_GxE_1dof(snps, pheno, env, K=None, covs=None,
                              test='lrt', verbose=None):
    """
    Univariate GxE fixed effects interaction linear mixed model test for all
    pairs of SNPs and environmental variables.

    Args:
        snps:  [N x S] np.array of S SNPs for N individuals
        pheno: [N x 1] np.array of 1 phenotype for N individuals
        env:   [N x E] np.array of E environmental variables for N individuals
        K:     [N x N] np.array of LMM-covariance/kinship coefficients (optional)
               If not provided, linear regression analysis is performed.
        covs:  [N x D] np.array of D covariates for N individuals
        test:  'lrt' for likelihood ratio test (default) or 'f' for F-test
        verbose: print verbose output? (default False)

    Returns:
        pv: [E x S] np.array of P-values for interaction tests between all
            E environmental variables and all S SNPs
    """
    verbose = limix.getVerbose(verbose)
    N = snps.shape[0]
    if K is None:
        K = np.eye(N)
    if covs is None:
        covs = np.ones((N, 1))
    assert (env.shape[0] == N and pheno.shape[0] == N and K.shape[0] == N
            and K.shape[1] == N and covs.shape[0] == N), "shape mismatch"
    Inter0 = np.ones((N, 1))
    pv = np.zeros((env.shape[1], snps.shape[1]))
    if verbose:
        print ("starting %i interaction scans for %i SNPs each." % (env.shape[1], snps.shape[1]))
    t0 = time.time()
    for i in xrange(env.shape[1]):
        t0_i = time.time()
        # augment the covariates with the i-th environmental variable
        cov_i = np.concatenate((covs, env[:, i:(i + 1)]), 1)
        lm_i = test_interaction_lmm(snps=snps, pheno=pheno, covs=cov_i,
                                    Inter=env[:, i:(i + 1)], Inter0=Inter0, test=test)
        pv[i, :] = lm_i.getPv()[0, :]
        t1_i = time.time()
        if verbose:
            print ("Finished %i out of %i interaction scans in %.2f seconds." % ((i + 1), env.shape[1], (t1_i - t0_i)))
    t1 = time.time()
    if verbose:
        print ("-----------------------------------------------------------\nFinished all %i interaction scans in %.2f seconds." % (env.shape[1], (t1 - t0)))
    return pv
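# Usage sketch for test_interaction_GxE_1dof (hypothetical helper). The call
# signature is taken from the function above; only the data are invented.
def _example_gxe_scan():
    np.random.seed(0)
    N, S, E = 200, 100, 3
    snps = np.random.binomial(2, 0.4, size=(N, S)).astype(float)
    pheno = np.random.randn(N, 1)
    env = np.random.randn(N, E)
    K = np.dot(snps, snps.T) / S
    pv = test_interaction_GxE_1dof(snps=snps, pheno=pheno, env=env, K=K,
                                   test='lrt', verbose=True)
    print pv.shape                      # (E, S): one P-value per (env, SNP) pair
    return pv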
def crossValidation(self, seed=0, n_folds=10, fullVector=True, verbose=None, **keywords):
    """
    Split the dataset into n folds and predict each fold after training the
    model on all the others.

    Args:
        seed:       random seed
        n_folds:    number of folds to train the model on
        fullVector: Boolean indicator; if True, stop if no convergence is
                    observed for one of the folds, otherwise continue and
                    return a phenotype matrix with missing values
        verbose:    if True, print the fold that is being used for prediction
        **keywords: parameters to pass to the optimize function

    Returns:
        Matrix of phenotype predictions [N, P]
    """
    verbose = limix.getVerbose(verbose)
    # split samples into training and test folds
    sp.random.seed(seed)
    r = sp.random.permutation(self.Y.shape[0])
    Icv = sp.floor(((sp.ones((self.Y.shape[0])) * n_folds) * r) / self.Y.shape[0])
    RV = sp.zeros_like(self.Y)
    for fold_j in range(n_folds):
        if verbose:
            print ".. predict fold %d" % fold_j
        Itrain = Icv != fold_j
        Itest = Icv == fold_j
        Ytrain = self.Y[Itrain, :]
        Ytest = self.Y[Itest, :]
        vc = VarianceDecomposition(Ytrain)
        vc.setTestSampleSize(Itest.sum())
        # rebuild the fixed effect terms on the training fold
        for term_i in range(self.n_fixedEffs):
            F = self.vd.getFixed(term_i)
            Ftest = F[Itest, :]
            Ftrain = F[Itrain, :]
            if self.P > 1:
                A = self.vd.getDesign(term_i)
            else:
                A = None
            vc.addFixedEffect(F=Ftrain, Ftest=Ftest, A=A)
        # rebuild the random effect terms on the training fold
        for term_i in range(self.n_randEffs):
            if self.P > 1:
                tct = self.trait_covar_type[term_i]
                rank = self.rank[term_i]
                ftc = self.fixed_tc[term_i]
                jitt = self.jitter[term_i]
            else:
                tct = None
                rank = None
                ftc = None
                jitt = None
            if term_i == self.noisPos:
                vc.addRandomEffect(is_noise=True, trait_covar_type=tct, rank=rank,
                                   jitter=jitt, fixed_trait_covar=ftc)
            else:
                R = self.vd.getTerm(term_i).getK()
                Rtrain = R[Itrain, :][:, Itrain]
                Rcross = R[Itrain, :][:, Itest]
                vc.addRandomEffect(K=Rtrain, Kcross=Rcross, trait_covar_type=tct,
                                   rank=rank, jitter=jitt, fixed_trait_covar=ftc)
        conv = vc.optimize(verbose=False, **keywords)
        if fullVector:
            assert conv, 'VarianceDecomposition:: not converged for fold %d. Stopped here' % fold_j
        if conv:
            RV[Itest, :] = vc.predictPhenos()
        else:
            warnings.warn('not converged for fold %d' % fold_j)
            RV[Itest, :] = sp.nan
    return RV
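# Sketch (hypothetical helper) of driving crossValidation from a
# VarianceDecomposition object built with the same accessors the method above
# relies on. addFixedEffect() defaulting to an intercept is an assumption;
# the data are synthetic.
def _example_cross_validation():
    np.random.seed(2)
    N, P = 100, 2
    Y = np.random.randn(N, P)
    R = np.eye(N)                       # sample covariance of the non-noise term
    vc = VarianceDecomposition(Y)
    vc.addFixedEffect()                 # assumed to default to an intercept
    vc.addRandomEffect(K=R)             # genetic term
    vc.addRandomEffect(is_noise=True)   # noise term
    vc.optimize()
    # 5-fold out-of-sample prediction; with fullVector=False, rows of
    # non-converged folds come back as NaN
    return vc.crossValidation(seed=0, n_folds=5, fullVector=False, verbose=True)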
def optimize(self, fast=None, scales0=None, fixed0=None, init_method=None,
             termx=0, n_times=10, perturb=True, pertSize=1e-3, verbose=None,
             lambd=None):
    """
    Train the model using the specified initialization strategy.

    Args:
        fast:        if True, the fast GP is considered; if None (default),
                     fast inference is used where possible
        scales0:     if not None, init_method is set to 'manual'
        fixed0:      initial weights for the fixed effects
        init_method: initialization strategy:
                     'random':   variance component parameters (scales) are
                                 sampled from a normal distribution with
                                 mean 0 and std 1,
                     'diagonal': uses a two-random-effect single-trait model
                                 to initialize the parameters,
                     'pairwise': initializes from pairwise single-trait
                                 models (requires 2 freeform terms and P>1),
                     'manual':   the starting point is set manually
        termx:       term used for initialization in the 'diagonal' strategy
        n_times:     number of restarts to reach convergence
        perturb:     if True, the initial point (set manually or through the
                     single-trait model) is perturbed with Gaussian noise
        pertSize:    std of the Gaussian noise used to perturb the initial point
        verbose:     print whether convergence is achieved and how many
                     restarts were needed
    """
    verbose = limix.getVerbose(verbose)

    if init_method is None:
        if self.P == 1:
            init_method = 'random'
        else:
            init_method = 'diagonal'

    if not self.init:
        self._initGP(fast=fast)

    if scales0 is not None and not perturb:
        init_method = 'manual'

    if init_method == 'diagonal':
        scales0 = self._getScalesDiag(termx=termx)

    if init_method == 'pairwise':
        assert self.n_randEffs == 2, 'VarianceDecomposition:: pairwise initialization possible only with 2 terms'
        assert self.P > 1, 'VarianceDecomposition:: pairwise initialization possible only with P>1'
        i = (self.trait_covar_type[0] == 'freeform') * (self.trait_covar_type[1] == 'freeform')
        assert i, 'VarianceDecomposition:: pairwise initialization possible only with freeform matrices'
        scales0 = self._getScalesPairwise(verbose=verbose)

    if init_method in ['diagonal', 'manual', 'pairwise']:
        if not perturb:
            n_times = 1

    if fixed0 is None:
        fixed0 = sp.zeros_like(self.gp.getParams()['dataTerm'])

    for i in range(n_times):
        if init_method == 'random':
            scales1 = self._getScalesRand()
            fixed1 = pertSize * sp.randn(fixed0.shape[0], fixed0.shape[1])
        elif perturb:
            scales1 = scales0 + pertSize * self._perturbation()
            fixed1 = fixed0 + pertSize * sp.randn(fixed0.shape[0], fixed0.shape[1])
        else:
            scales1 = scales0
            fixed1 = fixed0
        conv = self.trainGP(scales0=scales1, fixed0=fixed1, lambd=lambd)
        if conv:
            break

    if verbose:
        if not conv:
            print 'No local minimum found for the tested initialization points'
        else:
            print 'Local minimum found at iteration %d' % i
    return conv
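# Sketch (hypothetical helper) of the initialization strategies accepted by
# optimize(); `vc` is an already set-up VarianceDecomposition, constructed as
# in the cross-validation sketch above.
def _example_optimize(vc):
    conv = vc.optimize()                                  # default: 'diagonal' for P>1, 'random' for P=1
    conv = vc.optimize(init_method='random', n_times=20)  # pure random restarts
    scales0 = vc.getScales()                              # reuse a previous solution
    conv = vc.optimize(scales0=scales0, perturb=False)    # init_method becomes 'manual'
    if not conv:
        print 'optimization did not converge'
    return conv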
def optimize_with_repeates(self, fast=None, verbose=None, n_times=10, lambd=None):
    """
    Train the model repeatedly, up to the number of times specified by the
    user, with random restarts, and return a list of all relative minima
    that have been found. The list is sorted by log marginal likelihood
    (LML); each entry is a dictionary with keys "counter", "LML", and
    "scales".

    After running this function, the vc object is set at the last iteration.
    To recover the vc object of one of the repeats, set the scales
    accordingly, e.g.:

        vc.setScales(scales=optimize_with_repeates_output[0]["scales"])

    Args:
        fast:    Boolean. If set to True, initialize the kronSumGP
        verbose: Boolean. If set to True, verbose output is produced.
                 (default True)
        n_times: number of restarts of the optimization (default 10)
    """
    verbose = limix.getVerbose(verbose)

    if not self.init:
        self._initGP(fast)

    opt_list = []
    fixed0 = sp.zeros_like(self.gp.getParams()['dataTerm'])

    # minimize n_times
    for i in range(n_times):
        scales1 = self._getScalesRand()
        fixed1 = 1e-1 * sp.randn(fixed0.shape[0], fixed0.shape[1])
        conv = self.trainGP(fast=fast, scales0=scales1, fixed0=fixed1, lambd=lambd)
        if conv:
            # compare with previously found minima
            temp = 1
            for j in range(len(opt_list)):
                if sp.allclose(abs(self.getScales()), abs(opt_list[j]['scales'])):
                    temp = 0
                    opt_list[j]['counter'] += 1
                    break
            if temp == 1:
                opt = {}
                opt['counter'] = 1
                opt['LML'] = self.getLML()
                opt['scales'] = self.getScales()
                opt_list.append(opt)

    # sort by LML
    LML = sp.array([opt_list[i]['LML'] for i in range(len(opt_list))])
    index = LML.argsort()[::-1]
    out = []
    if verbose:
        print "\nLocal minima\n"
        print "n_times\t\tLML"
        print "------------------------------------"
    for i in range(len(opt_list)):
        out.append(opt_list[index[i]])
        if verbose:
            print "%d\t\t%f" % (opt_list[index[i]]['counter'], opt_list[index[i]]['LML'])
    if verbose:
        print ""
    return out
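# Sketch (hypothetical helper) of restarting the optimization and recovering
# the best minimum, as described in the docstring above; `vc` is an
# initialized VarianceDecomposition.
def _example_repeated_restarts(vc):
    minima = vc.optimize_with_repeates(n_times=10, verbose=True)
    best = minima[0]                    # list is sorted by LML
    print best['LML'], best['counter']
    # the object is left at the last repeat, so reset it to the best one
    vc.setScales(scales=best['scales'])
    return best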
def forward_lmm_kronecker(snps, phenos, Asnps=None, Acond=None, K1r=None, K1c=None,
                          K2r=None, K2c=None, covs=None, Acovs=None, threshold=5e-8,
                          maxiter=2, qvalues=False, update_covariances=False,
                          verbose=None, **kw_args):
    """
    Kronecker fixed effects test with forward selection

    Args:
        snps:   [N x S] np.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] np.array of P phenotypes for N individuals
        Asnps:  single np.array or list of [1 x P] SNP trait design matrices
                (default: one common-effect design)
        Acond:  single np.array or list of SNP trait design matrices used when
                conditioning on included SNPs (default: Asnps)
        K1r:    [N x N] np.array of first sample covariance
                (default: realized relationship matrix from snps)
        K1c:    [P x P] np.array of first trait covariance (estimated if None)
        K2r:    [N x N] np.array of second sample covariance (default: identity)
        K2c:    [P x P] np.array of second trait covariance (estimated if None)
        covs:   [N x D] np.array of D covariates for N individuals
        Acovs:  trait design matrices of the covariates
        threshold: (float) P-value threshold for inclusion in forward selection
                (default 5e-8)
        maxiter: (int) maximum number of interaction scans. First scan is
                without inclusion, so maxiter-1 inclusions can be performed.
                (default 2)
        qvalues: Use a q-value threshold and return q-values in addition
                (default False)
        update_covariances: Boolean indicator whether covariances should be
                re-estimated after each forward step (default False)

    Returns:
        lm: limix LMM object
        resultStruct with elements:
            iadded:  array of indices of SNPs included, in order of inclusion
            pvadded: array of P-values obtained by the included SNPs in the
                     iteration before inclusion
            pvall:   [Nadded x S] np.array of P-values for all iterations
        Optional (if qvalues=True): corresponding q-values
            qvadded
            qvall
    """
    verbose = limix.getVerbose(verbose)
    # 0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    if K1r is None:
        K1r = np.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimensions mismatch'
        assert K1r.shape[1] == N, 'K1r: dimensions mismatch'

    if K2r is None:
        K2r = np.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimensions mismatch'
        assert K2r.shape[1] == N, 'K2r: dimensions mismatch'

    covs, Acovs = _updateKronCovs(covs, Acovs, N, P)

    if Asnps is None:
        Asnps = [np.ones([1, P])]
    if type(Asnps) != list:
        Asnps = [Asnps]
    assert len(Asnps) > 0, "need at least one SNP design matrix"

    if Acond is None:
        Acond = Asnps
    if type(Acond) != list:
        Acond = [Acond]
    assert len(Acond) > 0, "need at least one SNP design matrix"

    # 1. run the GP model to infer a suitable covariance structure
    if K1c is None or K2c is None:
        vc = _estimateKronCovariances(phenos=phenos, K1r=K1r, K2r=K2r, K1c=K1c,
                                      K2c=K2c, covs=covs, Acovs=Acovs, **kw_args)
        K1c = vc.getTraitCovar(0)
        K2c = vc.getTraitCovar(1)
    else:
        vc = None
        assert K1c.shape[0] == P, 'K1c: dimensions mismatch'
        assert K1c.shape[1] == P, 'K1c: dimensions mismatch'
        assert K2c.shape[0] == P, 'K2c: dimensions mismatch'
        assert K2c.shape[1] == P, 'K2c: dimensions mismatch'

    t0 = time.time()
    lm, pv = test_lmm_kronecker(snps=snps, phenos=phenos, Asnps=Asnps, K1r=K1r,
                                K2r=K2r, K1c=K1c, K2c=K2c, covs=covs, Acovs=Acovs)

    # start forward selection bookkeeping
    iadded = []
    pvadded = []
    qvadded = []
    time_el = []
    pvall = []
    qvall = None
    t1 = time.time()
    if verbose:
        print ("finished GWAS testing in %.2f seconds" % (t1 - t0))
    time_el.append(t1 - t0)
    pvall.append(pv)
    imin = np.unravel_index(pv.argmin(), pv.shape)
    score = pv[imin].min()
    niter = 1
    if qvalues:
        assert pv.shape[0] == 1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = []
        qv = FDR.qvalues(pv)
        qvall.append(qv)
        score = qv[imin]
    # loop:
    while (score < threshold) and niter < maxiter:
        t0 = time.time()
        pvadded.append(pv[imin])
        iadded.append(imin)
        if qvalues:
            qvadded.append(qv[imin])
        if update_covariances and vc is not None:
            vc.addFixedTerm(snps[:, imin[1]:(imin[1] + 1)], Acond[imin[0]])
            vc.setScales()  # CL: don't know what this does, but findLocalOptima crashes because vc.noisPos=None
            vc.findLocalOptima(fast=True)
            K1c = vc.getTraitCovar(0)
            K2c = vc.getTraitCovar(1)
            lm.setK1c(K1c)
            lm.setK2c(K2c)
        lm.addCovariates(snps[:, imin[1]:(imin[1] + 1)], Acond[imin[0]])
        for i in xrange(len(Asnps)):
            # add SNP design
            lm.setSNPcoldesign(Asnps[i])
            lm.process()
            pv[i, :] = lm.getPv()[0]
        pvall.append(pv.ravel())
        imin = np.unravel_index(pv.argmin(), pv.shape)
        if qvalues:
            qv = FDR.qvalues(pv)
            qvall.append(qv)
            score = qv[imin].min()
        else:
            score = pv[imin].min()
        t1 = time.time()
        if verbose:
            print ("finished GWAS testing in %.2f seconds" % (t1 - t0))
        time_el.append(t1 - t0)
        niter = niter + 1

    RV = {}
    RV['iadded'] = iadded
    RV['pvadded'] = pvadded
    RV['pvall'] = np.array(pvall)
    RV['time_el'] = time_el
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm, RV
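# Usage sketch (hypothetical helper) for the Kronecker forward selection on
# synthetic multi-trait data; trait covariances K1c/K2c are left to be
# estimated internally, and the lenient threshold is only so the toy example
# performs an inclusion.
def _example_forward_kronecker():
    np.random.seed(3)
    N, S, P = 100, 80, 2
    snps = np.random.binomial(2, 0.3, size=(N, S)).astype(float)
    phenos = np.random.randn(N, P)
    Asnps = [np.ones([1, P])]           # common-effect SNP design
    lm, RV = forward_lmm_kronecker(snps=snps, phenos=phenos, Asnps=Asnps,
                                   threshold=5e-2, maxiter=3, verbose=True)
    print RV['iadded']                  # (design, SNP) index pairs, in inclusion order
    return lm, RV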
def forward_lmm(snps, pheno, K=None, covs=None, qvalues=False, threshold=5e-8,
                maxiter=2, test='lrt', verbose=None, **kw_args):
    """
    Univariate fixed effects test with forward selection

    Args:
        snps:  [N x S] np.array of S SNPs for N individuals (test SNPs)
        pheno: [N x 1] np.array of 1 phenotype for N individuals
        K:     [N x N] np.array of LMM-covariance/kinship coefficients (optional)
               If not provided, linear regression analysis is performed.
        covs:  [N x D] np.array of D covariates for N individuals
        threshold: (float) P-value threshold for inclusion in forward selection
               (default 5e-8)
        maxiter: (int) maximum number of interaction scans. First scan is
               without inclusion, so maxiter-1 inclusions can be performed.
               (default 2)
        test:  'lrt' for likelihood ratio test (default) or 'f' for F-test
        verbose: print verbose output? (default False)

    Returns:
        lm: limix LMM object
        RV: dictionary
            RV['iadded']:  array of indices of SNPs included, in order of inclusion
            RV['pvadded']: array of P-values obtained by the included SNPs in the
                           iteration before inclusion
            RV['pvall']:   [Nadded x S] np.array of P-values for all iterations
    """
    verbose = limix.getVerbose(verbose)

    if K is None:
        K = np.eye(snps.shape[0])
    if covs is None:
        covs = np.ones((snps.shape[0], 1))
    # assert single trait
    assert pheno.shape[1] == 1, 'forward_lmm only supports single phenotypes'

    lm = test_lmm(snps, pheno, K=K, covs=covs, test=test, **kw_args)
    pvall = []
    pv = lm.getPv().ravel()
    # hack to avoid issues with degenerate pv
    pv[sp.isnan(pv)] = 1
    pvall.append(pv)
    imin = pv.argmin()
    niter = 1
    # start forward selection bookkeeping
    iadded = []
    pvadded = []
    qvadded = []
    if qvalues:
        assert pv.shape[0] == 1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = []
        qv = FDR.qvalues(pv)
        qvall.append(qv)
        score = qv.min()
    else:
        score = pv.min()
    while (score < threshold) and niter < maxiter:
        t0 = time.time()
        iadded.append(imin)
        pvadded.append(pv[imin])
        if qvalues:
            qvadded.append(qv[0, imin])
        # condition on the most significant SNP by adding it as a covariate
        covs = np.concatenate((covs, snps[:, imin:(imin + 1)]), 1)
        lm.setCovs(covs)
        lm.process()
        pv = lm.getPv().ravel()
        pv[sp.isnan(pv)] = 1
        pvall.append(pv)
        imin = pv.argmin()
        if qvalues:
            qv = FDR.qvalues(pv)
            qvall.append(qv)
            score = qv.min()
        else:
            score = pv.min()
        t1 = time.time()
        if verbose:
            print ("finished GWAS testing in %.2f seconds" % (t1 - t0))
        niter = niter + 1
    RV = {}
    RV['iadded'] = iadded
    RV['pvadded'] = pvadded
    RV['pvall'] = np.array(pvall)
    if qvalues:
        RV['qvall'] = np.array(qvall)
        RV['qvadded'] = qvadded
    return lm, RV
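# Usage sketch (hypothetical helper) for single-trait forward selection;
# mirrors the Kronecker example above with a single phenotype column and an
# explicit kinship matrix.
def _example_forward_lmm():
    np.random.seed(4)
    N, S = 150, 60
    snps = np.random.binomial(2, 0.5, size=(N, S)).astype(float)
    pheno = np.random.randn(N, 1)
    K = np.dot(snps, snps.T) / S
    lm, RV = forward_lmm(snps=snps, pheno=pheno, K=K, threshold=5e-2,
                         maxiter=3, verbose=True)
    print RV['iadded']                  # SNPs added as covariates, in order
    print RV['pvall'].shape             # one row of P-values per scan
    return lm, RV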