def test_lmm_lr(G, y, Z, Kbg, Covs=None):
    """
    Association test with a linear mixed model that has a low-rank term.

    A variance decomposition model (optional fixed effects, a background
    random effect, a linear-kernel random effect built from Z, and noise)
    is fitted first. The fitted background and low-rank variance components
    are then used to weight Kbg and the Z kernel into a single covariance
    matrix that is passed to the LMM test.

    input:
    G    :   genotypes
    y    :   phenotype
    Z    :   features of low-rank matrix
    Kbg  :   background covariance matrix
    Covs :   fixed effect covariates

    output:
    pv        :   first row of the p-values reported by the LMM
    beta      :   first row of the SNP effect sizes reported by the LMM
    var_snps  :   beta**2 times the per-column variance of G
    var_covs  :   variance of Covs . weights, repeated per SNP (zeros
                  when Covs is None)
    var_genes :   fitted low-rank variance component, repeated per SNP
    """
    # Stage 1: estimate the variance components.
    model = varianceDecomposition.VarianceDecomposition(y)
    if Covs is not None:
        model.addFixedEffect(Covs)
    model.addRandomEffect(Kbg)
    low_rank_kernel = utils.computeLinearKernel(Z)
    model.addRandomEffect(low_rank_kernel)
    model.addRandomEffect(is_noise=True)
    model.optimize()

    components = model.getVarianceComps()[0]
    background_var = components[0]
    low_rank_var = components[1]

    # Stage 2: run the association test under the combined covariance.
    combined_kernel = background_var * Kbg + low_rank_var * low_rank_kernel
    lmm = qtl.test_lmm(G, y, covs=Covs, K=combined_kernel)
    pv = lmm.getPv()[0]
    beta = lmm.getBetaSNP()[0]

    # Stage 3: per-SNP variance summaries, all of length len(beta).
    n_snps = len(beta)
    var_snps = beta**2 * np.var(G, axis=0)
    var_genes = np.zeros(n_snps) + low_rank_var
    var_covs = np.zeros(n_snps)
    if Covs is not None:
        fixed_effect_fit = np.dot(Covs, model.getWeights())
        var_covs += fixed_effect_fit.var()

    return pv, beta, var_snps, var_covs, var_genes
def setUp(self):
    """
    Prepare the data dictionary and the variance decomposition model.

    If the 'varDecomp' directory next to this file is missing, or 'recalc'
    was passed on the command line, the data are regenerated with a fixed
    random seed through self.genGeno() / self.genPheno() and
    self.generate is set to True. Otherwise the stored data set is loaded
    and the dimensions N, S and P are read off its 'X' and 'Y' entries.

    In both cases a kinship matrix is built from self.D['X'] and a model
    with one genetic random effect, a noise term and a fixed effect is
    stored in self.vc.
    """
    # check: do we have stored data on disk?
    self.dir_name = os.path.dirname(__file__)
    self.dataset = os.path.join(self.dir_name, 'varDecomp')

    dataset_missing = not os.path.exists(self.dataset)
    if dataset_missing or 'recalc' in sys.argv:
        if dataset_missing:
            os.makedirs(self.dataset)
        # Fixed seed so that regenerated data are reproducible.
        SP.random.seed(1)
        self.N = 200
        self.S = 1000
        self.P = 2
        self.D = {}
        # presumably these two calls fill self.D['X'] and self.D['Y'],
        # which are read below -- verify against their definitions
        self.genGeno()
        self.genPheno()
        self.generate = True
    else:
        self.generate = False
        self.D = data.load(self.dataset)
        geno_shape = self.D['X'].shape
        self.N = geno_shape[0]
        self.S = geno_shape[1]
        self.P = self.D['Y'].shape[1]

    # Kinship from the genotypes, scaled so its diagonal has mean one.
    genotypes = self.D['X']
    kinship = SP.dot(genotypes, genotypes.T)
    self.Kg = kinship / kinship.diagonal().mean()

    model = VAR.VarianceDecomposition(self.D['Y'])
    model.addRandomEffect(self.Kg, jitter=0)
    model.addRandomEffect(is_noise=True, jitter=0)
    model.addFixedEffect()
    self.vc = model
# Restrict the data set to the queried phenotypes (intersection=True).
data_subsample = data.subsample_phenotypes(
    phenotype_query=phenotype_query,
    intersection=True,
)

# Get the variables we need from the subsampled data.
phenotypes, sample_idx = data_subsample.getPhenotypes(
    phenotype_query=phenotype_query,
    intersection=True,
)
# After subsampling every sample is expected to be selected.
assert sample_idx.all()
K = data_subsample.getCovariance()
pos = data_subsample.getPos()

# Dimensions of the phenotype table.
N, P = phenotypes.shape

# Variance component model: a fixed effect, a random effect with
# covariance K, and a noise term; both random terms use a
# 'lowrank_diag' trait covariance of rank 4.
trait_covar_options = {'trait_covar_type': 'lowrank_diag', 'rank': 4}
vc = VAR.VarianceDecomposition(phenotypes.values)
vc.addFixedEffect()
vc.addRandomEffect(K=K, **trait_covar_options)
vc.addRandomEffect(is_noise=True, **trait_covar_options)
vc.optimize()

# Trait covariance of term 0 (the K term) and term 1 (the noise term).
Cg = vc.getTraitCovar(0)
Cn = vc.getTraitCovar(1)

# Wrap both covariance matrices in DataFrames labelled by the phenotype
# columns, for nicer output writing.
trait_labels = phenotypes.columns
genetic_covar = pd.DataFrame(Cg, index=trait_labels, columns=trait_labels)
noise_covar = pd.DataFrame(Cn, index=trait_labels, columns=trait_labels)
def varianceDecomposition(self, K=None, tech_noise=None, idx=None, i0=None,
                          i1=None, max_iter=10, verbose=False):
    """
    Fit a variance decomposition model gene by gene and cache the results.

    Every selected gene (a column of self.Y, mean-centred and divided by
    its standard deviation) is modelled with a fixed effect, one random
    effect per matrix in K and two identity random effects, reported as
    'biol_noise' and 'tech_noise'. The last term is initialised at
    sqrt(tech_noise) and its parameter mask is set to zero (presumably
    this keeps it fixed during optimisation -- confirm against the
    VarianceDecomposition API).

    Args:
        K:          list of random effects to be considered in the analysis
                    (a single matrix is wrapped into a list); every matrix
                    must be self.N x self.N
        tech_noise: technical noise; if given it is stored through
                    self.set_tech_noise before fitting
        idx:        indices of the genes to be considered in the analysis
                    (scalar, list/tuple or array)
        i0:         gene index from which the analysis starts
        i1:         gene index to which the analysis stops
                    (i0/i1 are only used when idx is None; if either is
                    missing, all self.G genes are analysed)
        max_iter:   maximum number of random restarts
        verbose:    if True, print progresses

    Nothing is returned. Results are stored on the instance:
        self.var:        (#genes, len(K) + 2) variance components; for
                         genes that did not converge only the two noise
                         columns are filled
        self.Ystar:      one (self.N, #genes) array per matrix in K
        self.var_info:   dict with 'gene_idx', 'col_header', 'conv', 'geneID'
        self.Ystar_info: dict with 'gene_idx', 'conv', 'geneID'
    """
    if tech_noise is not None:
        self.set_tech_noise(tech_noise)
    assert self.tech_noise is not None, 'scLVM:: specify technical noise'
    assert K is not None, 'scLVM:: specify K'
    if not isinstance(K, list):
        K = [K]
    for k in K:
        assert k.shape[0] == self.N, 'scLVM:: K dimension dismatch'
        assert k.shape[1] == self.N, 'scLVM:: K dimension dismatch'

    if idx is None:
        if i0 is None or i1 is None:
            i0 = 0
            i1 = self.G
        idx = SP.arange(i0, i1)
    elif not isinstance(idx, SP.ndarray):
        # A scalar becomes a one-element array; a list/tuple becomes a flat
        # array (wrapping it in another list would yield a 2-D array and
        # break the per-gene loop below).
        idx = SP.array(idx).ravel()

    n_terms = len(K)
    _G = len(idx)
    var = SP.zeros((_G, n_terms + 2))
    # Gene IDs are collected in a list and converted at the end: an array
    # made with SP.zeros(_G, dtype=str) has one-character items and would
    # silently truncate every ID assigned to it.
    geneID = [''] * _G
    conv = SP.zeros(_G) == 1
    Ystar = [SP.zeros((self.N, _G)) for _ in range(n_terms)]

    # Standardise every gene and rescale the technical noise accordingly.
    y_std = self.Y.std(0)
    Ystd = self.Y - self.Y.mean(0)  # delta optimization might be more efficient
    Ystd /= y_std
    tech_noise = self.tech_noise / SP.array(y_std)**2

    for count, ids in enumerate(idx):
        if verbose:
            print('.. fitting gene %d' % ids)

        # extract a single gene
        y = Ystd[:, ids:ids + 1]

        # build and fit variance decomposition model
        vc = VAR.VarianceDecomposition(y)
        vc.addFixedEffect()
        for k in K:
            vc.addRandomEffect(k)
        vc.addRandomEffect(SP.eye(self.N))
        vc.addRandomEffect(SP.eye(self.N))
        vc.vd.getTerm(n_terms + 1).getKcf().setParamMask(SP.zeros(1))

        # Random restarts until the optimiser reports convergence.
        # _conv is pre-set so that max_iter <= 0 counts as "not converged"
        # instead of leaving the name unbound.
        _conv = False
        for _ in range(max_iter):
            scales0 = y.std() * SP.randn(n_terms + 2)
            scales0[n_terms + 1] = SP.sqrt(tech_noise[ids])
            _conv = vc.optimize(scales0=scales0, n_times=2)
            if _conv:
                break
        conv[count] = _conv

        # Record the gene ID for every analysed gene, in the slot that
        # belongs to this gene, regardless of convergence.
        if self.geneID is not None:
            geneID[count] = self.geneID[ids]

        if not _conv:
            # Fallback: only the two noise columns are filled in.
            var[count, -2] = SP.maximum(0, y.var() - tech_noise[ids])
            var[count, -1] = tech_noise[ids]
            continue

        _var = vc.getVarianceComps()[0, :]
        KiY = vc.gp.agetKEffInvYCache().ravel()
        for ki, k in enumerate(K):
            Ystar[ki][:, count] = _var[ki] * SP.dot(k, KiY)
        var[count, :] = _var

    # col header
    col_header = ['hidden_%d' % i for i in range(n_terms)]
    col_header.append('biol_noise')
    col_header.append('tech_noise')
    col_header = SP.array(col_header)

    # annotate column and rows of var and Ystar; 'geneID' is always present
    # (empty strings when self.geneID is None), each dict with its own copy
    var_info = {
        'gene_idx': idx,
        'col_header': col_header,
        'conv': conv,
        'geneID': SP.array(geneID, dtype=str),
    }
    Ystar_info = {
        'gene_idx': idx,
        'conv': conv,
        'geneID': SP.array(geneID, dtype=str),
    }

    # cache stuff
    self.var = var
    self.Ystar = Ystar
    self.var_info = var_info
    self.Ystar_info = Ystar_info
Yu = np.array(Y, dtype='float')
# NOTE(review): Yu is not standardised (that step was disabled) and, within
# this fragment, the model below is built from Y rather than Yu -- confirm
# which one is intended.
if center:
    # NOTE(review): both the centring and the scaling of Xu are taken to
    # belong to this branch -- confirm against the original layout.
    Xu -= Xu.mean(0)
    Xu /= Xu.std(0)

# cis kernel from the selected variants; trans kernel is what remains of the
# population kernel. Both are scaled so that their diagonal has mean one.
uKcis = SP.dot(Xu, Xu.T)
uKtrans = uKpop - uKcis
uKcis /= uKcis.diagonal().mean()
uKtrans /= uKtrans.diagonal().mean()

#4.3 perform experiment and store results in out_gene
out_gene = {}
# print() call form: valid on Python 3, and prints the same text on Python 2.
print("cis/trans/noise + covariates variance decomposition")
vc = VAR.VarianceDecomposition(Y)
vc.addFixedEffect()
# One random effect per covariate kernel, then cis, trans and noise.
# The order is kept because the terms are presumably addressed by
# position later on.
for covariate_kernel in (
        data.kgender,
        data.kreprog,
        data.kmedia,
        data.kuser,
        data.ksentrix_id,
        data.ksentrix_pos,
        data.kplate,
        data.kwell,
        data.ktime,
        data.kpassage,
):
    vc.addRandomEffect(covariate_kernel)
vc.addRandomEffect(uKcis)
vc.addRandomEffect(uKtrans)
vc.addRandomEffect(is_noise=True)
conv = vc.optimize()