def RealVar(self, y, X):
    """Split the variance of ``y`` into a residual and a genetic component.

    An ``LMM`` is configured with ``X / sqrt(m)`` as its ``G`` input (``m`` is
    the number of columns of ``X``) and a single column of ones as covariate,
    and ``findH2()`` is asked for ``h2`` and ``sigma2``.  If ``findH2()``
    raises, the function falls back to ``h2 = 0.5`` and uses the population
    variance of ``y`` (divisor ``n``) as ``sigma2``.

    Args:
        y: phenotype values; ``len(y)`` is taken as the number of samples.
        X: 2-D array whose second axis has length ``m``
            (presumably samples x SNPs -- confirm against ``LMM.setG``).

    Returns:
        ``[se2, sg2]`` with ``sg2 = h2 * sigma2`` and ``se2 = sigma2 - sg2``.
    """
    lmmg = LMM()
    m = np.shape(X)[1]
    n = len(y)
    lmmg.setG(X / math.sqrt(m))
    lmmg.sety(y)
    lmmg.setX(np.ones([n, 1]))

    try:
        dct = lmmg.findH2()
    except Exception:
        # Best-effort fallback, kept from the original code.  Only ordinary
        # errors are caught so Ctrl-C / SystemExit still propagate, and the
        # fallback is logged because it silently changes the estimate.
        logging.warning("findH2 failed; falling back to h2=0.5",
                        exc_info=True)
        mn = sum(y) / float(n)
        dct = {
            'h2': .5,
            'sigma2': sum((i - mn) ** 2 for i in y) / float(n),
        }

    h2 = dct['h2']
    s2 = dct['sigma2']
    sg2 = h2 * s2
    se2 = s2 - sg2
    return [se2, sg2]
def RealVar(self, y, X):
    """Estimate the residual and genetic variance components of ``y``.

    Builds an ``LMM`` whose ``G`` input is ``X`` scaled by ``1 / sqrt(m)``
    (``m`` = ``np.shape(X)[1]``) with an all-ones covariate column, then reads
    ``h2`` and ``sigma2`` from ``findH2()``.  When ``findH2()`` raises, ``h2``
    defaults to ``0.5`` and ``sigma2`` to the population variance of ``y``
    (sum of squared deviations divided by ``n``).

    Args:
        y: phenotype values; ``len(y)`` is used as the sample count ``n``.
        X: 2-D array with ``m`` columns
            (presumably one row per sample -- confirm against ``LMM.setG``).

    Returns:
        A two-element list ``[se2, sg2]`` where ``sg2 = h2 * sigma2`` and
        ``se2 = sigma2 - sg2``.
    """
    lmmg = LMM()
    m = np.shape(X)[1]
    n = len(y)
    lmmg.setG(X / math.sqrt(m))
    lmmg.sety(y)
    lmmg.setX(np.ones([n, 1]))

    try:
        dct = lmmg.findH2()
    except Exception:
        # The original used a bare ``except:``, which also swallowed
        # KeyboardInterrupt and SystemExit.  The fallback itself is kept, but
        # it is now logged so a failed fit does not go unnoticed.
        logging.warning("findH2 failed; falling back to h2=0.5",
                        exc_info=True)
        mn = sum(y) / float(n)
        dct = {
            'h2': .5,
            'sigma2': sum((i - mn) ** 2 for i in y) / float(n),
        }

    h2 = dct['h2']
    s2 = dct['sigma2']
    sg2 = h2 * s2
    se2 = s2 - sg2
    return [se2, sg2]
class GwasPrototype(object):
    """Genome-wide scan that tests SNPs one at a time with an ``LMM``.

    The null model (covariates only) is fitted once.  Each test SNP is then
    added as one extra covariate column and the model is re-evaluated at the
    same ``delta``; a likelihood-ratio test with one degree of freedom turns
    the log-likelihood difference into a p-value.

    Call ``run_gwas()`` to execute all steps, then read ``p_values``,
    ``sorted_p_values`` and ``p_idx``.
    """

    def __init__(self, train_snps, test_snps, phen, delta=None, cov=None,
                 REML=False, train_pcs=None, mixing=0.0):
        """Store the inputs and allocate the result buffers.

        Args:
            train_snps: 2-D array handed to ``LMM.setG``; its column count
                scales a user-supplied ``delta``.
            test_snps: 2-D array with one column per SNP to be tested.
            phen: phenotype; ``len(phen)`` is the number of individuals.
            delta: optional value that is multiplied by
                ``train_snps.shape[1]``; when ``None`` it is derived from
                ``findH2`` in ``train_null``.
            cov: optional 2-D covariate array; a single column of ones (bias)
                is used when ``None``.
            REML: forwarded to ``LMM.findH2`` and ``LMM.nLLeval``.
            train_pcs: forwarded as second argument to ``LMM.setG``.
            mixing: forwarded to ``LMM.setG`` as ``a2``.
        """
        self.REML = REML
        self.train_snps = train_snps
        self.test_snps = test_snps
        self.phen = phen
        if delta is None:
            self.delta = None
        else:
            self.delta = delta * train_snps.shape[1]
        self.n_test = test_snps.shape[1]
        self.n_ind = len(self.phen)

        self.train_pcs = train_pcs
        self.mixing = mixing

        # add bias if no covariates are used
        if cov is None:
            self.cov = np.ones((self.n_ind, 1))
        else:
            self.cov = cov
        self.n_cov = self.cov.shape[1]

        self.lmm = None
        self.res_null = None
        self.res_alt = []

        self.ll_null = None
        self.ll_alt = np.zeros(self.n_test)
        self.p_values = np.zeros(self.n_test)
        self.sorted_p_values = np.zeros(self.n_test)

        # merge covariates and test snps: columns [0, n_cov) are covariates,
        # column n_cov + i is test SNP i
        self.X = np.hstack((self.cov, self.test_snps))

    def precompute_UX(self, X):
        """Precompute the ``U``-rotated design matrix for all SNPs at once.

        Stores ``self.UX = lmm.U.T.dot(X)``.  When ``k < N`` (``k`` is
        ``lmm.S.shape[0]``, ``N`` is ``lmm.X.shape[0]``) it also stores
        ``self.UUX = X - lmm.U.dot(UX)``, the part of ``X`` not captured by
        ``U`` (presumably the low-rank case -- confirm against ``LMM``).

        Args:
            X: [N*D] 2-dimensional array of covariates (and test SNPs).
        """
        logging.info("precomputing UX")

        self.UX = self.lmm.U.T.dot(X)
        self.k = self.lmm.S.shape[0]
        self.N = self.lmm.X.shape[0]
        if self.k < self.N:
            self.UUX = X - self.lmm.U.dot(self.UX)

        logging.info("done.")

    def train_null(self):
        """Train the model under the null hypothesis (covariates only).

        Creates ``self.lmm``, estimates ``delta`` as ``1 / h2 - 1`` from
        ``findH2`` when no ``delta`` was supplied, and stores the null
        evaluation in ``res_null`` and its log-likelihood in ``ll_null``.
        """
        logging.info("training null model")

        # use LMM
        self.lmm = LMM()
        self.lmm.setG(self.train_snps, self.train_pcs, a2=self.mixing)

        self.lmm.setX(self.cov)
        self.lmm.sety(self.phen)

        logging.info("finding delta")
        if self.delta is None:
            result = self.lmm.findH2(REML=self.REML, minH2=0.00001)
            self.delta = 1.0 / result['h2'] - 1.0

        self.res_null = self.lmm.nLLeval(delta=self.delta, REML=self.REML)
        # nLLeval reports the negative log-likelihood; flip the sign
        self.ll_null = -self.res_null["nLL"]

    def set_current_UX(self, idx):
        """Point the trained LMM at covariates plus test SNP ``idx``.

        Overwrites ``lmm.X`` and ``lmm.UX`` (and ``lmm.UUX`` when
        ``k < N``) with the covariate columns followed by the single column
        of SNP ``idx``, taken from the matrices cached by ``precompute_UX``.
        """
        si = idx + self.n_cov  # column of SNP idx inside the merged matrix

        self.lmm.X = np.hstack((self.X[:, 0:self.n_cov],
                                self.X[:, si:si + 1]))
        self.lmm.UX = np.hstack((self.UX[:, 0:self.n_cov],
                                 self.UX[:, si:si + 1]))
        if self.k < self.N:
            self.lmm.UUX = np.hstack((self.UUX[:, 0:self.n_cov],
                                      self.UUX[:, si:si + 1]))

    def train_alt(self):
        """Evaluate the alternative model once per test SNP.

        Requires ``train_null`` to have been called.  Fills ``res_alt`` with
        one ``nLLeval`` result per SNP and ``ll_alt`` with the matching
        log-likelihoods.
        """
        assert self.lmm is not None, "train_null() must run before train_alt()"

        self.precompute_UX(self.X)

        # start from a clean list so that calling train_alt()/run_gwas() a
        # second time does not leave stale results behind
        self.res_alt = []

        # range (not xrange) keeps this runnable on Python 2 and Python 3
        for idx in range(self.n_test):
            self.set_current_UX(idx)
            res = self.lmm.nLLeval(delta=self.delta, REML=self.REML)

            self.res_alt.append(res)
            self.ll_alt[idx] = -res["nLL"]

            if idx % 1000 == 0:
                logging.info("processing snp %d", idx)

    def compute_p_values(self):
        """Compute likelihood-ratio p-values from the trained models.

        Uses ``2 * (ll_alt - ll_null)`` as the test statistic and the
        chi-square survival function with one degree of freedom.  Fills
        ``p_values`` in place and sets ``p_idx`` (argsort of the p-values)
        and ``sorted_p_values``.
        """
        # from C++ (?)
        # real df = rank_beta[ snp ] - ((real)1.0 * rank_beta_0[ snp ]) ;
        # pvals[ snp ] = PvalFromLikelihoodRatioTest( LL[ snp ] - LL_0[ snp ], ((real)0.5 * df) );
        degrees_of_freedom = 1

        assert len(self.res_alt) == self.n_test

        test_statistic = self.ll_alt - self.ll_null
        # write into the existing buffer rather than rebinding the attribute
        self.p_values[:] = stats.chi2.sf(2.0 * test_statistic,
                                         degrees_of_freedom)

        self.p_idx = np.argsort(self.p_values)
        self.sorted_p_values = self.p_values[self.p_idx]

    def plot_result(self):
        """Plot the p-values and histograms of nLL and of the p-values."""
        # imported lazily so the plotting backend is only needed here
        import pylab
        pylab.semilogy(self.p_values)
        pylab.show()

        dummy = [self.res_alt[idx]["nLL"] for idx in range(self.n_test)]
        pylab.hist(dummy, bins=100)
        pylab.title("neg likelihood")
        pylab.show()

        pylab.hist(self.p_values, bins=100)
        pylab.title("p-values")
        pylab.show()

    def run_gwas(self):
        """Invoke all steps in the right order: null, alternative, p-values."""
        self.train_null()
        self.train_alt()
        self.compute_p_values()