Example #1
0
	def RealVar(self, y, X):
		"""
		Estimate environmental and genetic variance components of phenotype y.

		Fits an LMM with genetic similarity built from the normalized SNP
		matrix X and an intercept-only fixed effect, then splits the total
		variance sigma2 by the estimated heritability h2.

		Input:
		y : length-n phenotype vector
		X : [n, m] SNP matrix

		Returns [se2, sg2]: environmental variance and genetic variance.
		"""
		lmmg = LMM()
		m = np.shape(X)[1]
		n = len(y)
		# scale by 1/sqrt(m) so the kernel X X^T averages over the m SNPs
		lmmg.setG(X / math.sqrt(m))
		lmmg.sety(y)
		lmmg.setX(np.ones([n, 1]))
		try:
			dct = lmmg.findH2()
		except Exception:
			# BUGFIX: was a bare `except:`, which also swallows
			# KeyboardInterrupt/SystemExit. Fall back to h2 = 0.5 and the
			# (population) sample variance of y when the fit fails.
			mn = sum(y) / float(n)
			dct = {
				'h2': .5,
				'sigma2': sum((v - mn) ** 2 for v in y) / float(n),
			}
		h2 = dct['h2']
		s2 = dct['sigma2']
		sg2 = h2 * s2   # genetic variance = heritability * total variance
		se2 = s2 - sg2  # environmental (noise) variance = remainder
		return [se2, sg2]
Example #2
0
 def RealVar(self, y, X):
     """
     Estimate environmental and genetic variance components of phenotype y.

     Fits an LMM whose genetic kernel comes from the normalized SNP matrix
     X (intercept-only fixed effect), then splits the total variance
     sigma2 according to the estimated heritability h2.

     Input:
     y : length-n phenotype vector
     X : [n, m] SNP matrix

     Returns [se2, sg2]: environmental variance and genetic variance.
     """
     lmmg = LMM()
     m = np.shape(X)[1]
     n = len(y)
     # scale by 1/sqrt(m) so the kernel X X^T averages over the m SNPs
     lmmg.setG(X / math.sqrt(m))
     lmmg.sety(y)
     lmmg.setX(np.ones([n, 1]))
     try:
         dct = lmmg.findH2()
     except Exception:
         # BUGFIX: was a bare `except:`, which also swallows
         # KeyboardInterrupt/SystemExit. Fall back to h2 = 0.5 and the
         # (population) sample variance of y when the fit fails.
         mn = sum(y) / float(n)
         dct = {'h2': .5,
                'sigma2': sum((v - mn) ** 2 for v in y) / float(n)}
     h2 = dct['h2']
     s2 = dct['sigma2']
     sg2 = h2 * s2   # genetic variance = heritability * total variance
     se2 = s2 - sg2  # environmental (noise) variance = remainder
     return [se2, sg2]
Example #3
0
class GwasPrototype(object):
    """
    Prototype genome-wide association scan using a linear mixed model (LMM).

    Workflow (see run_gwas): fit the null model (covariates only) once,
    then score every test SNP with a likelihood-ratio test against it,
    reusing the spectral decomposition of the null model.
    """

    def __init__(self, train_snps, test_snps, phen, delta=None, cov=None,
                 REML=False, train_pcs=None, mixing=0.0):
        """
        Set up the GWAS object.

        Input:
        train_snps : [N, S] SNP matrix used to build the genetic similarity
        test_snps  : [N, T] SNP matrix; each column is tested one at a time
        phen       : length-N phenotype vector
        delta      : optional noise/genetic variance ratio; when given it is
                     rescaled by the number of training SNPs, otherwise it is
                     estimated in train_null()
        cov        : optional [N, C] covariate matrix; defaults to a bias column
        REML       : use restricted maximum likelihood if True
        train_pcs  : optional principal components forwarded to LMM.setG
        mixing     : mixing weight (a2) between the SNP kernel and the PCs
        """
        self.REML = REML
        self.train_snps = train_snps
        self.test_snps = test_snps
        self.phen = phen
        # rescale the user-supplied delta by the number of training SNPs so
        # it is on the scale expected by LMM.nLLeval
        if delta is None:
            self.delta = None
        else:
            self.delta = delta * train_snps.shape[1]
        self.n_test = test_snps.shape[1]
        self.n_ind = len(self.phen)

        self.train_pcs = train_pcs
        self.mixing = mixing

        # add a bias (intercept) column if no covariates are used
        if cov is None:
            self.cov = np.ones((self.n_ind, 1))
        else:
            self.cov = cov
        self.n_cov = self.cov.shape[1]

        self.lmm = None
        self.res_null = None
        self.res_alt = []

        self.ll_null = None
        self.ll_alt = np.zeros(self.n_test)
        self.p_values = np.zeros(self.n_test)
        self.sorted_p_values = np.zeros(self.n_test)

        # merge covariates and test snps into one design matrix; test snp
        # idx lives in column self.n_cov + idx (see set_current_UX)
        self.X = np.hstack((self.cov, self.test_snps))

    def precompute_UX(self, X):
        '''
        Precompute the rotated design matrix UX = U^T X for all snps
        to be tested.
        --------------------------------------------------------------------------
        Input:
        X       : [N*D] 2-dimensional array of covariates (plus test snps)
        --------------------------------------------------------------------------
        '''
        logging.info("precomputing UX")

        self.UX = self.lmm.U.T.dot(X)
        self.k = self.lmm.S.shape[0]
        self.N = self.lmm.X.shape[0]
        # low-rank case: also keep the component of X orthogonal to span(U)
        if self.k < self.N:
            self.UUX = X - self.lmm.U.dot(self.UX)

        logging.info("done.")

    def train_null(self):
        """
        Train the model under the null hypothesis (covariates only).
        """
        logging.info("training null model")

        # use LMM
        self.lmm = LMM()
        self.lmm.setG(self.train_snps, self.train_pcs, a2=self.mixing)

        self.lmm.setX(self.cov)
        self.lmm.sety(self.phen)

        logging.info("finding delta")
        if self.delta is None:
            result = self.lmm.findH2(REML=self.REML, minH2=0.00001)
            # convert estimated heritability h2 into the variance ratio delta
            self.delta = 1.0 / result['h2'] - 1.0

        self.res_null = self.lmm.nLLeval(delta=self.delta, REML=self.REML)
        self.ll_null = -self.res_null["nLL"]

    def set_current_UX(self, idx):
        """
        Point the pre-trained LMM at test SNP `idx`: design matrix becomes
        the covariates plus that single SNP column.
        """
        si = idx + self.n_cov

        self.lmm.X = np.hstack((self.X[:, 0:self.n_cov], self.X[:, si:si + 1]))
        self.lmm.UX = np.hstack((self.UX[:, 0:self.n_cov], self.UX[:, si:si + 1]))
        if self.k < self.N:
            self.lmm.UUX = np.hstack((self.UUX[:, 0:self.n_cov], self.UUX[:, si:si + 1]))

    def train_alt(self):
        """
        Train the alternative model (covariates + one SNP) for every test SNP.
        """
        # BUGFIX: `!= None` replaced by the idiomatic identity check
        assert self.lmm is not None
        self.precompute_UX(self.X)

        # BUGFIX: xrange is Python-2-only; range behaves identically here
        for idx in range(self.n_test):
            self.set_current_UX(idx)
            res = self.lmm.nLLeval(delta=self.delta, REML=self.REML)

            self.res_alt.append(res)
            self.ll_alt[idx] = -res["nLL"]

            if idx % 1000 == 0:
                logging.info("processing snp {0}".format(idx))

    def compute_p_values(self):
        """
        Given trained null and alt models, compute likelihood-ratio p-values.
        """
        # from C++ (?)
        #real df = rank_beta[ snp ] - ((real)1.0 * rank_beta_0[ snp ]) ;
        #pvals[ snp ] = PvalFromLikelihoodRatioTest( LL[ snp ] - LL_0[ snp ], ((real)0.5 * df) );

        # one extra parameter (the SNP effect) in the alternative model
        degrees_of_freedom = 1

        assert len(self.res_alt) == self.n_test

        for idx in range(self.n_test):
            test_statistic = self.ll_alt[idx] - self.ll_null
            # 2 * log-likelihood-ratio is chi^2(df)-distributed under the null
            self.p_values[idx] = stats.chi2.sf(2.0 * test_statistic, degrees_of_freedom)

        self.p_idx = np.argsort(self.p_values)
        self.sorted_p_values = self.p_values[self.p_idx]

    def plot_result(self):
        """
        Plot p-values and negative log-likelihood histograms (needs pylab).
        """
        import pylab
        pylab.semilogy(self.p_values)
        pylab.show()

        dummy = [self.res_alt[idx]["nLL"] for idx in range(self.n_test)]
        pylab.hist(dummy, bins=100)
        pylab.title("neg likelihood")
        pylab.show()

        pylab.hist(self.p_values, bins=100)
        pylab.title("p-values")
        pylab.show()

    def run_gwas(self):
        """
        invoke all steps in the right order
        """
        self.train_null()
        self.train_alt()
        self.compute_p_values()
class GwasPrototype(object):
    """
    Prototype genome-wide association scan using a linear mixed model (LMM).

    Workflow (see run_gwas): fit the null model (covariates only) once,
    then score every test SNP with a likelihood-ratio test against it,
    reusing the spectral decomposition of the null model.
    """

    def __init__(self, train_snps, test_snps, phen, delta=None, cov=None,
                 REML=False, train_pcs=None, mixing=0.0):
        """
        Set up the GWAS object.

        Input:
        train_snps : [N, S] SNP matrix used to build the genetic similarity
        test_snps  : [N, T] SNP matrix; each column is tested one at a time
        phen       : length-N phenotype vector
        delta      : optional noise/genetic variance ratio; when given it is
                     rescaled by the number of training SNPs, otherwise it is
                     estimated in train_null()
        cov        : optional [N, C] covariate matrix; defaults to a bias column
        REML       : use restricted maximum likelihood if True
        train_pcs  : optional principal components forwarded to LMM.setG
        mixing     : mixing weight (a2) between the SNP kernel and the PCs
        """
        self.REML = REML
        self.train_snps = train_snps
        self.test_snps = test_snps
        self.phen = phen
        # rescale the user-supplied delta by the number of training SNPs so
        # it is on the scale expected by LMM.nLLeval
        if delta is None:
            self.delta = None
        else:
            self.delta = delta * train_snps.shape[1]
        self.n_test = test_snps.shape[1]
        self.n_ind = len(self.phen)

        self.train_pcs = train_pcs
        self.mixing = mixing

        # add a bias (intercept) column if no covariates are used
        if cov is None:
            self.cov = np.ones((self.n_ind, 1))
        else:
            self.cov = cov
        self.n_cov = self.cov.shape[1]

        self.lmm = None
        self.res_null = None
        self.res_alt = []

        self.ll_null = None
        self.ll_alt = np.zeros(self.n_test)
        self.p_values = np.zeros(self.n_test)
        self.sorted_p_values = np.zeros(self.n_test)

        # merge covariates and test snps into one design matrix; test snp
        # idx lives in column self.n_cov + idx (see set_current_UX)
        self.X = np.hstack((self.cov, self.test_snps))

    def precompute_UX(self, X):
        '''
        Precompute the rotated design matrix UX = U^T X for all snps
        to be tested.
        --------------------------------------------------------------------------
        Input:
        X       : [N*D] 2-dimensional array of covariates (plus test snps)
        --------------------------------------------------------------------------
        '''
        logging.info("precomputing UX")

        self.UX = self.lmm.U.T.dot(X)
        self.k = self.lmm.S.shape[0]
        self.N = self.lmm.X.shape[0]
        # low-rank case: also keep the component of X orthogonal to span(U)
        if self.k < self.N:
            self.UUX = X - self.lmm.U.dot(self.UX)

        logging.info("done.")

    def train_null(self):
        """
        Train the model under the null hypothesis (covariates only).
        """
        logging.info("training null model")

        # use LMM
        self.lmm = LMM()
        self.lmm.setG(self.train_snps, self.train_pcs, a2=self.mixing)

        self.lmm.setX(self.cov)
        self.lmm.sety(self.phen)

        logging.info("finding delta")
        if self.delta is None:
            result = self.lmm.findH2(REML=self.REML, minH2=0.00001)
            # convert estimated heritability h2 into the variance ratio delta
            self.delta = 1.0 / result['h2'] - 1.0

        self.res_null = self.lmm.nLLeval(delta=self.delta, REML=self.REML)
        self.ll_null = -self.res_null["nLL"]

    def set_current_UX(self, idx):
        """
        Point the pre-trained LMM at test SNP `idx`: design matrix becomes
        the covariates plus that single SNP column.
        """
        si = idx + self.n_cov

        self.lmm.X = np.hstack((self.X[:, 0:self.n_cov], self.X[:, si:si + 1]))
        self.lmm.UX = np.hstack((self.UX[:, 0:self.n_cov], self.UX[:, si:si + 1]))
        if self.k < self.N:
            self.lmm.UUX = np.hstack((self.UUX[:, 0:self.n_cov], self.UUX[:, si:si + 1]))

    def train_alt(self):
        """
        Train the alternative model (covariates + one SNP) for every test SNP.
        """
        # BUGFIX: `!= None` replaced by the idiomatic identity check
        assert self.lmm is not None
        self.precompute_UX(self.X)

        # BUGFIX: xrange is Python-2-only; range behaves identically here
        for idx in range(self.n_test):
            self.set_current_UX(idx)
            res = self.lmm.nLLeval(delta=self.delta, REML=self.REML)

            self.res_alt.append(res)
            self.ll_alt[idx] = -res["nLL"]

            if idx % 1000 == 0:
                logging.info("processing snp {0}".format(idx))

    def compute_p_values(self):
        """
        Given trained null and alt models, compute likelihood-ratio p-values.
        """
        # from C++ (?)
        #real df = rank_beta[ snp ] - ((real)1.0 * rank_beta_0[ snp ]) ;
        #pvals[ snp ] = PvalFromLikelihoodRatioTest( LL[ snp ] - LL_0[ snp ], ((real)0.5 * df) );

        # one extra parameter (the SNP effect) in the alternative model
        degrees_of_freedom = 1

        assert len(self.res_alt) == self.n_test

        for idx in range(self.n_test):
            test_statistic = self.ll_alt[idx] - self.ll_null
            # 2 * log-likelihood-ratio is chi^2(df)-distributed under the null
            self.p_values[idx] = stats.chi2.sf(2.0 * test_statistic, degrees_of_freedom)

        self.p_idx = np.argsort(self.p_values)
        self.sorted_p_values = self.p_values[self.p_idx]

    def plot_result(self):
        """
        Plot p-values and negative log-likelihood histograms (needs pylab).
        """
        import pylab
        pylab.semilogy(self.p_values)
        pylab.show()

        dummy = [self.res_alt[idx]["nLL"] for idx in range(self.n_test)]
        pylab.hist(dummy, bins=100)
        pylab.title("neg likelihood")
        pylab.show()

        pylab.hist(self.p_values, bins=100)
        pylab.title("p-values")
        pylab.show()

    def run_gwas(self):
        """
        invoke all steps in the right order
        """
        self.train_null()
        self.train_alt()
        self.compute_p_values()