예제 #1
0
    def LL(self, h, X=None, stack=True, REML=False):
        """
        Compute the log-likelihood for a given heritability ``h``.

        h     -- heritability at which the likelihood is evaluated.
        X     -- optional covariate input.  When omitted (``None``) the
                 stored ``self.X0t`` is used.
        stack -- when X is given and stack is true, the first column of
                 ``matrixMult(self.Kve.T, X)`` is written into column
                 ``self.q`` of ``self.X0t_stack`` (in place) and that
                 stacked matrix is used.  When stack is false, X is used
                 as given in place of ``self.X0t``.
        REML  -- when true, the additional REML terms are added to the
                 standard log-likelihood.

        Returns the tuple ``(LL, beta, sigma, XX_i)`` where ``beta``,
        ``sigma`` and ``XX_i`` come straight from ``self.getMLSoln(h, X)``
        and ``LL`` has been reduced with ``.sum()``.
        """
        # Identity test on purpose: ``X == None`` compares element-wise when
        # X is a NumPy array, and such an array has no single truth value.
        if X is None:
            X = self.X0t
        elif stack:
            # Only column 0 of the transformed input is kept; it overwrites
            # the spare column of the shared buffer rather than allocating.
            self.X0t_stack[:, (self.q)] = matrixMult(self.Kve.T, X)[:, 0]
            X = self.X0t_stack

        n = float(self.N)
        q = float(X.shape[1])
        beta, sigma, Q, XX_i, XX = self.getMLSoln(h, X)
        LL = (n * np.log(2 * np.pi)
              + np.log(h * self.Kva + (1.0 - h)).sum()
              + n
              + n * np.log(1.0 / n * Q))
        LL = -0.5 * LL

        if REML:
            LL_REML_part = (q * np.log(2.0 * np.pi * sigma)
                            + np.log(det(matrixMult(X.T, X)))
                            - np.log(det(XX)))
            LL = LL + 0.5 * LL_REML_part

        # Q (and therefore LL) comes back array-shaped; collapse it to a scalar.
        LL = LL.sum()
        return LL, beta, sigma, XX_i
예제 #2
0
파일: lmm.py 프로젝트: genenetwork/pylmm
def calculateKinship(W, center=False):
    """
    Build a kinship matrix from an n x m SNP matrix W.

    For every column of W, NaN entries are replaced by the mean of the
    non-NaN entries of that column, and the column is then standardised
    (mean removed, divided by its standard deviation).  Columns with zero
    variance, or with no observed value at all, are dropped.

    NOTE: W is modified in place by the imputation and standardisation.

    The result is ``matrixMult(W, W.T) / m`` where m is the ORIGINAL number
    of columns, i.e. dropped columns still count in the divisor.
    NOTE(review): matrixMult is presumably a plain matrix product -- confirm.

    When ``center`` is true the matrix is rescaled to
    ``(n - 1) * K / trace(P K P)`` with ``P = I - ones((n, n)) / n``.
    """
    n = W.shape[0]
    m = W.shape[1]
    keep = []
    for i in range(m):
        missing = np.isnan(W[:, i])
        # ``~`` is the supported boolean negation; ``True - mask`` raises a
        # TypeError on current NumPy.
        observed = ~missing
        if not observed.any():
            # Nothing to impute from; keeping the column would turn the
            # whole kinship matrix into NaN.
            continue

        mn = W[observed, i].mean()
        W[missing, i] = mn
        vr = W[:, i].var()
        if vr == 0:
            continue

        keep.append(i)
        W[:, i] = (W[:, i] - mn) / np.sqrt(vr)

    W = W[:, keep]
    K = matrixMult(W, W.T) * 1.0 / float(m)
    if center:
        P = np.diag(np.repeat(1, n)) - 1 / float(n) * np.ones((n, n))
        S = np.trace(matrixMult(matrixMult(P, K), P))
        K_n = (n - 1) * K / S
        return K_n
    return K
예제 #3
0
def calculateKinship(W, center=False):
    """
    Build a kinship matrix from an n x m SNP matrix W.

    For every column of W, NaN entries are replaced by the mean of the
    non-NaN entries of that column, and the column is then standardised
    (mean removed, divided by its standard deviation).  Columns with zero
    variance, or with no observed value at all, are dropped.

    NOTE: W is modified in place by the imputation and standardisation.

    The result is ``matrixMult(W, W.T) / m`` where m is the ORIGINAL number
    of columns, i.e. dropped columns still count in the divisor.
    NOTE(review): matrixMult is presumably a plain matrix product -- confirm.

    When ``center`` is true the matrix is rescaled to
    ``(n - 1) * K / trace(P K P)`` with ``P = I - ones((n, n)) / n``.
    """
    n = W.shape[0]
    m = W.shape[1]
    keep = []
    for i in range(m):
        missing = np.isnan(W[:, i])
        # ``~`` is the supported boolean negation; ``True - mask`` raises a
        # TypeError on current NumPy.
        observed = ~missing
        if not observed.any():
            # Nothing to impute from; keeping the column would turn the
            # whole kinship matrix into NaN.
            continue

        mn = W[observed, i].mean()
        W[missing, i] = mn
        vr = W[:, i].var()
        if vr == 0:
            continue

        keep.append(i)
        W[:, i] = (W[:, i] - mn) / np.sqrt(vr)

    W = W[:, keep]
    K = matrixMult(W, W.T) * 1.0 / float(m)
    if center:
        P = np.diag(np.repeat(1, n)) - 1 / float(n) * np.ones((n, n))
        S = np.trace(matrixMult(matrixMult(P, K), P))
        K_n = (n - 1) * K / S
        return K_n
    return K
예제 #4
0
파일: lmm.py 프로젝트: genenetwork/pylmm
   def LL(self, h, X=None, stack=True, REML=False):
      """
      Compute the log-likelihood for a given heritability ``h``.

      h     -- heritability at which the likelihood is evaluated.
      X     -- optional covariate input.  When omitted (``None``) the
               stored ``self.X0t`` is used.
      stack -- when X is given and stack is true, the first column of
               ``matrixMult(self.Kve.T, X)`` is written into column
               ``self.q`` of ``self.X0t_stack`` (in place) and that
               stacked matrix is used.  When stack is false, X is used
               as given in place of ``self.X0t``.
      REML  -- when true, the additional REML terms are added to the
               standard log-likelihood.

      Returns the tuple ``(LL, beta, sigma, XX_i)`` where ``beta``,
      ``sigma`` and ``XX_i`` come straight from ``self.getMLSoln(h, X)``
      and ``LL`` has been reduced with ``.sum()``.
      """
      # Identity test on purpose: ``X == None`` compares element-wise when
      # X is a NumPy array, and such an array has no single truth value.
      if X is None:
         X = self.X0t
      elif stack:
         # Only column 0 of the transformed input is kept; it overwrites
         # the spare column of the shared buffer rather than allocating.
         self.X0t_stack[:, (self.q)] = matrixMult(self.Kve.T, X)[:, 0]
         X = self.X0t_stack

      n = float(self.N)
      q = float(X.shape[1])
      beta, sigma, Q, XX_i, XX = self.getMLSoln(h, X)
      LL = (n * np.log(2 * np.pi)
            + np.log(h * self.Kva + (1.0 - h)).sum()
            + n
            + n * np.log(1.0 / n * Q))
      LL = -0.5 * LL

      if REML:
         LL_REML_part = (q * np.log(2.0 * np.pi * sigma)
                         + np.log(det(matrixMult(X.T, X)))
                         - np.log(det(XX)))
         LL = LL + 0.5 * LL_REML_part

      # Q (and therefore LL) comes back array-shaped; collapse it to a scalar.
      LL = LL.sum()
      return LL, beta, sigma, XX_i
예제 #5
0
    def transform(self):
        """
        Rotate the phenotype vector and the covariate matrix.

        Both ``self.Y`` and ``self.X0`` are left-multiplied by ``self.Kve.T``
        (the transpose of the eigenvector matrix of the kinship K) and stored
        as ``self.Yt`` and ``self.X0t``.  ``self.q`` records the number of
        columns of ``self.X0t``, and ``self.X0t_stack`` is ``self.X0t`` with
        one extra column of ones appended on the right.
        """
        rotation = self.Kve.T

        self.Yt = matrixMult(rotation, self.Y)

        rotated_covariates = matrixMult(rotation, self.X0)
        self.X0t = rotated_covariates
        self.q = rotated_covariates.shape[1]

        # Extra trailing column; it sits at index self.q of the stacked matrix.
        spare_column = np.ones((self.N, 1))
        self.X0t_stack = np.hstack([rotated_covariates, spare_column])
예제 #6
0
파일: lmm.py 프로젝트: genenetwork/pylmm
   def transform(self):
      """
      Rotate the phenotype vector and the covariate matrix.

      Both ``self.Y`` and ``self.X0`` are left-multiplied by ``self.Kve.T``
      (the transpose of the eigenvector matrix of the kinship K) and stored
      as ``self.Yt`` and ``self.X0t``.  ``self.q`` records the number of
      columns of ``self.X0t``, and ``self.X0t_stack`` is ``self.X0t`` with
      one extra column of ones appended on the right.
      """
      rotation = self.Kve.T

      self.Yt = matrixMult(rotation, self.Y)

      rotated_covariates = matrixMult(rotation, self.X0)
      self.X0t = rotated_covariates
      self.q = rotated_covariates.shape[1]

      # Extra trailing column; it sits at index self.q of the stacked matrix.
      spare_column = np.ones((self.N, 1))
      self.X0t_stack = np.hstack([rotated_covariates, spare_column])
예제 #7
0
    def fit(self, X=None, ngrids=100, REML=True):
        """
        Find the heritability (h) that maximises the likelihood.

        X      -- optional covariate input.  When omitted (``None``),
                  ``self.X0t`` is the covariate matrix.  Otherwise the first
                  column of ``matrixMult(self.Kve.T, X)`` is written into
                  column ``self.q`` of ``self.X0t_stack`` (in place) and that
                  stacked matrix is used.
        ngrids -- number of grid points; h is evaluated at
                  ``0, 1/ngrids, ..., (ngrids - 1)/ngrids``.
        REML   -- forwarded to ``self.LL`` and ``self.getMax``.

        The log-likelihoods over the grid are stored in ``self.LLs``, then
        ``self.getMax(H, X, REML)`` supplies the optimum and ``self.LL`` is
        evaluated once more at that optimum.  The results are stored in
        ``self.H``, ``self.optH``, ``self.optLL``, ``self.optBeta`` and
        ``self.optSigma``.

        Returns ``(hmax, beta, sigma, L)``.
        """
        # Identity test on purpose: ``X == None`` compares element-wise when
        # X is a NumPy array, and such an array has no single truth value.
        if X is None:
            X = self.X0t
        else:
            # Reuse the preallocated stacked buffer instead of building a new
            # matrix with hstack on every call.
            self.X0t_stack[:, (self.q)] = matrixMult(self.Kve.T, X)[:, 0]
            X = self.X0t_stack

        H = np.arange(ngrids) / float(ngrids)
        # X is already in its final form here, hence stack=False.
        L = np.array([self.LL(h, X, stack=False, REML=REML)[0] for h in H])
        self.LLs = L

        hmax = self.getMax(H, X, REML)
        L, beta, sigma, _ = self.LL(hmax, X, stack=False, REML=REML)

        self.H = H
        self.optH = hmax.sum()
        self.optLL = L
        self.optBeta = beta
        self.optSigma = sigma.sum()

        return hmax, beta, sigma, L
예제 #8
0
파일: lmm.py 프로젝트: genenetwork/pylmm
   def fit(self, X=None, ngrids=100, REML=True):
      """
      Find the heritability (h) that maximises the likelihood.

      X      -- optional covariate input.  When omitted (``None``),
                ``self.X0t`` is the covariate matrix.  Otherwise the first
                column of ``matrixMult(self.Kve.T, X)`` is written into
                column ``self.q`` of ``self.X0t_stack`` (in place) and that
                stacked matrix is used.
      ngrids -- number of grid points; h is evaluated at
                ``0, 1/ngrids, ..., (ngrids - 1)/ngrids``.
      REML   -- forwarded to ``self.LL`` and ``self.getMax``.

      The log-likelihoods over the grid are stored in ``self.LLs``, then
      ``self.getMax(H, X, REML)`` supplies the optimum and ``self.LL`` is
      evaluated once more at that optimum.  The results are stored in
      ``self.H``, ``self.optH``, ``self.optLL``, ``self.optBeta`` and
      ``self.optSigma``.

      Returns ``(hmax, beta, sigma, L)``.
      """
      # Identity test on purpose: ``X == None`` compares element-wise when
      # X is a NumPy array, and such an array has no single truth value.
      if X is None:
         X = self.X0t
      else:
         # Reuse the preallocated stacked buffer instead of building a new
         # matrix with hstack on every call.
         self.X0t_stack[:, (self.q)] = matrixMult(self.Kve.T, X)[:, 0]
         X = self.X0t_stack

      H = np.arange(ngrids) / float(ngrids)
      # X is already in its final form here, hence stack=False.
      L = np.array([self.LL(h, X, stack=False, REML=REML)[0] for h in H])
      self.LLs = L

      hmax = self.getMax(H, X, REML)
      L, beta, sigma, _ = self.LL(hmax, X, stack=False, REML=REML)

      self.H = H
      self.optH = hmax.sum()
      self.optLL = L
      self.optBeta = beta
      self.optSigma = sigma.sum()

      return hmax, beta, sigma, L
예제 #9
0
    def getMLSoln(self, h, X):
        """
        Return the maximum-likelihood estimates for a given heritability.

        h -- value between 0 and 1: the heritability, i.e. the proportion of
             the total variance attributed to genetics.
        X -- covariate matrix.

        Returns ``(beta, sigma, Q, XX_i, XX)``: the covariate coefficients
        (beta), the total variance of the trait (sigma), and the
        intermediates Q (weighted residual quadratic form), XX (the weighted
        ``X' X`` product) and XX_i (its inverse) for reuse by other methods.
        """
        # Per-observation weights: reciprocal of h * Kva + (1 - h).
        weights = 1.0 / (h * self.Kva + (1.0 - h))
        weighted_Xt = X.T * weights

        XX = matrixMult(weighted_Xt, X)
        XX_i = inv(XX)

        projector = matrixMult(XX_i, weighted_Xt)
        beta = matrixMult(projector, self.Yt)

        residual = self.Yt - matrixMult(X, beta)
        Q = np.dot(residual.T * weights, residual)

        # Divisor is N minus the number of columns of X.
        dof = float(self.N) - float(X.shape[1])
        sigma = Q * 1.0 / dof
        return beta, sigma, Q, XX_i, XX
예제 #10
0
파일: lmm.py 프로젝트: genenetwork/pylmm
   def getMLSoln(self, h, X):
      """
      Return the maximum-likelihood estimates for a given heritability.

      h -- value between 0 and 1: the heritability, i.e. the proportion of
           the total variance attributed to genetics.
      X -- covariate matrix.

      Returns ``(beta, sigma, Q, XX_i, XX)``: the covariate coefficients
      (beta), the total variance of the trait (sigma), and the
      intermediates Q (weighted residual quadratic form), XX (the weighted
      ``X' X`` product) and XX_i (its inverse) for reuse by other methods.
      """
      # Per-observation weights: reciprocal of h * Kva + (1 - h).
      weights = 1.0 / (h * self.Kva + (1.0 - h))
      weighted_Xt = X.T * weights

      XX = matrixMult(weighted_Xt, X)
      XX_i = inv(XX)

      projector = matrixMult(XX_i, weighted_Xt)
      beta = matrixMult(projector, self.Yt)

      residual = self.Yt - matrixMult(X, beta)
      Q = np.dot(residual.T * weights, residual)

      # Divisor is N minus the number of columns of X.
      dof = float(self.N) - float(X.shape[1])
      sigma = Q * 1.0 / dof
      return beta, sigma, Q, XX_i, XX
예제 #11
0
파일: lmm.py 프로젝트: genenetwork/pylmm
   def association(self, X, h=None, stack=True, REML=True, returnBeta=False):
      """
      Calculate association statistics for the SNP encoded in the vector X.

      X          -- SNP input.  When stack is true, the first column of
                    ``matrixMult(self.Kve.T, X)`` is written into column
                    ``self.q`` of ``self.X0t_stack`` (in place) and that
                    stacked matrix is used; otherwise X is used as given.
      h          -- heritability; when ``None`` the stored ``self.optH`` is
                    used.
      REML       -- forwarded to ``self.LL``.
      returnBeta -- when true, two extra values are returned (see below).

      The last coefficient of beta and the matching diagonal entry of the
      matrix returned by ``self.LL`` are passed to ``self.tstat`` together
      with sigma and the number of coefficients.

      Returns ``(ts, ps)`` as produced by ``self.tstat`` (presumably the
      t statistic and its p-value -- confirm against tstat), or
      ``(ts, ps, beta_last, var_last * sigma)`` when returnBeta is true.
      """
      if stack:
         # Reuse the preallocated stacked buffer instead of building a new
         # matrix with hstack on every call.
         self.X0t_stack[:, (self.q)] = matrixMult(self.Kve.T, X)[:, 0]
         X = self.X0t_stack

      # Identity test: ``h == None`` is element-wise if h is a NumPy array.
      if h is None:
         h = self.optH

      # X is already in its final form here, hence stack=False.
      L, beta, sigma, betaVAR = self.LL(h, X, stack=False, REML=REML)
      q = len(beta)
      ts, ps = self.tstat(beta[q - 1], betaVAR[q - 1, q - 1], sigma, q)

      if returnBeta:
         return ts, ps, beta[q - 1].sum(), betaVAR[q - 1, q - 1].sum() * sigma
      return ts, ps
예제 #12
0
    def association(self, X, h=None, stack=True, REML=True, returnBeta=False):
        """
        Calculate association statistics for the SNP encoded in the vector X.

        X          -- SNP input.  When stack is true, the first column of
                      ``matrixMult(self.Kve.T, X)`` is written into column
                      ``self.q`` of ``self.X0t_stack`` (in place) and that
                      stacked matrix is used; otherwise X is used as given.
        h          -- heritability; when ``None`` the stored ``self.optH`` is
                      used.
        REML       -- forwarded to ``self.LL``.
        returnBeta -- when true, two extra values are returned (see below).

        The last coefficient of beta and the matching diagonal entry of the
        matrix returned by ``self.LL`` are passed to ``self.tstat`` together
        with sigma and the number of coefficients.

        Returns ``(ts, ps)`` as produced by ``self.tstat`` (presumably the
        t statistic and its p-value -- confirm against tstat), or
        ``(ts, ps, beta_last, var_last * sigma)`` when returnBeta is true.
        """
        if stack:
            # Reuse the preallocated stacked buffer instead of building a new
            # matrix with hstack on every call.
            self.X0t_stack[:, (self.q)] = matrixMult(self.Kve.T, X)[:, 0]
            X = self.X0t_stack

        # Identity test: ``h == None`` is element-wise if h is a NumPy array.
        if h is None:
            h = self.optH

        # X is already in its final form here, hence stack=False.
        L, beta, sigma, betaVAR = self.LL(h, X, stack=False, REML=REML)
        q = len(beta)
        ts, ps = self.tstat(beta[q - 1], betaVAR[q - 1, q - 1], sigma, q)

        if returnBeta:
            return ts, ps, beta[q - 1].sum(), betaVAR[q - 1, q - 1].sum() * sigma
        return ts, ps