Ejemplo n.º 1
0
    def test_lmm2(self):
        """Check a multi-trait Kronecker LMM against reference single-dof P-values.

        For every dataset the phenotype column is replicated over three
        traits and the model is configured with identity trait covariances
        and identity design matrices. The resulting P-values (computed with
        3 degrees of freedom) are converted to single-dof P-values and must
        agree with the stored reference ``D['pv']`` up to a root mean squared
        log10 difference below 1E-6.
        """
        import scipy.stats as st

        n_traits = 3
        for dataset in self.datasets:
            D = data.load(os.path.join(self.dir_name, dataset))
            kinship = D['K']
            n_samples = kinship.shape[0]

            # Replicate the single phenotype column once per trait.
            pheno = SP.tile(D['Y'][:, SP.newaxis], (1, n_traits))
            covariates = D['Cov'][:, SP.newaxis]

            model = limix.CKroneckerLMM()
            # Row covariances: kinship and identity; column covariances: identity.
            model.setK1r(kinship)
            model.setK1c(SP.eye(n_traits))
            model.setK2r(SP.eye(n_samples))
            model.setK2c(SP.eye(n_traits))

            model.setSNPs(D['X'])
            # Covariates and SNPs both use an identity trait design.
            model.addCovariates(covariates, SP.eye(n_traits))
            model.setSNPcoldesign(SP.eye(n_traits))
            model.setPheno(pheno)
            model.setNumIntervalsAlt(0)
            model.setNumIntervals0(100)

            model.process()

            # P-values as returned by the model (n_traits degrees of freedom).
            pv_multi_dof = model.getPv().ravel()
            # Recover the chi2 statistic, divide it by the number of traits and
            # re-evaluate it with a single degree of freedom.
            stat_single_dof = st.chi2.isf(pv_multi_dof, n_traits) / n_traits
            pv_single_dof = st.chi2.sf(stat_single_dof, 1)

            # Root mean squared difference of the log10 P-values.
            log_diff = SP.log10(pv_single_dof) - SP.log10(D['pv'])
            rms = SP.sqrt((log_diff ** 2).mean())
            self.assertTrue(rms < 1E-6)
Ejemplo n.º 2
0
 def test_lmm(self):
     """Check a single-trait Kronecker LMM against reference P-values.

     With 1 x 1 identity trait covariances and 1 x 1 identity designs the
     P-values returned by the model must agree with the stored reference
     ``D['pv']`` up to a root mean squared log10 difference below 1E-6.
     """
     for dataset in self.datasets:
         D = data.load(os.path.join(self.dir_name, dataset))
         kinship = D['K']
         n_samples = kinship.shape[0]

         covariates = D['Cov'][:, SP.newaxis]
         pheno = D['Y'][:, SP.newaxis]

         model = limix.CKroneckerLMM()
         # Row covariances: kinship and identity; column covariances: 1 x 1 identity.
         model.setK1r(kinship)
         model.setK1c(SP.eye(1))
         model.setK2r(SP.eye(n_samples))
         model.setK2c(SP.eye(1))

         model.setSNPs(D['X'])
         # Covariates and SNPs both use a 1 x 1 identity trait design.
         model.addCovariates(covariates, SP.eye(1))
         model.setSNPcoldesign(SP.eye(1))
         model.setPheno(pheno)
         model.setNumIntervalsAlt(0)
         model.setNumIntervals0(100)

         model.process()
         pv_model = model.getPv().ravel()

         # Root mean squared difference of the log10 P-values.
         log_diff = SP.log10(pv_model) - SP.log10(D['pv'])
         rms = SP.sqrt((log_diff ** 2).mean())
         self.assertTrue(rms < 1E-6)
Ejemplo n.º 3
0
def simple_interaction_kronecker_deprecated(snps,
                                            phenos,
                                            covs=None,
                                            Acovs=None,
                                            Asnps1=None,
                                            Asnps0=None,
                                            K1r=None,
                                            K1c=None,
                                            K2r=None,
                                            K2c=None,
                                            covar_type='lowrank_diag',
                                            rank=1,
                                            searchDelta=False):
    """
    I-variate fixed effects interaction test for phenotype specific SNP effects.
    (Runs multiple likelihood ratio tests and computes the P-values in python from the likelihood ratios)

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] SP.array of P phenotypes for N individuals
        covs:           list of SP.arrays holding covariates. Each covs[i] has one corresponding Acovs[i]
        Acovs:          list of SP.arrays holding the phenotype design matrices for covariates.
                        Each covs[i] has one corresponding Acovs[i].
        Asnps1:         SP.array or list of SP.arrays holding the SNP design matrices of the
                        alternative models. Note that it is assumed that Asnps0 is already included.
                        If not provided, a single design SP.eye(P) is used.
        Asnps0:         single SP.array (or one-element list) holding the SNP design matrix of the
                        null model. If not provided, SP.ones([1, P]) is used.
        K1r:    [N x N] SP.array, first row covariance (optional).
                        If not provided, SP.dot(snps, snps.T) is used.
        K1c:    [P x P] SP.array, first column (trait) covariance (optional).
        K2r:    [N x N] SP.array, second row covariance (optional).
                        If not provided, SP.eye(N) is used.
        K2c:    [P x P] SP.array, second column (trait) covariance (optional).
                        If K1c or K2c is missing, BOTH are replaced by the estimates
                        returned by estimateKronCovariances.
        covar_type:     type of covariance, forwarded to estimateKronCovariances.
                        Default 'lowrank_diag'. Possible values are
                        'freeform': free form optimization,
                        'fixed': use a fixed matrix specified in covar_K0,
                        'diag': optimize a diagonal matrix,
                        'lowrank': optimize a low rank matrix. The rank of the lowrank part is specified in the variable rank,
                        'lowrank_id': optimize a low rank matrix plus the weight of a constant diagonal matrix. The rank of the lowrank part is specified in the variable rank,
                        'lowrank_diag': optimize a low rank matrix plus a free diagonal matrix. The rank of the lowrank part is specified in the variable rank,
                        'block': optimize the weight of a constant P x P block matrix of ones,
                        'block_id': optimize the weight of a constant P x P block matrix of ones plus the weight of a constant diagonal matrix,
                        'block_diag': optimize the weight of a constant P x P block matrix of ones plus a free diagonal matrix,
        rank:           rank of a possible lowrank component (default 1), forwarded to
                        estimateKronCovariances
        searchDelta:    if True, 100 delta intervals are used on the alternative model
                        during SNP testing, otherwise 0 (default False)

    Returns:
        pv:     [len(Asnps1) x S] P-values of the interaction test
        lrt0:   likelihood ratio statistics of the null model
        pv0:    P-values of the null model
        lrt:    [len(Asnps1) x S] likelihood ratio statistics of the interaction test
        lrtAlt: [len(Asnps1) x S] likelihood ratio statistics of the alternative model
        pvAlt:  [len(Asnps1) x S] P-values of the alternative model

    Raises:
        AssertionError: if a provided covariance matrix has the wrong
                        dimensions or the SNP design lists are malformed.
    """
    S = snps.shape[1]
    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    # Identity tests are required here: "array == None" is evaluated
    # elementwise by numpy and cannot be used as an if-condition.
    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimensions dismatch'
        assert K1r.shape[1] == N, 'K1r: dimensions dismatch'

    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimensions dismatch'
        assert K2r.shape[1] == N, 'K2r: dimensions dismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    #Asnps can be several designs
    if Asnps0 is None:
        Asnps0 = [SP.ones([1, P])]
    if Asnps1 is None:
        # eye() expects an integer dimension, not a list
        Asnps1 = [SP.eye(P)]
    if not isinstance(Asnps0, list):
        Asnps0 = [Asnps0]
    if not isinstance(Asnps1, list):
        Asnps1 = [Asnps1]
    assert (len(Asnps0) == 1) and (
        len(Asnps1) >
        0), "need at least one Snp design matrix for null and alt model"

    #one row per column design matrix
    n_designs = len(Asnps1)
    pv = SP.zeros((n_designs, S))
    lrt = SP.zeros((n_designs, S))
    pvAlt = SP.zeros((n_designs, S))
    lrtAlt = SP.zeros((n_designs, S))

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos,
                                     K1r=K1r,
                                     K2r=K2r,
                                     K1c=K1c,
                                     K2c=K2c,
                                     covs=covs,
                                     Acovs=Acovs,
                                     covar_type=covar_type,
                                     rank=rank)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        assert K1c.shape[0] == P, 'K1c: dimensions dismatch'
        assert K1c.shape[1] == P, 'K1c: dimensions dismatch'
        assert K2c.shape[0] == P, 'K2c: dimensions dismatch'
        assert K2c.shape[1] == P, 'K2c: dimensions dismatch'

    #2. run kroneckerLMM for null model
    lmm = limix.CKroneckerLMM()
    lmm.setK1r(K1r)
    lmm.setK1c(K1c)
    lmm.setK2r(K2r)
    lmm.setK2c(K2c)
    lmm.setSNPs(snps)
    #add covariates
    for ic, Acov in enumerate(Acovs):
        lmm.addCovariates(covs[ic], Acov)
    lmm.setPheno(phenos)
    lmm.setNumIntervalsAlt(100 if searchDelta else 0)
    lmm.setNumIntervals0(100)
    #add SNP design
    lmm.setSNPcoldesign(Asnps0[0])
    lmm.process()
    # degrees of freedom = number of rows of the SNP design matrix
    dof0 = Asnps0[0].shape[0]
    pv0 = lmm.getPv()
    lrt0 = ST.chi2.isf(pv0, dof0)

    #3. run kroneckerLMM once per alternative design
    for iA, Asnp in enumerate(Asnps1):
        dof1 = Asnp.shape[0]
        dof = dof1 - dof0
        lmm.setSNPcoldesign(Asnp)
        lmm.process()
        pvAlt[iA, :] = lmm.getPv()[0]
        lrtAlt[iA, :] = ST.chi2.isf(pvAlt[iA, :], dof1)
        # Don't need the likelihood ratios, as null model is the same between the two models
        lrt[iA, :] = lrtAlt[iA, :] - lrt0[0]
        pv[iA, :] = ST.chi2.sf(lrt[iA, :], dof)
    return pv, lrt0, pv0, lrt, lrtAlt, pvAlt
Ejemplo n.º 4
0
def kronecker_lmm(snps,
                  phenos,
                  covs=None,
                  Acovs=None,
                  Asnps=None,
                  K1r=None,
                  K1c=None,
                  K2r=None,
                  K2c=None,
                  covar_type='lowrank_diag',
                  rank=1,
                  NumIntervalsDelta0=100,
                  NumIntervalsDeltaAlt=0,
                  searchDelta=False):
    """
    simple wrapper for kroneckerLMM code

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] SP.array of P phenotypes for N individuals
        covs:           list of SP.arrays holding covariates. Each covs[i] has one corresponding Acovs[i]
        Acovs:          list of SP.arrays holding the phenotype design matrices for covariates.
                        Each covs[i] has one corresponding Acovs[i].
        Asnps:          SP.array or list of SP.arrays holding the SNP design matrices;
                        one test is run per design.
                        If not provided, a single design SP.ones([1, P]) is used.
        K1r:    [N x N] SP.array, first row covariance (optional).
                        If not provided, SP.dot(snps, snps.T) is used.
        K1c:    [P x P] SP.array, first column (trait) covariance (optional).
        K2r:    [N x N] SP.array, second row covariance (optional).
                        If not provided, SP.eye(N) is used.
        K2c:    [P x P] SP.array, second column (trait) covariance (optional).
                        If K1c or K2c is missing, BOTH are replaced by the estimates
                        returned by estimateKronCovariances.
        covar_type:     type of covariance, forwarded to estimateKronCovariances.
                        Default 'lowrank_diag'. Possible values are
                        'freeform': free form optimization,
                        'fixed': use a fixed matrix specified in covar_K0,
                        'diag': optimize a diagonal matrix,
                        'lowrank': optimize a low rank matrix. The rank of the lowrank part is specified in the variable rank,
                        'lowrank_id': optimize a low rank matrix plus the weight of a constant diagonal matrix. The rank of the lowrank part is specified in the variable rank,
                        'lowrank_diag': optimize a low rank matrix plus a free diagonal matrix. The rank of the lowrank part is specified in the variable rank,
                        'block': optimize the weight of a constant P x P block matrix of ones,
                        'block_id': optimize the weight of a constant P x P block matrix of ones plus the weight of a constant diagonal matrix,
                        'block_diag': optimize the weight of a constant P x P block matrix of ones plus a free diagonal matrix,
        rank:           rank of a possible lowrank component (default 1), forwarded to
                        estimateKronCovariances
        NumIntervalsDelta0:  number of steps for delta optimization on the null model (100)
        NumIntervalsDeltaAlt:number of steps for delta optimization on the alt. model (0 - no optimization).
                        Only used when searchDelta is True.
        searchDelta:    Boolean indicator if delta is optimized during SNP testing (default False).
                        If False, 0 intervals are set for the alt. model regardless of
                        NumIntervalsDeltaAlt.

    Returns:
        lmm:    the CKroneckerLMM object (configured with the last design in Asnps)
        pv:     [len(Asnps) x S] P-values, one row per SNP design matrix

    Raises:
        AssertionError: if a provided covariance matrix has the wrong
                        dimensions or no SNP design matrix is given.
    """
    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    # Identity tests are required here: "array == None" is evaluated
    # elementwise by numpy and cannot be used as an if-condition.
    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimensions dismatch'
        assert K1r.shape[1] == N, 'K1r: dimensions dismatch'

    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimensions dismatch'
        assert K2r.shape[1] == N, 'K2r: dimensions dismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    #Asnps can be several designs
    if Asnps is None:
        Asnps = [SP.ones([1, P])]
    if not isinstance(Asnps, list):
        Asnps = [Asnps]
    assert len(Asnps) > 0, "need at least one Snp design matrix"

    #one row per column design matrix
    pv = SP.zeros((len(Asnps), snps.shape[1]))

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos,
                                     K1r=K1r,
                                     K2r=K2r,
                                     K1c=K1c,
                                     K2c=K2c,
                                     covs=covs,
                                     Acovs=Acovs,
                                     covar_type=covar_type,
                                     rank=rank)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        assert K1c.shape[0] == P, 'K1c: dimensions dismatch'
        assert K1c.shape[1] == P, 'K1c: dimensions dismatch'
        assert K2c.shape[0] == P, 'K2c: dimensions dismatch'
        assert K2c.shape[1] == P, 'K2c: dimensions dismatch'

    #2. run kroneckerLMM

    lmm = limix.CKroneckerLMM()
    lmm.setK1r(K1r)
    lmm.setK1c(K1c)
    lmm.setK2r(K2r)
    lmm.setK2c(K2c)
    lmm.setSNPs(snps)
    #add covariates
    for ic, Acov in enumerate(Acovs):
        lmm.addCovariates(covs[ic], Acov)
    lmm.setPheno(phenos)

    #delta search on alt. model?
    lmm.setNumIntervalsAlt(NumIntervalsDeltaAlt if searchDelta else 0)
    lmm.setNumIntervals0(NumIntervalsDelta0)

    for iA, Asnp in enumerate(Asnps):
        #add SNP design
        lmm.setSNPcoldesign(Asnp)
        lmm.process()
        pv[iA, :] = lmm.getPv()[0]
    return lmm, pv
Ejemplo n.º 5
0
def simple_interaction_kronecker(snps,
                                 phenos,
                                 covs=None,
                                 Acovs=None,
                                 Asnps1=None,
                                 Asnps0=None,
                                 K1r=None,
                                 K1c=None,
                                 K2r=None,
                                 K2c=None,
                                 covar_type='lowrank_diag',
                                 rank=1,
                                 NumIntervalsDelta0=100,
                                 NumIntervalsDeltaAlt=0,
                                 searchDelta=False):
    """
    I-variate fixed effects interaction test for phenotype specific SNP effects

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] SP.array of P phenotypes for N individuals
        covs:           list of SP.arrays holding covariates. Each covs[i] has one corresponding Acovs[i]
        Acovs:          list of SP.arrays holding the phenotype design matrices for covariates.
                        Each covs[i] has one corresponding Acovs[i].
        Asnps1:         SP.array or list of SP.arrays holding the SNP design matrices of the
                        alternative models. Note that it is assumed that Asnps0 is already included.
                        If not provided, a single design SP.eye(P) is used.
        Asnps0:         single SP.array (or one-element list) holding the SNP design matrix of the
                        background (null) model of the interaction test.
                        If not provided, SP.ones([1, P]) is used.
        K1r:    [N x N] SP.array, first row covariance (optional).
                        If not provided, SP.dot(snps, snps.T) is used.
        K1c:    [P x P] SP.array, first column (trait) covariance (optional).
        K2r:    [N x N] SP.array, second row covariance (optional).
                        If not provided, SP.eye(N) is used.
        K2c:    [P x P] SP.array, second column (trait) covariance (optional).
                        If K1c or K2c is missing, BOTH are replaced by the estimates
                        returned by estimateKronCovariances.
        covar_type:     type of covariance, forwarded to estimateKronCovariances.
                        Default 'lowrank_diag'. Possible values are
                        'freeform': free form optimization,
                        'fixed': use a fixed matrix specified in covar_K0,
                        'diag': optimize a diagonal matrix,
                        'lowrank': optimize a low rank matrix. The rank of the lowrank part is specified in the variable rank,
                        'lowrank_id': optimize a low rank matrix plus the weight of a constant diagonal matrix. The rank of the lowrank part is specified in the variable rank,
                        'lowrank_diag': optimize a low rank matrix plus a free diagonal matrix. The rank of the lowrank part is specified in the variable rank,
                        'block': optimize the weight of a constant P x P block matrix of ones,
                        'block_id': optimize the weight of a constant P x P block matrix of ones plus the weight of a constant diagonal matrix,
                        'block_diag': optimize the weight of a constant P x P block matrix of ones plus a free diagonal matrix,
        rank:           rank of a possible lowrank component (default 1), forwarded to
                        estimateKronCovariances
        NumIntervalsDelta0:  number of steps for delta optimization on the null model (100)
        NumIntervalsDeltaAlt:number of steps for delta optimization on the alt. model (0 - no optimization).
                        Only used when searchDelta is True.
        searchDelta:     Carry out delta optimization on the alternative model? If yes,
                        NumIntervalsDeltaAlt steps are used for both the alternative model and
                        the interaction null model; otherwise 0 steps are used for both.
    Returns:
        pv:     [len(Asnps1) x S] P-values of the interaction test
        pv0:    P-values of the null model (taken from the last processed design in Asnps1)
        pvAlt:  [len(Asnps1) x S] P-values of the alternative model

    Raises:
        AssertionError: if a provided covariance matrix has the wrong
                        dimensions or the SNP design lists are malformed.
    """
    S = snps.shape[1]
    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    # Identity tests are required here: "array == None" is evaluated
    # elementwise by numpy and cannot be used as an if-condition.
    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimensions dismatch'
        assert K1r.shape[1] == N, 'K1r: dimensions dismatch'

    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimensions dismatch'
        assert K2r.shape[1] == N, 'K2r: dimensions dismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    #Asnps can be several designs
    if Asnps0 is None:
        Asnps0 = [SP.ones([1, P])]
    if Asnps1 is None:
        # eye() expects an integer dimension, not a list
        Asnps1 = [SP.eye(P)]
    if not isinstance(Asnps0, list):
        Asnps0 = [Asnps0]
    if not isinstance(Asnps1, list):
        Asnps1 = [Asnps1]
    assert (len(Asnps0) == 1) and (
        len(Asnps1) >
        0), "need at least one Snp design matrix for null and alt model"

    #one row per column design matrix
    pv = SP.zeros((len(Asnps1), S))
    pvAlt = SP.zeros((len(Asnps1), S))

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos,
                                     K1r=K1r,
                                     K2r=K2r,
                                     K1c=K1c,
                                     K2c=K2c,
                                     covs=covs,
                                     Acovs=Acovs,
                                     covar_type=covar_type,
                                     rank=rank)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        assert K1c.shape[0] == P, 'K1c: dimensions dismatch'
        assert K1c.shape[1] == P, 'K1c: dimensions dismatch'
        assert K2c.shape[0] == P, 'K2c: dimensions dismatch'
        assert K2c.shape[1] == P, 'K2c: dimensions dismatch'

    #2. run kroneckerLMM for null model
    lmm = limix.CKroneckerLMM()
    lmm.setK1r(K1r)
    lmm.setK1c(K1c)
    lmm.setK2r(K2r)
    lmm.setK2c(K2c)
    lmm.setSNPs(snps)
    #add covariates
    for ic, Acov in enumerate(Acovs):
        lmm.addCovariates(covs[ic], Acov)
    lmm.setPheno(phenos)

    #delta search on alt. model? The same number of steps is applied to the
    #alternative model and to the interaction null model.
    n_intervals_alt = NumIntervalsDeltaAlt if searchDelta else 0
    lmm.setNumIntervalsAlt(n_intervals_alt)
    lmm.setNumIntervals0_inter(n_intervals_alt)

    lmm.setNumIntervals0(NumIntervalsDelta0)
    #add SNP design
    lmm.setSNPcoldesign0_inter(Asnps0[0])

    # Defined up front so the return statement is valid even if the loop
    # body never runs (e.g. asserts stripped with -O and empty Asnps1).
    pv0 = None
    for iA, Asnp in enumerate(Asnps1):
        lmm.setSNPcoldesign(Asnp)
        lmm.process()

        # Fetch the P-value matrix once; rows 0, 1 and 2 are read as the
        # alternative model, the interaction test and the null model.
        pvs = lmm.getPv()
        pvAlt[iA, :] = pvs[0]
        pv[iA, :] = pvs[1]
        pv0 = pvs[2]
    return pv, pv0, pvAlt