def define_gp(Y, Xr, Sg, Ug, type):
    """Build the GP model selected by ``type``.

    Parameters
    ----------
    Y : ndarray
        Phenotype array. ``Y.shape[1]`` is used as the dimension of every
        column covariance built here; ``Y`` itself is handed to the GP.
    Xr : ndarray
        Passed as ``G`` to ``GP3KronSumLR``. Not used when ``type`` is
        ``'null'``.
    Sg, Ug :
        Passed to the GP as ``S_R`` and ``U_R`` respectively.
    type : str
        ``'rank1'`` (``LowRankCov`` of rank 1), ``'block'`` (``FixedCov`` of
        an all-ones matrix), ``'full'`` (``FreeFormCov``) or ``'null'``
        (no ``Cr`` term; a ``GP2KronSum`` is returned).

    Returns
    -------
    GP2KronSum or GP3KronSumLR

    Raises
    ------
    ValueError
        If ``type`` is not one of the four supported names.
    """
    # Validate first: the original matched with ``type in 'rank1'`` (a
    # substring test, so '' or 'rank' silently selected the rank-1 model) and
    # only printed a message for unknown names before crashing on an
    # undefined ``_Cr``.
    if type not in ('rank1', 'block', 'full', 'null'):
        raise ValueError(
            "unknown model type %r; expected 'rank1', 'block', 'full' or 'null'"
            % (type,))

    from limix_core.covar import LowRankCov
    from limix_core.covar import FixedCov
    from limix_core.covar import FreeFormCov
    from limix_core.gp import GP3KronSumLR
    from limix_core.gp import GP2KronSum

    P = Y.shape[1]

    # The region covariance is built before Cn/Cg, as in the original.
    _Cr = None
    if type == 'rank1':
        _Cr = LowRankCov(P, 1)
    elif type == 'block':
        _Cr = FixedCov(sp.ones((P, P)))
    elif type == 'full':
        _Cr = FreeFormCov(P)

    _Cn = FreeFormCov(P)
    _Cg = FreeFormCov(P)

    if type == 'null':
        return GP2KronSum(Y=Y, Cg=_Cg, Cn=_Cn, S_R=Sg, U_R=Ug)
    return GP3KronSumLR(Y=Y, G=Xr, Cr=_Cr, Cg=_Cg, Cn=_Cn, S_R=Sg, U_R=Ug)
def setUp(self):
    """Create a seeded two-trait GP2KronSum fixture on 200 samples."""
    np.random.seed(1)

    # phenotype
    n_samples, n_traits = 200, 2
    self.Y = sp.randn(n_samples, n_traits)

    # a single fixed-effect term: random binary design, identity trait design
    self.F = [1. * (sp.rand(n_samples, 2) < 0.5)]
    self.A = [sp.eye(n_traits)]

    # row covariance from random binary features, plus a small diagonal term
    n_feats = 10
    feats = 1. * (sp.rand(n_samples, n_feats) < 0.2)
    self.R = covar_rescale(sp.dot(feats, feats.T))
    self.R += 1e-4 * sp.eye(n_samples)

    # column covariances, each started at half the empirical trait covariance
    self.Cg = FreeFormCov(n_traits)
    self.Cn = FreeFormCov(n_traits)
    for col_cov in (self.Cg, self.Cn):
        col_cov.setCovariance(0.5 * sp.cov(self.Y.T))

    # the model under test
    self.gp = GP2KronSum(
        Y=self.Y, F=self.F, A=self.A, Cg=self.Cg, Cn=self.Cn, R=self.R)
def __init__(self, Y=None, R=None, S_R=None, U_R=None, traitID=None, F=None, rank=1):
    """Set up the model state and the underlying GP.

    A background random effect is considered present when ``R`` is given or
    when both ``S_R`` and ``U_R`` are given; in that case covariates ``F``
    are rejected by an assertion.

    Parameters
    ----------
    Y : ndarray
        Phenotypes; ``Y.shape[1]`` is used as the number of traits.
    R, S_R, U_R :
        Forwarded to ``GP3KronSumLR`` when a background random effect is
        present.
    traitID : array-like, optional
        Trait names; defaults to ``"trait 0"``, ``"trait 1"``, ...
    F : ndarray, optional
        Covariates (only without a background random effect). Dependent
        columns are removed with ``remove_dependent_cols``.
    rank : int
        Forwarded to ``GP3KronSumLR``.
    """
    from limix_core.gp import GP2KronSum
    from limix_core.gp import GP2KronSumLR
    from limix_core.gp import GP3KronSumLR
    from limix_core.covar import FreeFormCov

    # is a background random effect specified?
    has_eigh = S_R is not None and U_R is not None
    self.bgRE = R is not None or has_eigh

    # covariates cannot be combined with a background random effect
    msg = (
        "The current implementation of the full rank mtSet"
        " does not support covariates."
        " We reccommend to regress out covariates and"
        " subsequently quantile normalize the phenotypes"
        " to a normal distribution prior to use mtSet."
        " This can be done within the LIMIX framework using"
        " the methods limix.util.preprocess.regressOut and"
        " limix.util.preprocess.gaussianize"
    )
    assert F is None or not self.bgRE, msg

    from limix.util.preprocess import remove_dependent_cols

    A = None
    if F is not None:
        F = remove_dependent_cols(F)
        A = sp.eye(Y.shape[1])

    # trait identifiers
    if traitID is None:
        traitID = sp.array(["trait %d" % p for p in range(Y.shape[1])])
    self.setTraitID(traitID)

    # column covariances and GP; G is a random 0/1 column at this point
    # NOTE(review): G looks like a placeholder replaced later - confirm
    n_traits = Y.shape[1]
    Cg = FreeFormCov(n_traits)
    Cn = FreeFormCov(n_traits)
    G = 1. * (sp.rand(Y.shape[0], 1) < 0.2)
    if not self.bgRE:
        self._gp = GP2KronSumLR(Y=Y, Cn=Cn, G=G, F=F, A=A)
    else:
        self._gp = GP3KronSumLR(
            Y=Y, Cg=Cg, Cn=Cn, R=R, S_R=S_R, U_R=U_R, G=G, rank=rank)

    # null model params
    self.null = None

    # state for the column-by-column (single-trait) analysis
    self.stSet = None
    self.nullST = None
    self.infoOpt = None
    self.infoOptST = None
def define_gp(Y, Xr, F, type, Rr):
    """Build the GP model selected by ``type``.

    Parameters
    ----------
    Y : ndarray
        Phenotype array; ``Y.shape[1]`` is the dimension of the column
        covariances and ``Y.shape[0]`` the length of the all-ones ``G`` used
        for the null model.
    Xr : ndarray
        Region design. Used as ``G`` of ``GP2KronSumLR`` when it has fewer
        columns than rows; otherwise ``Rr`` is used with ``GP2KronSum``.
        Not read when ``type`` is ``'null'``.
    F :
        Passed to the GP as ``F`` (with an identity ``A``).
    type : str
        ``'null'``, ``'rank1'``, ``'block'`` or ``'full'``.
    Rr :
        Passed as ``R`` to ``GP2KronSum`` in the full-rank case.

    Returns
    -------
    GP2KronSumLR or GP2KronSum

    Raises
    ------
    ValueError
        If ``type`` is not one of the four supported names.
    """
    # Fail early and clearly: the original only printed a message for an
    # unknown type and then crashed on an undefined ``_Cr``.
    if type not in ('null', 'rank1', 'block', 'full'):
        raise ValueError(
            "unknown model type %r; expected 'null', 'rank1', 'block' or 'full'"
            % (type,))

    from limix_core.covar import LowRankCov
    from limix_core.covar import FixedCov
    from limix_core.covar import FreeFormCov
    from limix_core.gp import GP2KronSumLR
    from limix_core.gp import GP2KronSum

    P = Y.shape[1]
    _A = sp.eye(P)

    if type in ('null', 'rank1'):
        _Cr = LowRankCov(P, 1)
    elif type == 'block':
        _Cr = FixedCov(sp.ones((P, P)))
    else:  # 'full'
        _Cr = FreeFormCov(P)
    _Cn = FreeFormCov(P)

    if type == 'null':
        # Null model: Cr is set to (almost) zero parameters and its
        # activation flag on the GP covariance is switched off.
        _gp = GP2KronSumLR(
            Y=Y, G=sp.ones((Y.shape[0], 1)), F=F, A=_A, Cr=_Cr, Cn=_Cn)
        _Cr.setParams(1e-9 * sp.ones(P))
        _gp.covar.act_Cr = False
    elif Xr.shape[1] < Xr.shape[0]:
        # fewer region columns than samples: low-rank model on Xr
        _gp = GP2KronSumLR(Y=Y, G=Xr, F=F, A=_A, Cr=_Cr, Cn=_Cn)
    else:
        # otherwise use the supplied row covariance Rr
        _gp = GP2KronSum(Y=Y, F=F, A=_A, R=Rr, Cg=_Cr, Cn=_Cn)
    return _gp
def setUp(self):
    """Create a seeded 4-dimensional FreeFormCov with random parameters."""
    SP.random.seed(1)
    self.name = 'freeform'
    self.n = 4
    self.C = FreeFormCov(self.n)
    self.n_params = self.C.getNumberParams()
    # draw one standard-normal value per parameter
    self.C.setParams(SP.randn(self.n_params))
def fit_null(self, F=None, verbose=True):
    """
    Fit the null model and store its parameters on the instance.

    If ``self.K`` is set it is eigen-decomposed and turned into a low-rank
    factor ``self.W`` (eigenvalues <= 1e-9 are dropped); if only ``self.W``
    is set it is used directly. In both cases a ``GP2KronSumLR`` is optimized.
    With neither, the fixed effects are solved by ordinary least squares.

    Sets ``self.F``, ``self.W``, ``self.covarparam0``, ``self.covarparam1``
    and ``self.Kiy`` (plus ``self.gp`` or ``self.alpha_hat`` depending on the
    branch) and the three quadratic-form helper objects.

    Parameters
    ----------
    F : (`N`, L) ndarray
        fixed effect design for covariates. It cannot have all-zero columns,
        and it must not be None when no kernel is available because the
        analytic solution uses it directly.
    verbose : bool
        forwarded to the GP optimizer.

    Returns
    -------
    RV :
        The return value of ``gp.optimize`` when a kernel is fitted;
        otherwise ``self.covarparam0`` (0).

    Raises
    ------
    ValueError
        If ``self.K`` is set but has no eigenvalue larger than 1e-9.
    """
    self.F = F
    self.qweliumod = CompQuadFormLiuMod()
    self.qwedavies = CompQuadFormDavies()
    self.qwedaviesskat = CompQuadFormDaviesSkat()

    if self.K is not None:
        # Decompose K into a low-rank factor. A boolean mask is used instead
        # of ``U_K[:, -len(S):]``: with no eigenvalue above the threshold the
        # slice ``-0:`` would select every eigenvector.
        S_K, U_K = la.eigh(self.K)
        keep = S_K > 1e-9
        if not keep.any():
            raise ValueError("K has no eigenvalue larger than 1e-9")
        # In most cases W = E but it is kept as a separate attribute for
        # flexibility
        self.W = U_K[:, keep] * S_K[keep] ** 0.5

    if self.W is not None:
        # low-rank random effect: same fit whether W came from K or was given
        self.gp = GP2KronSumLR(
            Y=self.y, F=self.F, A=sp.eye(1), Cn=FreeFormCov(1), G=self.W)
        self.gp.covar.Cr.setCovariance(0.5 * sp.ones((1, 1)))
        self.gp.covar.Cn.setCovariance(0.5 * sp.ones((1, 1)))
        RV = self.gp.optimize(verbose=verbose)
        # optimal kernel parameters
        self.covarparam0 = self.gp.covar.Cr.K()[0, 0]
        self.covarparam1 = self.gp.covar.Cn.K()[0, 0]
        self.Kiy = self.gp.Kiy()
    else:
        # no kernel: solve analytically (ordinary least squares)
        self.alpha_hat = sp.dot(
            sp.dot(la.inv(sp.dot(self.F.T, self.F)), self.F.T), self.y)
        yminus_falpha_hat = self.y - sp.dot(self.F, self.alpha_hat)
        self.covarparam1 = (
            yminus_falpha_hat**2).sum() / yminus_falpha_hat.shape[0]
        self.covarparam0 = 0
        self.Kiy = (1 / float(self.covarparam1)) * self.y
        self.W = sp.zeros(self.y.shape)
        RV = self.covarparam0
    return RV
def test_too_expensive_exceptions(self):
    """L, K and K_grad_i must refuse to run for 5001-dim column covariances."""
    big_dim = 5001
    col_g = FreeFormCov(big_dim)
    col_n = FreeFormCov(big_dim)
    cov = Cov2KronSum(Cg=col_g, Cn=col_n, R=self.R)

    guarded_calls = (cov.L, cov.K, lambda: cov.K_grad_i(0))
    for call in guarded_calls:
        with self.assertRaises(TooExpensiveOperationError):
            call()
def setUp(self):
    """Create a seeded Cov2KronSum (4x4 rows, 2x2 columns) with random params."""
    sp.random.seed(1)

    # row covariance
    n_rows = 4
    basis = sp.rand(n_rows, n_rows)
    self.R = covar_rescale(sp.dot(basis, basis.T))

    # column covariances
    n_cols = 2
    col_g = FreeFormCov(n_cols)
    col_n = FreeFormCov(n_cols)

    self.name = 'cov2kronSum'
    self.C = Cov2KronSum(Cg=col_g, Cn=col_n, R=self.R)
    self.C.setRandomParams()
def test_mtmm_scan_pv_beta():
    """MTLMM p-values and effect sizes must match an LMMCore run on the
    equivalent vectorized (stacked-trait) problem."""
    import scipy as sp
    import scipy.linalg as la
    from limix_core.gp import GP2KronSum
    from limix_core.covar import FreeFormCov

    n_samples, n_traits, n_snp_designs = 200, 4, 2
    n_covs, n_snps = 2, 10
    Y, F, G, _, _, _ = _generate_data(n_samples, n_traits, n_covs, n_snps)
    A = sp.eye(n_traits)
    Asnp = sp.rand(n_traits, n_snp_designs)

    # eigenvalue decomposition of the relatedness matrix built from G
    R = sp.dot(G, G.T)
    R /= R.diagonal().mean()
    R += 1e-4 * sp.eye(R.shape[0])
    Sr, Ur = la.eigh(R)

    # fit the null model
    Cg = FreeFormCov(Y.shape[1])
    Cn = FreeFormCov(Y.shape[1])
    gp = GP2KronSum(
        Y=Y, S_R=Sr, U_R=Ur, Cg=Cg, Cn=Cn, F=F, A=sp.eye(n_traits))
    gp.covar.Cg.setCovariance(0.5 * sp.cov(Y.T))
    gp.covar.Cn.setCovariance(0.5 * sp.cov(Y.T))
    gp.optimize(factr=10)

    # multi-trait scan
    from limix_lmm import MTLMM

    mtlmm = MTLMM(Y, F=F, A=A, Asnp=Asnp, covar=gp.covar)
    pv, B = mtlmm.process(G)

    # reference: standard LMMCore on the column-stacked phenotype
    from limix_lmm.lmm_core import LMMCore

    y_vec = sp.reshape(Y, [Y.size, 1], order="F")
    covs = sp.kron(A, F)
    Aext = sp.kron(Asnp, sp.ones((G.shape[0], 1)))
    Gext = sp.kron(sp.ones((Asnp.shape[0], 1)), G)
    Wext = sp.einsum("ip,in->inp", Aext, Gext)
    Wext = Wext.reshape(Aext.shape[0], -1)
    stlmm = LMMCore(y_vec, covs, Ki_dot=gp.covar.solve)
    stlmm.process(Wext, step=Asnp.shape[1])
    pv_ref = stlmm.getPv()
    beta_ref = stlmm.getBetaSNP()

    assert_allclose(pv_ref, pv, rtol=1e-06, atol=1e-06)
    assert_allclose(beta_ref, B, rtol=1e-06, atol=1e-06)
def setUp(self):
    """Create a seeded Cov3KronSumLR (4 rows, 2 low-rank columns, 2x2 column
    covariances) with random parameters."""
    sp.random.seed(1)

    # row terms: low-rank factor first, then the full row covariance
    n_rows, n_lowrank = 4, 2
    lowrank = sp.rand(n_rows, n_lowrank)
    basis = sp.rand(n_rows, n_rows)
    row_cov = covar_rescale(sp.dot(basis, basis.T))

    # column covariances
    n_cols = 2
    col_g = FreeFormCov(n_cols)
    col_n = FreeFormCov(n_cols)

    self.name = 'cov3kronSumLR'
    self.C = Cov3KronSumLR(Cn=col_n, Cg=col_g, R=row_cov, G=lowrank, rank=1)
    self.C.setRandomParams()
def calc_opt_rho(self):
    """Return rho = v_het / (v_comm + v_het) from a fitted GP.

    ``v_het`` is the fitted ``Cr`` variance scaled by the sample variance of
    ``x * Env``; ``v_comm`` is the variance of the last fixed-effect
    coefficient (the one for ``self.x``) times ``self.x``.
    """
    from limix_core.covar import FreeFormCov
    from limix_core.gp import GP2KronSumLR

    fixed_design = sp.concatenate([self.F, self.W, self.x], 1)
    gxe = self.x * self.Env

    gp = GP2KronSumLR(
        Y=self.y, F=fixed_design, A=sp.eye(1), Cn=FreeFormCov(1), G=gxe)
    gp.covar.Cr.setCovariance(1e-4 * sp.ones((1, 1)))
    gp.covar.Cn.setCovariance(0.02 * sp.ones((1, 1)))
    gp.optimize(verbose=False)

    # var_xEEx = tr(xEEx P)/(n-1) = tr(PW (PW)^T)/(n-1) = (PW**2).sum()/(n-1)
    # with W = xE
    centred = gxe - gxe.mean(0)
    var_xEEx = (centred ** 2).sum() / float(self.y.shape[0] - 1)

    # heterogeneous (interaction) variance
    v_het = gp.covar.Cr.K()[0, 0] * var_xEEx
    # persistent variance
    v_comm = sp.var(gp.b()[-1] * self.x)

    return v_het / (v_comm + v_het)
def train_model(self):
    """Standardize the SNP, fit the GP and store the estimates.

    Stores ``snp_mean``, ``snp_std``, ``x_std``, ``xoE``, ``alpha``,
    ``sigma_1``, ``sigma_2``, ``y_adjust`` and ``persistent_effect`` on the
    instance.

    Returns
    -------
    The last fixed-effect coefficient (the one for ``self.x``).
    """
    import scipy as sp
    from limix_core.covar import FreeFormCov
    from limix_core.gp import GP2KronSumLR

    fixed_design = sp.concatenate([self.F, self.W, self.x], 1)

    # centre and scale the SNP column(s)
    self.snp_mean = self.x.mean(0)
    centred = self.x - self.snp_mean
    self.snp_std = centred.std(0)
    centred /= self.snp_std
    self.x_std = centred
    self.xoE = self.x_std * self.TrainingEnv

    gp = GP2KronSumLR(
        Y=self.y, F=fixed_design, A=sp.eye(1), Cn=FreeFormCov(1), G=self.xoE)
    gp.covar.Cr.setCovariance(1e-4 * sp.ones((1, 1)))
    gp.covar.Cn.setCovariance(0.02 * sp.ones((1, 1)))
    gp.optimize(verbose=False)

    # fitted fixed effects and variance components
    self.alpha = gp.b()
    self.sigma_1 = gp.covar.Cr.K()[0, 0]
    self.sigma_2 = gp.covar.Cn.K()[0, 0]
    self.y_adjust = self.y - sp.dot(fixed_design, self.alpha)
    self.persistent_effect = gp.b()[-1]
    return self.persistent_effect
def test_too_expensive_exceptions(self):
    """L, K and K_grad_i must refuse to run for 5001-dim column covariances."""
    n_rows, n_lowrank = 100, 2
    lowrank = sp.rand(n_rows, n_lowrank)
    basis = sp.rand(n_rows, n_rows)
    row_cov = covar_rescale(sp.dot(basis, basis.T))

    n_cols = 5001
    col_g = FreeFormCov(n_cols)
    col_n = FreeFormCov(n_cols)
    cov = Cov3KronSumLR(Cn=col_n, Cg=col_g, R=row_cov, G=lowrank, rank=1)

    guarded_calls = (cov.L, cov.K, lambda: cov.K_grad_i(0))
    for call in guarded_calls:
        with self.assertRaises(TooExpensiveOperationError):
            call()
def setUp(self):
    """Create a seeded KronCov (3x3 column, 10x10 row) and a random boolean
    mask ``_Iok`` of length ``_cov.dim``."""
    np.random.seed(1)
    n_rows, n_cols = 10, 3

    basis = sp.rand(n_rows, n_rows)
    row_cov = covar_rescale(sp.dot(basis, basis.T))
    col_cov = FreeFormCov(n_cols)

    self._cov = KronCov(col_cov, row_cov)
    # True wherever a standard-normal draw is below 0.9
    self._Iok = sp.randn(self._cov.dim) < 0.9
def setUp(self):
    """Create a seeded Cov2KronSumLR (200 samples, 10 binary features,
    3x3 noise covariance) with random parameters."""
    sp.random.seed(2)

    # low-rank row factor: random binary features
    n_samples, n_feats, n_traits = 200, 10, 3
    feats = 1. * (sp.rand(n_samples, n_feats) < 0.2)

    # column (noise) covariance
    noise_cov = FreeFormCov(n_traits)

    self.name = 'cov2kronSumLR'
    self.C = Cov2KronSumLR(Cn=noise_cov, G=feats, rank=1)
    self.C.setRandomParams()
def _buildTraitCovar(self, trait_covar_type='freeform', rank=1, fixed_trait_covar=None, jitter=1e-4):
    """
    Internal function that builds the trait covariance matrix using the
    LIMIX framework

    Args:
        trait_covar_type: type of covariance to use. Default 'freeform'.
                          Possible values are
                            'freeform':     FreeFormCov
                            'fixed':        FixedCov(fixed_trait_covar)
                            'diag':         DiagonalCov
                            'lowrank':      LowRankCov
                            'lowrank_id':   LowRankCov + FixedCov(identity)
                            'lowrank_diag': LowRankCov + DiagonalCov
                            'block':        FixedCov(all-ones matrix)
                            'block_id':     FixedCov(all-ones) + FixedCov(identity)
                            'block_diag':   FixedCov(all-ones) + DiagonalCov
        rank:             rank of a possible lowrank component (default 1)
        fixed_trait_covar: PxP matrix for the (predefined) trait-to-trait
                          covariance matrix if fixed type is used
        jitter:           diagonal contribution added to freeform covariance
                          matrices for regularization (only used by
                          'freeform')
    Returns:
        LIMIX::Covariance for Trait covariance matrix
    Raises:
        AssertionError: if trait_covar_type is not valid, or if 'fixed' is
                        requested without a PxP fixed_trait_covar
    """
    from limix_core.covar import (FreeFormCov, FixedCov, DiagonalCov,
                                  LowRankCov, SumCov)

    assert trait_covar_type in [
        'freeform', 'diag', 'lowrank', 'lowrank_id', 'lowrank_diag', 'block',
        'block_id', 'block_diag', 'fixed'
    ], 'VarianceDecomposition:: trait_covar_type not valid'

    if trait_covar_type == 'freeform':
        cov = FreeFormCov(self.P, jitter=jitter)
    elif trait_covar_type == 'fixed':
        assert fixed_trait_covar is not None, 'VarianceDecomposition:: set fixed_trait_covar'
        assert fixed_trait_covar.shape[
            0] == self.P, 'VarianceDecomposition:: Incompatible shape for fixed_trait_covar'
        assert fixed_trait_covar.shape[
            1] == self.P, 'VarianceDecomposition:: Incompatible shape for fixed_trait_covar'
        cov = FixedCov(fixed_trait_covar)
    elif trait_covar_type == 'diag':
        cov = DiagonalCov(self.P)
    elif trait_covar_type == 'lowrank':
        cov = LowRankCov(self.P, rank=rank)
    elif trait_covar_type == 'lowrank_id':
        cov = SumCov(LowRankCov(self.P, rank=rank),
                     FixedCov(sp.eye(self.P)))
    elif trait_covar_type == 'lowrank_diag':
        cov = SumCov(LowRankCov(self.P, rank=rank), DiagonalCov(self.P))
    elif trait_covar_type == 'block':
        cov = FixedCov(sp.ones([self.P, self.P]))
    elif trait_covar_type == 'block_id':
        cov1 = FixedCov(sp.ones([self.P, self.P]))
        cov2 = FixedCov(sp.eye(self.P))
        cov = SumCov(cov1, cov2)
    elif trait_covar_type == 'block_diag':
        # The second term is a free diagonal, mirroring 'lowrank_diag'.
        # It used to be FixedCov(identity), which made 'block_diag' an exact
        # duplicate of 'block_id'.
        cov1 = FixedCov(sp.ones([self.P, self.P]))
        cov2 = DiagonalCov(self.P)
        cov = SumCov(cov1, cov2)
    return cov
def setUp(self):
    """Create three seeded GPs on 10 samples x 3 traits: a generic GP on
    complete data, a generic GP on data with missing entries, and a
    GP2KronSum on the complete data."""
    np.random.seed(1)

    # complete phenotype
    n_samples, n_traits = 10, 3
    pheno = sp.randn(n_samples, n_traits)

    # copy of the phenotype with roughly 20% of the entries set to NaN
    missing = sp.rand(n_samples, n_traits) < 0.2
    pheno_missing = pheno.copy()
    pheno_missing[missing] = sp.nan

    # one fixed-effect term, used for both mean objects
    F = [1. * (sp.rand(n_samples, 2) < 0.5)]
    A = [sp.eye(n_traits)]
    mean_full = MeanKronSum(pheno, F=F, A=A)
    mean_missing = MeanKronSum(pheno_missing, F=F, A=A)

    # row covariance
    n_feats = 10
    feats = 1. * (sp.rand(n_samples, n_feats) < 0.2)
    R = covar_rescale(sp.dot(feats, feats.T))
    R += 1e-4 * sp.eye(n_samples)

    # column covariances with random parameters
    Cg = FreeFormCov(n_traits)
    Cn = FreeFormCov(n_traits)
    Cg.setRandomParams()
    Cn.setRandomParams()

    # covariance on the complete data
    covar_full = SumCov(KronCov(Cg, R), KronCov(Cn, sp.eye(n_samples)))

    # covariance restricted to the observed entries (column-major order)
    observed = (~missing).reshape(n_samples * n_traits, order='F')
    signal_missing = KronCov(copy.copy(Cg), R, Iok=observed)
    noise_missing = KronCov(copy.copy(Cn), sp.eye(n_samples), Iok=observed)
    covar_missing = SumCov(signal_missing, noise_missing)

    # the three models
    self._gp = GP(covar=covar_full, mean=mean_full)
    self._gpm = GP(covar=covar_missing, mean=mean_missing)
    self._gp2ks = GP2KronSum(Y=pheno, F=F, A=A, Cg=Cg, Cn=Cn, R=R)
def calc_full_model(self):
    """Fit the GP with ``G = x * Env`` and return ``-gp.LML()``."""
    fixed_design = sp.concatenate([self.F, self.W, self.x], 1)
    gxe = self.x * self.Env

    gp = GP2KronSumLR(
        Y=self.y, F=fixed_design, A=sp.eye(1), Cn=FreeFormCov(1), G=gxe)
    gp.covar.Cr.setCovariance(1e-4 * sp.ones((1, 1)))
    gp.covar.Cn.setCovariance(0.02 * sp.ones((1, 1)))
    gp.optimize(verbose=False)

    return -gp.LML()
def test_too_expensive_exceptions(self):
    """L, R, K and K_grad_i must refuse to run on a 5001-sample,
    5001-trait Cov2KronSumLR."""
    n_samples, n_feats = 5001, 10
    noise_cov = FreeFormCov(5001)
    feats = 1. * (sp.rand(n_samples, n_feats) < 0.2)
    cov = Cov2KronSumLR(Cn=noise_cov, G=feats, rank=1)

    guarded_calls = (cov.L, cov.R, cov.K, lambda: cov.K_grad_i(0))
    for call in guarded_calls:
        with self.assertRaises(TooExpensiveOperationError):
            call()
def calc_marginal_model(self, env_remove=0):
    """Fit the GP with column(s) ``env_remove`` dropped from ``self.Env``
    and return ``-gp.LML()``."""
    fixed_design = sp.concatenate([self.F, self.W, self.x], 1)
    env_kept = sp.delete(self.Env, env_remove, axis=1)
    gxe = self.x * env_kept

    gp = GP2KronSumLR(
        Y=self.y, F=fixed_design, A=sp.eye(1), Cn=FreeFormCov(1), G=gxe)
    gp.covar.Cr.setCovariance(1e-4 * sp.ones((1, 1)))
    gp.covar.Cn.setCovariance(0.02 * sp.ones((1, 1)))
    gp.optimize(verbose=False)

    return -gp.LML()
def setUp(self):
    """Create a seeded two-trait GP3KronSumLR fixture on 200 samples."""
    np.random.seed(1)

    # phenotype
    n_samples, n_traits = 200, 2
    pheno = sp.randn(n_samples, n_traits)

    # low-rank factor, then the row covariance (draw order matters)
    n_feats = 10
    lowrank = 1. * (sp.rand(n_samples, n_feats) < 0.2)
    feats = 1. * (sp.rand(n_samples, n_feats) < 0.2)
    row_cov = covar_rescale(sp.dot(feats, feats.T))
    row_cov += 1e-4 * sp.eye(n_samples)

    # column covariances, each started at half the empirical trait covariance
    col_g = FreeFormCov(n_traits)
    self._Cg = col_g
    col_n = FreeFormCov(n_traits)
    for col_cov in (col_g, col_n):
        col_cov.setCovariance(0.5 * sp.cov(pheno.T))

    # the model under test
    self.gp = GP3KronSumLR(
        Y=pheno, Cg=col_g, Cn=col_n, R=row_cov, G=lowrank, rank=1)
def calc_lml(self, Env):
    """Fit the GP for the given environments and return ``-gp.LML()``.

    When ``Env`` has no columns, an all-ones array shaped like ``self.x`` is
    used as ``G`` instead of ``x * Env``.
    """
    from limix_core.covar import FreeFormCov
    from limix_core.gp import GP2KronSumLR

    fixed_design = sp.concatenate([self.F, self.W, self.x], 1)
    gxe = sp.ones(self.x.shape) if Env.shape[1] == 0 else self.x * Env

    gp = GP2KronSumLR(
        Y=self.y, F=fixed_design, A=sp.eye(1), Cn=FreeFormCov(1), G=gxe)
    gp.covar.Cr.setCovariance(1e-4 * sp.ones((1, 1)))
    gp.covar.Cn.setCovariance(0.02 * sp.ones((1, 1)))
    gp.optimize(verbose=False)

    return -gp.LML()
def define_gp(Y, Xr, mean, Ie, type):
    """Build a GP with a ``CategoricalLR`` covariance selected by ``type``.

    Parameters
    ----------
    Y :
        Not used by this function (kept for interface compatibility).
    Xr : ndarray
        Region design handed to ``CategoricalLR``. For the null model only
        ``Xr.shape[0]`` is used, to size an all-ones column.
    mean :
        Passed to ``GP`` as ``mean``.
    Ie :
        Passed to ``CategoricalLR`` as its third argument.
    type : str
        ``'null'``, ``'block'``, ``'rank1'`` or ``'full'``.

    Returns
    -------
    GP

    Raises
    ------
    ValueError
        If ``type`` is not one of the four supported names.
    """
    # Fail early and clearly: the original only printed a message for an
    # unknown type and then crashed on an undefined ``_Cr``.
    if type not in ('null', 'block', 'rank1', 'full'):
        raise ValueError(
            "unknown model type %r; expected 'null', 'block', 'rank1' or 'full'"
            % (type,))

    from limix_core.covar import LowRankCov
    from limix_core.covar import FixedCov
    from limix_core.covar import FreeFormCov
    from limix_core.covar import CategoricalLR
    from limix_core.gp import GP

    # the column covariance is hard-coded to 2 x 2
    P = 2

    if type == 'null':
        # fixed all-ones covariance with a tiny scale whose activation flag
        # is switched off, on an all-ones design column
        _Cr = FixedCov(sp.ones([2, 2]))
        _Cr.scale = 1e-9
        _Cr.act_scale = False
        covar = CategoricalLR(_Cr, sp.ones((Xr.shape[0], 1)), Ie)
    else:
        if type == 'block':
            _Cr = FixedCov(sp.ones((P, P)))
        elif type == 'rank1':
            _Cr = LowRankCov(P, 1)
        else:  # 'full'
            _Cr = FreeFormCov(P)
        covar = CategoricalLR(_Cr, Xr, Ie)

    _gp = GP(covar=covar, mean=mean)
    return _gp
def mt_scan(G, Y, M=None, K=None, Ac=None, Asnps=None, Asnps0=None, verbose=True):
    """
    Wrapper function for multi-trait single-variant association testing
    using variants of the multi-trait linear mixed model.

    Parameters
    ----------
    G : array-like
        Candidate variants. Normalised by ``conform_dataset`` and then
        passed to ``MTLMM.process``.
    Y : (`N`, `P`) ndarray
        phenotype data
    M : array-like, optional
        Covariate design. Normalised by ``conform_dataset`` and passed to
        the null model and to ``MTLMM`` as ``F``.
    K : (`N`, `N`) array-like
        LMM-covariance/genetic relatedness matrix. Required: the model
        without it (plain multi-trait linear model) is not supported.
    Ac : (`P`, `L`) ndarray
        trait design matrices of the different fixed effect terms.
        By default, ``Ac`` is eye(`P`).
    Asnps : (`P`, `K`) ndarray
        trait design of snp covariance.
        By default, ``Asnps`` is eye(`P`).
    Asnps0 : (`P`, `K`) ndarray
        trait design of snp covariance in the null model.
        By default, Asnps0 is not considered (i.e., no SNP effect in the
        null model). If specified, then three tests are considered:
        (i) Asnps vs , (ii) Asnps0!=0, (iii) Asnps!=Asnps0
    verbose : (bool, optional):
        if True, details such as runtime as displayed.

    Returns
    -------
    pandas.DataFrame
        Columns ``pv`` and ``lrt`` when ``Asnps0`` is None; otherwise
        ``pv1``, ``pv0``, ``pv``, ``lrt1``, ``lrt0`` and ``lrt``.

    Raises
    ------
    ValueError
        If no covariance ``K`` is available after normalisation.
    """
    from pandas import DataFrame
    from scipy.stats import chi2
    from numpy import eye, cov, asarray
    from scipy.linalg import eigh
    from limix_core.gp import GP2KronSum
    from limix_core.covar import FreeFormCov
    from limix_lmm.mtlmm import MTLMM

    if Ac is None:
        Ac = eye(Y.shape[1])

    with session_block("single-trait association test", disable=not verbose):

        with session_line("Normalising input... ", disable=not verbose):
            data = conform_dataset(Y, M, G=G, K=K)

            # case 1: multi-trait linear model.
            # The check must happen before ``asarray``: ``asarray(None)`` is
            # a 0-d object array and never ``None``.
            if data["K"] is None:
                raise ValueError("multi-trait linear model not supported")

            Y = asarray(data["y"])
            M = asarray(data["M"])
            G = asarray(data["G"])
            K = asarray(data["K"])

        # case 2: full-rank multi-trait linear model
        S_R, U_R = eigh(K)
        # use the jittered eigenvalues in the GP (they were previously
        # computed and then discarded)
        S_R = add_jitter(S_R)
        n_traits = Y.shape[1]
        gp = GP2KronSum(
            Y=Y,
            Cg=FreeFormCov(n_traits),
            Cn=FreeFormCov(n_traits),
            S_R=S_R,
            U_R=U_R,
            F=M,
            A=Ac,
        )
        # GP2KronSum's covariance is made of Cg and Cn; start both at half
        # the empirical trait covariance
        gp.covar.Cg.setCovariance(0.5 * cov(Y.T))
        gp.covar.Cn.setCovariance(0.5 * cov(Y.T))
        gp.optimize(verbose=verbose)

        lmm = MTLMM(Y, F=M, A=Ac, Asnp=Asnps, covar=gp.covar)

        if Asnps0 is None:
            lmm.process(G)
            RV = OrderedDict()
            RV["pv"] = lmm.getPv()
            RV["lrt"] = lmm.getLRT()
        else:
            lmm0 = MTLMM(Y, F=M, A=Ac, Asnp=Asnps0, covar=gp.covar)
            lmm.process(G)
            lmm0.process(G)

            # likelihood-ratio test of the alternative vs the null design;
            # the default alternative design is eye(P), i.e. P columns
            lrt1 = lmm.getLRT()
            lrt0 = lmm0.getLRT()
            lrt = lrt1 - lrt0
            n_alt = n_traits if Asnps is None else Asnps.shape[1]
            pv = chi2(n_alt - Asnps0.shape[1]).sf(lrt)

            RV = OrderedDict()
            RV["pv1"] = lmm.getPv()
            RV["pv0"] = lmm0.getPv()
            RV["pv"] = pv
            RV["lrt1"] = lrt1
            RV["lrt0"] = lrt0
            RV["lrt"] = lrt

    return DataFrame(RV)
def run_lmm(reader, pheno, R=None, S_R=None, U_R=None, W=None, covs=None, batch_size=1000, unique_variants=False):
    """
    Utility function to run a linear mixed model scan in batches

    Parameters
    ----------
    reader : :class:`limix.data.BedReader`
        limix bed reader instance.
    pheno : (`N`, 1) ndarray
        phenotype vector
    R : (`N`, `N`) ndarray
        covariance of the random effect.
        Typically this is the genetic relatedness matrix.
        If set, ``W``, ``S_R`` and ``U_R`` are ignored.
    S_R : (`N`, ) ndarray
        eigenvalues of ``R``. If available together with the eigenvectors
        ``U_R``, they can be provided instead of ``R`` to avoid
        repeated computations.
        Only used when ``R`` is not set.
        If set, ``U_R`` should also be specified.
    U_R : (`N`, `N`) ndarray
        eigenvectors of ``R``. If available together with the eigenvalues
        ``S_R``, they can be provided instead of ``R`` to avoid
        repeated computations.
        Only used when ``R`` is not set.
        If set, ``S_R`` should also be specified.
    W : (`N`, `K`) ndarray
        this defines the covariance of a lowrank random effect.
        Setting ``W`` is equivalent to setting ``R = dot(W, W.T)``
        but ``R`` is never computed to minimize memory usage.
        Only used when ``R``, ``U_R`` and ``S_R`` are not set.
    covs : (`N`, L) ndarray
        fixed effect design for covariates `N` samples and `L` covariates.
        If None (default value), an intercept only is considered.
    batch_size : int
        to minimize memory usage the analysis is run in batches.
        The number of variants loaded in a batch
        (loaded into memory at the same time).
    unique_variants : bool
        if True, only non-repeated genotypes are considered
        The default value is False.

    Returns
    -------
    res : *:class:`pandas.DataFrame`*
        contains pv, effect size, standard error on effect size,
        and test statistics as well as variant info.
    """
    if covs is None:
        covs = sp.ones((pheno.shape[0], 1))

    # calc S_R, U_R if R is specified
    if R is not None:
        S_R, U_R = la.eigh(R)

    # assert that S_R and U_R are both specified
    S_is = S_R is not None
    U_is = U_R is not None
    if S_is or U_is:
        assert S_is and U_is, 'Both U_R and S_R should be specified'

    # Make the spectrum positive: lift the smallest eigenvalue to 1e-4.
    # (``min + 1e-4`` as offset would push negative eigenvalues further
    # down.) A new array is created so a caller-supplied S_R is not mutated.
    if S_is:
        s_min = S_R.min()
        if s_min < 1e-4:
            offset = 1e-4 - s_min
            S_R = S_R + offset
            warnings.warn("Added %.2e jitter to make R a SDP cov" % offset)

    # fit null
    if S_is:
        # full-rank random effect, from R or from a supplied S_R/U_R pair
        from limix_core.gp import GP2KronSum
        from limix_core.covar import FreeFormCov
        Cg = FreeFormCov(1)
        Cn = FreeFormCov(1)
        gp = GP2KronSum(
            Y=pheno, Cg=Cg, Cn=Cn, F=covs, A=sp.eye(1), S_R=S_R, U_R=U_R)
        # the shape must be a tuple: ``sp.ones(1, 1)`` passes 1 as the dtype
        Cg.setCovariance(0.5 * sp.ones((1, 1)))
        Cn.setCovariance(0.5 * sp.ones((1, 1)))
        info_opt = gp.optimize(verbose=False)
        covar = gp.covar
    elif W is not None:
        # low-rank random effect
        from limix_core.gp import GP2KronSumLR
        from limix_core.covar import FreeFormCov
        gp = GP2KronSumLR(Y=pheno, Cn=FreeFormCov(1), G=W, F=covs, A=sp.eye(1))
        gp.covar.Cr.setCovariance(0.5 * sp.ones((1, 1)))
        gp.covar.Cn.setCovariance(0.5 * sp.ones((1, 1)))
        info_opt = gp.optimize(verbose=False)
        covar = gp.covar
    else:
        # no random effect
        covar = None

    # define lmm
    lmm = LMM(pheno, covs, covar)

    # number of batches, rounded up so a partial last batch is counted
    n_snps = reader.getSnpInfo().shape[0]
    n_batches = (n_snps + batch_size - 1) // batch_size

    t0 = time.time()

    res = []
    for i, gr in enumerate(GIter(reader, batch_size=batch_size)):
        print('.. batch %d/%d' % (i + 1, n_batches))

        X, _res = gr.getGenotypes(standardize=True, return_snpinfo=True)

        if unique_variants:
            # keep only the first occurrence of each distinct genotype
            X, idxs = f_univar(X, return_idxs=True)
            Isnp = sp.in1d(sp.arange(_res.shape[0]), idxs)
            _res = _res[Isnp]

        # run lmm
        lmm.process(X)
        pv = lmm.getPv()
        beta = lmm.getBetaSNP()
        beta_ste = lmm.getBetaSNPste()
        lrt = lmm.getLRT()

        # add pvalues, beta, etc to res
        _res = _res.assign(pv=pd.Series(pv, index=_res.index))
        _res = _res.assign(beta=pd.Series(beta, index=_res.index))
        _res = _res.assign(beta_ste=pd.Series(beta_ste, index=_res.index))
        _res = _res.assign(lrt=pd.Series(lrt, index=_res.index))
        res.append(_res)

    res = pd.concat(res)
    res.reset_index(inplace=True, drop=True)

    t = time.time() - t0
    print('%.2f s elapsed' % t)

    return res
from limix_core.covar import FreeFormCov

# problem size handed to generate_data
N = 1000
P = 4
K = 2
S = 500
Y, F, G, B0, Cg0, Cn0 = generate_data(N, P, K, S)

# compute eigenvalue decomp of RRM
R = sp.dot(G, G.T)
R /= R.diagonal().mean()
R += 1e-4 * sp.eye(R.shape[0])
Sr, Ur = la.eigh(R)

# fit null model
Cg = FreeFormCov(Y.shape[1])
Cn = FreeFormCov(Y.shape[1])
gp = GP2KronSum(Y=Y, S_R=Sr, U_R=Ur, Cg=Cg, Cn=Cn, F=F, A=sp.eye(P))
gp.covar.Cg.setCovariance(0.5 * sp.cov(Y.T))
gp.covar.Cn.setCovariance(0.5 * sp.cov(Y.T))
gp.optimize(factr=10)

# run MTLMM
# (the leftover ``pdb.set_trace()`` breakpoint that stopped the script here
# has been removed)
from limix_lmm.lmm_core import MTLMM

mtlmm = MTLMM(Y, F=F, A=sp.eye(P), Asnp=sp.eye(P), covar=gp.covar)
pv, B = mtlmm.process(G)
pheno = gaussianize(dfp.loc["gene1"].values[:, None]) # mean as fixed effect covs = sp.ones((pheno.shape[0], 1)) # fit null model wfile = "data_structlmm/env.txt" W = sp.loadtxt(wfile) W = W[:, W.std(0) > 0] W -= W.mean(0) W /= W.std(0) W /= sp.sqrt(W.shape[1]) # larn a covariance on the null model gp = GP2KronSumLR(Y=pheno, Cn=FreeFormCov(1), G=W, F=covs, A=sp.ones((1, 1))) gp.covar.Cr.setCovariance(0.5 * sp.ones((1, 1))) gp.covar.Cn.setCovariance(0.5 * sp.ones((1, 1))) info_opt = gp.optimize(verbose=False) # define lmm lmm = LMM(pheno, covs, gp.covar.solve) # define geno preprocessing function imputer = SimpleImputer(missing_values=np.nan, strategy="mean") preprocess = prep.compose([ prep.filter_by_missing(max_miss=0.10), prep.impute(imputer),
RV = sp.dot(self.U_CstarGrad_n(i).T, self.Cn.USi2().T) RV += sp.dot(self.U_Cstar().T, self.Cn.USi2grad(i).T) return RV def Sgrad_g(self, i): return sp.kron(self.S_CstarGrad_g(i), self.Sr()) def Sgrad_n(self, i): return sp.kron(self.S_CstarGrad_n(i), self.Sr()) if __name__ == '__main__': from limix_core.covar import FreeFormCov from limix_core.util.preprocess import covar_rescale # define row caoriance dim_r = 10 X = sp.rand(dim_r, dim_r) R = covar_rescale(sp.dot(X, X.T)) # define col covariances dim_c = 3 Cg = FreeFormCov(dim_c) Cn = FreeFormCov(dim_c) cov = Cov2KronSum(Cg=Cg, Cn=Cn, R=R) cov.setRandomParams() print((cov.K())) print((cov.K_grad_i(0)))
class TestGPBase(unittest.TestCase):
    """Gradient checks for GP2KronSum with respect to its covariance params."""

    def setUp(self):
        """Create a seeded two-trait GP2KronSum fixture on 200 samples."""
        np.random.seed(1)

        # phenotype
        n_samples, n_traits = 200, 2
        self.Y = sp.randn(n_samples, n_traits)

        # a single fixed-effect term
        self.F = [1. * (sp.rand(n_samples, 2) < 0.5)]
        self.A = [sp.eye(n_traits)]

        # row covariance
        n_feats = 10
        feats = 1. * (sp.rand(n_samples, n_feats) < 0.2)
        self.R = covar_rescale(sp.dot(feats, feats.T))
        self.R += 1e-4 * sp.eye(n_samples)

        # column covariances
        self.Cg = FreeFormCov(n_traits)
        self.Cn = FreeFormCov(n_traits)
        for col_cov in (self.Cg, self.Cn):
            col_cov.setCovariance(0.5 * sp.cov(self.Y.T))

        # the model under test
        self.gp = GP2KronSum(
            Y=self.Y, F=self.F, A=self.A, Cg=self.Cg, Cn=self.Cn, R=self.R)

    def _covar_grad_error(self):
        """Return the ``mcheck_grad`` error of ``LML_grad()['covar']``
        against ``LML()``, evaluated at the current covariance params."""
        gp = self.gp

        def _load(x):
            # write x into the 'covar' entry of the GP parameters
            params = gp.getParams()
            params['covar'] = x
            gp.setParams(params)

        def func(x, i):
            _load(x)
            return gp.LML()

        def grad(x, i):
            _load(x)
            return gp.LML_grad()['covar'][i]

        start = gp.getParams()['covar']
        return mcheck_grad(func, grad, start)

    @unittest.skip("someone has to fix it")
    def test_grad(self):
        """Gradient check with both column covariances active."""
        np.testing.assert_almost_equal(
            self._covar_grad_error(), 0., decimal=4)

    def test_grad_activation(self):
        """Gradient check with Cg, then Cn, deactivated via ``_K_act``."""
        # Cg off, Cn on
        self.Cg._K_act = False
        np.testing.assert_almost_equal(
            self._covar_grad_error(), 0., decimal=4)

        # Cg on, Cn off
        self.Cg._K_act = True
        self.Cn._K_act = False
        np.testing.assert_almost_equal(
            self._covar_grad_error(), 0., decimal=4)

    def test_correct_inputs(self):
        """Only checks that converting None to a float array does not raise."""
        np.asarray(None, dtype=float)
class TestFreeForm(unittest.TestCase):
    """Gradient, Hessian and parameter-activation tests for FreeFormCov."""

    def setUp(self):
        """Create a seeded 4-dimensional FreeFormCov with random parameters."""
        SP.random.seed(1)
        self.name = 'freeform'
        self.n = 4
        self.C = FreeFormCov(self.n)
        self.n_params = self.C.getNumberParams()
        self.C.setParams(SP.randn(self.n_params))

    def test_grad(self):
        """``K_grad_i`` must agree with the numerical gradient of ``K``."""
        cov = self.C

        def func(x, i):
            cov.setParams(x)
            return cov.K()

        def grad(x, i):
            cov.setParams(x)
            return cov.K_grad_i(i)

        err = mcheck_grad(func, grad, cov.getParams())
        np.testing.assert_almost_equal(err, 0., decimal=6)

    def test_param_activation(self):
        """Deactivating K hides all 10 parameters and makes setting or
        differentiating them a ValueError."""
        cov = self.C
        self.assertEqual(len(cov.getParams()), 10)

        cov.act_K = False
        self.assertEqual(len(cov.getParams()), 0)

        # an empty parameter vector is still accepted
        cov.setParams(np.array([]))

        with self.assertRaises(ValueError):
            cov.setParams(np.array([0]))

        with self.assertRaises(ValueError):
            cov.K_grad_i(0)

    def test_Khess(self):
        """``K_hess_i_j(j, .)`` must agree with the numerical gradient of
        ``K_grad_i(j)`` for every parameter index j."""
        cov = self.C

        def check_row(j):
            def func(x, i):
                cov.setParams(x)
                return cov.K_grad_i(j)

            def grad(x, i):
                cov.setParams(x)
                return cov.K_hess_i_j(j, i)

            err = mcheck_grad(func, grad, cov.getParams())
            np.testing.assert_almost_equal(err, 0.)

        for j in range(cov.getNumberParams()):
            check_row(j)