def test_economic_qs_linear():
    """Check economic_qs_linear against economic_qs on G @ G.T for wide and tall G."""
    rnd = RandomState(2951)
    G = rnd.randn(3, 5)

    # Wide matrix (more columns than rows).
    full = economic_qs_linear(G)
    ref = economic_qs(dot(G, G.T))
    noq1 = economic_qs_linear(G, return_q1=False)
    assert_allclose(full[0][0], ref[0][0])
    assert_allclose(noq1[0][0], ref[0][0])
    assert_equal(len(noq1[0]), 1)
    assert_allclose(full[0][1], ref[0][1])
    assert_allclose(full[1], ref[1])
    assert_allclose(noq1[1], ref[1])

    # Tall matrix: reference eigenpairs must first be sorted in descending order.
    G = G.T.copy()
    full = economic_qs_linear(G)
    ref = economic_qs(dot(G, G.T))
    idx = argsort(-1 * ref[1])
    ref = ((ref[0][0][:, idx], ref[0][1]), ref[1][idx])
    noq1 = economic_qs_linear(G, return_q1=False)
    assert_allclose(full[0][0], ref[0][0])
    assert_allclose(noq1[0][0], ref[0][0])
    assert_equal(len(noq1[0]), 1)
    assert_allclose(full[1], ref[1])
    assert_allclose(noq1[1], ref[1])
def test_lmm_beta_covariance():
    """Verify LMM.beta_covariance against known values for full- and low-rank designs."""
    random = RandomState(0)

    # Full-rank covariates.
    (y, X, G) = _full_rank(random)
    QS = economic_qs_linear(G)
    lmm = LMM(y, X, QS)
    lmm.fit(verbose=False)
    A = [
        [0.015685784760937037, 0.006509918649859495],
        [0.006509918649859495, 0.007975242272006645],
    ]
    assert_allclose(lmm.beta_covariance, A)

    # Low-rank design restricted to the first two covariates.
    (y, X, G) = _low_rank(random)
    QS = economic_qs_linear(G)
    lmm = LMM(y, X[:, :2], QS)
    lmm.fit(verbose=False)
    A = [
        [0.002763268929325623, 0.0006651810010328699],
        [0.0006651810010328708, 0.0016910004907565248],
    ]
    assert_allclose(lmm.beta_covariance, A)

    # Low-rank design with all (linearly dependent) covariates: the covariance
    # has a repeating block structure reflecting the redundancy.
    (y, X, G) = _low_rank(random)
    QS = economic_qs_linear(G)
    lmm = LMM(y, X, QS)
    lmm.fit(verbose=False)
    A = [
        [
            0.003892850639339253,
            0.0012112513279299796,
            0.003892850639339256,
            0.0012112513279299794,
        ],
        [
            0.0012112513279299794,
            0.009340423857663259,
            0.0012112513279299833,
            0.009340423857663257,
        ],
        [
            0.0038928506393392562,
            0.0012112513279299835,
            0.003892850639339259,
            0.0012112513279299833,
        ],
        [
            0.0012112513279299794,
            0.009340423857663257,
            0.0012112513279299833,
            0.009340423857663257,
        ],
    ]
    assert_allclose(lmm.beta_covariance, A)
def test_fast_scanner_set_scale_multicovariates():
    """Fast scan over markers identical to the covariates keeps the scale at 1."""
    rnd = RandomState(9458)
    nsamples = 10
    X = _covariates_sample(rnd, nsamples, nsamples + 1)
    y = _outcome_sample(rnd, 1.0, X)
    QS = economic_qs_linear(X)
    M = rnd.randn(nsamples, 3)

    lmm = LMM(y, M, QS)
    lmm.fit(verbose=False)

    scanner = lmm.get_fast_scanner()
    r = scanner.fast_scan(M.copy(), verbose=False)

    assert_allclose(
        r["lml"], [-19.318845, -19.318845, -19.318845], rtol=1e-6, atol=1e-6
    )
    assert_allclose(
        r["effsizes0"][2],
        [-0.6923007382350215, 2.3550810825973034, -0.38157769653894497],
        rtol=1e-5,
    )
    assert_allclose(
        r["effsizes1"], [-0.34615, 1.177541, -0.381578], rtol=1e-6, atol=1e-6
    )
    assert_allclose(r["scale"], [1.0, 1.0, 1.0])
def test_binomial_optimize():
    """EP learning for a binomial likelihood reaches the expected marginal likelihood."""
    rnd = RandomState(139)
    nsamples = 30
    nfeatures = 31

    G = rnd.randn(nsamples, nfeatures) / sqrt(nfeatures)
    u = rnd.randn(nfeatures)
    z = 0.1 + 2 * dot(G, u) + rnd.randn(nsamples)

    # Simulate per-sample success counts from a logistic latent model.
    ntrials = rnd.randint(10, 500, size=nsamples)
    y = zeros(nsamples)
    for i in range(len(ntrials)):
        y[i] = sum(z[i] + rnd.logistic(scale=pi / sqrt(3), size=ntrials[i]) > 0)

    (Q, S0) = economic_qs_linear(G)
    M = ones((nsamples, 1))
    lik = BinomialProdLik(ntrials, LogitLink())
    lik.nsuccesses = y

    ep = ExpFamEP(lik, M, Q[0], Q[1], S0)
    ep.learn(progress=False)
    assert_allclose(ep.lml(), -144.2381842202486, rtol=1e-3)
def test_lmm_scan_fast_scan():
    """fast_scan must match a direct multivariate-normal ML optimisation."""
    rnd = RandomState(9458)
    n = 30
    X = _covariates_sample(rnd, n, n + 1)
    y = _outcome_sample(rnd, 1.0, X)
    QS = economic_qs_linear(X)
    M0 = rnd.randn(n, 2)
    M1 = rnd.randn(n, 2)

    lmm = LMM(y, M0, QS)
    lmm.fit(verbose=False)

    # Reference covariance under the fitted variance components.
    K = lmm.v0 * X @ X.T + lmm.v1 * eye(n)
    M = concatenate((M0, M1[:, [0]]), axis=1)

    def neg_loglik(x):
        # x = [beta (3 entries), log-scale].
        return -st.multivariate_normal(M @ x[:3], exp(x[3]) * K).logpdf(y)

    res = minimize(neg_loglik, [0, 0, 0, 0])
    r = lmm.get_fast_scanner().fast_scan(M1, verbose=False)

    assert_allclose(r["lml"][0], -res.fun)
    assert_allclose(r["effsizes0"][0], res.x[:2], rtol=1e-5)
    assert_allclose(r["effsizes1"][0], res.x[2:3], rtol=1e-5)
    assert_allclose(r["scale"][0], exp(res.x[3]), rtol=1e-5)
def test_binomial_gradient_over_delta():
    """Analytical delta-gradient agrees with a finite-difference estimate."""
    n = 3
    M = ones((n, 1)) * 1.
    G = array([[1.2, 3.4], [-.1, 1.2], [0.0, .2]])
    (Q, S0) = economic_qs_linear(G)

    lik = BinomialProdLik(array([1., 1., 1.]), LogitLink())
    lik.nsuccesses = array([1., 0., 1.])

    ep = ExpFamEP(lik, M, Q[0], Q[1], S0 + 1.0)
    ep.beta = array([1.])
    assert_allclose(ep.beta, array([1.]))
    ep.v = 1.
    ep.delta = 0.5

    analytical = ep._gradient_over_delta()

    # Forward finite difference in delta.
    step = 1e-5
    lml0 = ep.lml()
    ep.delta = ep.delta + step
    lml1 = ep.lml()
    empirical = (lml1 - lml0) / step

    assert_allclose(empirical, analytical, rtol=1e-4)
def test_fast_scanner_set_scale_1covariate():
    """Single-covariate fast scan: lml, effect sizes and scale match known values."""
    rnd = RandomState(9458)
    n = 10
    X = _covariates_sample(rnd, n, n + 1)
    y = _outcome_sample(rnd, 1.0, X)
    QS = economic_qs_linear(X)
    M = rnd.randn(n, 1)

    lmm = LMM(y, M, QS)
    lmm.fit(verbose=False)
    assert_allclose(lmm.scale, 5.282731934070453)
    assert_allclose(lmm.delta, 0.7029974630034005)
    assert_allclose(lmm.beta, [0.0599712498212])

    markers = M.copy() + rnd.randn(n, 1)
    scanner = lmm.get_fast_scanner()
    r = scanner.fast_scan(markers, verbose=False)
    assert_allclose(r["lml"], [-21.509721], rtol=1e-6)
    assert_allclose(r["effsizes0"], [[-1.43206379971882]])
    assert_allclose(r["effsizes1"], [1.412239], rtol=1e-6)
    assert_allclose(r["scale"], [0.8440354018505616], rtol=1e-6)

    # Scanning an all-zero marker must recover the null-model betas.
    beta = lmm.beta
    assert_allclose(
        scanner.fast_scan(zeros((10, 1)), verbose=False)["effsizes0"][0], beta
    )
def test_bernoulli_optimize():
    """EP optimisation for a Bernoulli likelihood hits the expected optimum."""
    rnd = RandomState(139)
    nsamples = 100
    nfeatures = nsamples + 10

    G = rnd.randn(nsamples, nfeatures) / sqrt(nfeatures)
    M = ones((nsamples, 1))
    u = rnd.randn(nfeatures)
    z = 0.1 + dot(G, u) + 0.5 * rnd.randn(nsamples)

    # Threshold the latent variable into binary outcomes.
    y = empty(nsamples)
    y[z > 0] = 1
    y[z <= 0] = 0

    (Q, S0) = economic_qs_linear(G)
    lik = BernoulliProdLik(LogitLink())
    lik.outcome = y

    ep = ExpFamEP(lik, M, Q[0], Q[1], S0)
    ep.learn(progress=False)
    assert_allclose(ep.lml(), -67.67727582268618, rtol=1e-5)
    assert_allclose(ep.heritability, 0.6243068813130619, rtol=1e-5)
    assert_allclose(ep.beta[0], -0.2561108097463372, rtol=1e-5)
def test_fast_scanner_set_scale_1covariate_redundant():
    """Scanning a marker identical to the single covariate leaves the scale at 1."""
    rnd = RandomState(9458)
    n = 10
    X = _covariates_sample(rnd, n, n + 1)
    y = _outcome_sample(rnd, 1.0, X)
    QS = economic_qs_linear(X)
    M = rnd.randn(n, 1)

    lmm = LMM(y, M, QS)
    lmm.fit(verbose=False)

    r = lmm.get_fast_scanner().fast_scan(M.copy(), verbose=False)
    assert_allclose(r["lml"][0], -22.357525517597185, rtol=1e-6)
    assert_allclose(r["effsizes0"], [[0.029985622694805182]])
    assert_allclose(r["effsizes1"][0], 0.02998562491058301, rtol=1e-6, atol=1e-6)
    assert_allclose(r["scale"], [1.0], rtol=1e-6)
def test_lmm_interface():
    """Exercise the full public LMM interface on a tiny unrestricted model."""
    random = RandomState(1)
    n = 3
    G = random.randn(n, n + 1)
    X = random.randn(n, 2)
    y = X @ random.randn(2) + G @ random.randn(G.shape[1]) + random.randn(n)
    # Standardize the outcome.
    y -= y.mean(0)
    y /= y.std(0)
    QS = economic_qs_linear(G)
    lmm = LMM(y, X, QS, restricted=False)
    lmm.name = "lmm"
    lmm.fit(verbose=False)
    # Covariance, mean and log marginal likelihood at the optimum.
    assert_allclose(
        lmm.covariance(),
        [
            [0.436311031439718, 2.6243891396439837e-16, 2.0432156171727483e-16],
            [2.6243891396439837e-16, 0.4363110314397185, 4.814313140426306e-16],
            [2.0432156171727483e-16, 4.814313140426305e-16, 0.43631103143971817],
        ],
        atol=1e-7,
    )
    assert_allclose(
        lmm.mean(),
        [0.6398184791042468, -0.8738254794097052, 0.7198112606871158],
        atol=1e-7,
    )
    assert_allclose(lmm.lml(), -3.012715726960625, atol=1e-7)
    # value() is an alias for lml().
    assert_allclose(lmm.value(), lmm.lml(), atol=1e-7)
    assert_allclose(lmm.lml(), -3.012715726960625, atol=1e-7)
    assert_allclose(
        lmm.X,
        [
            [-0.3224172040135075, -0.38405435466841564],
            [1.1337694423354374, -1.0998912673140309],
            [-0.17242820755043575, -0.8778584179213718],
        ],
        atol=1e-7,
    )
    assert_allclose(lmm.beta, [-1.3155159120000266, -0.5615702941530938], atol=1e-7)
    assert_allclose(
        lmm.beta_covariance,
        [
            [0.44737305797088345, 0.20431961864892412],
            [0.20431961864892412, 0.29835835133251526],
        ],
        atol=1e-7,
    )
    assert_allclose(lmm.delta, 0.9999999999999998, atol=1e-7)
    assert_equal(lmm.ncovariates, 2)
    assert_equal(lmm.nsamples, 3)
    assert_allclose(lmm.scale, 0.43631103143971767, atol=1e-7)
    assert_allclose(lmm.v0, 9.688051060046502e-17, atol=1e-7)
    assert_allclose(lmm.v1, 0.43631103143971756, atol=1e-7)
    assert_equal(lmm.name, "lmm")
    # Gradient-based optimisation is not implemented by this interface.
    with pytest.raises(NotImplementedError):
        lmm.gradient()
def test_lmm_scan():
    """scan() must match a direct MVN optimisation and expose null-model statistics."""
    random = RandomState(9458)
    n = 30
    X = _covariates_sample(random, n, n + 1)
    offset = 1.0
    y = _outcome_sample(random, offset, X)
    QS = economic_qs_linear(X)
    M0 = random.randn(n, 2)
    M1 = random.randn(n, 2)
    lmm = LMM(y, M0, QS)
    lmm.fit(verbose=False)
    v0 = lmm.v0
    v1 = lmm.v1
    # Reference covariance under the fitted variance components.
    K = v0 * X @ X.T + v1 * eye(n)
    M = concatenate((M0, M1), axis=1)

    def fun(x):
        # x = [beta (4 entries), log-scale].
        beta = x[:4]
        scale = exp(x[4])
        return -st.multivariate_normal(M @ beta, scale * K).logpdf(y)

    res = minimize(fun, [0, 0, 0, 0, 0])
    scanner = lmm.get_fast_scanner()
    r = scanner.scan(M1)
    assert_allclose(r["lml"], -res.fun)
    assert_allclose(r["effsizes0"], res.x[:2], rtol=1e-5)
    assert_allclose(r["effsizes1"], res.x[2:4], rtol=1e-5)
    assert_allclose(r["scale"], exp(res.x[4]), rtol=1e-5)
    # Standard errors from the inverse of the scaled normal equations.
    K = r["scale"] * lmm.covariance()
    M = concatenate((M0, M1), axis=1)
    effsizes_se = sqrt(inv(M.T @ solve(K, M)).diagonal())
    assert_allclose(effsizes_se, concatenate((r["effsizes0_se"], r["effsizes1_se"])))
    # Null-model quantities must match the fitted LMM.
    assert_allclose(scanner.null_lml(), -53.805721275578456, rtol=1e-5)
    assert_allclose(scanner.null_beta, [0.26521964226797085, 0.4334778669761928], rtol=1e-5)
    assert_allclose(
        scanner.null_beta_covariance,
        [
            [0.06302553593799207, 0.00429640179038484],
            [0.004296401790384839, 0.05591392416235412],
        ],
        rtol=1e-5,
    )
    assert_allclose(scanner.null_scale, 1.0)
    assert_allclose(scanner.null_beta, lmm.beta, rtol=1e-5)
    assert_allclose(scanner.null_beta_covariance, lmm.beta_covariance, rtol=1e-5)
def _background_decomposition(G, K):
    """Economic eigen decomposition of the background covariance.

    Uses K directly when G is None; otherwise decomposes G (with K = G @ G.T
    implied). Returns (Q0, Q1, S0) with the eigenvalues normalized to unit mean.
    """
    if G is None:
        (Q, S0) = economic_qs(K)
    else:
        (Q, S0) = economic_qs_linear(G)

    # Normalize eigenvalues to unit mean.
    S0 /= S0.mean()
    return Q[0], Q[1], S0
def test_poisson_lml():
    """Log marginal likelihood of a tiny Poisson EP model at fixed parameters."""
    n = 3
    M = ones((n, 1)) * 1.
    G = array([[1.2, 3.4], [-.1, 1.2], [0.0, .2]])
    (Q, S0) = economic_qs_linear(G)

    lik = PoissonProdLik(LogLink())
    lik.noccurrences = array([1., 0., 5.])

    ep = ExpFamEP(lik, M, Q[0], Q[1], S0 + 1)
    ep.beta = array([1.])
    assert_almost_equal(ep.beta, array([1.]))
    ep.v = 1.
    ep.delta = 0
    assert_almost_equal(ep.lml(), -6.793765561069963)
def test_binomial_lml():
    """Log marginal likelihood of a tiny binomial EP model at fixed parameters."""
    n = 3
    M = ones((n, 1)) * 1.
    G = array([[1.2, 3.4], [-.1, 1.2], [0.0, .2]])
    (Q, S0) = economic_qs_linear(G)

    lik = BinomialProdLik(array([1., 1., 1.]), LogitLink())
    lik.nsuccesses = array([1., 0., 1.])

    ep = ExpFamEP(lik, M, Q[0], Q[1], S0 + 1)
    ep.beta = array([1.])
    assert_allclose(ep.beta, array([1.]))
    ep.v = 1.
    ep.delta = 0
    assert_allclose(ep.lml(), -2.3202659215368935)
def calc_lml(self, Env):
    """Fit an LMM whose random effect is x * Env (or all-ones when Env has no
    columns) and return its log marginal likelihood."""
    from numpy import ones, concatenate
    from glimix_core.lmm import LMM
    from numpy_sugar.linalg import economic_qs_linear

    covariates = concatenate([self.F, self.W, self.x], 1)
    xoE = ones(self.x.shape) if Env.shape[1] == 0 else self.x * Env
    QS = economic_qs_linear(xoE)

    model = LMM(self.y, covariates, QS, restricted=True)
    model.fit(verbose=False)
    return model.lml()
def _fit_cis_herit(y, K_cis, X=None, compute_lrt=True):
    """Estimate cis-heritability of `y` given cis kinship `K_cis` via an LMM.

    Returns (fe_scale, cis_scale, noise_scale, logl_1, fixed_betas, pval);
    pval is the LRT p-value against an OLS null, or None when compute_lrt
    is False.
    """
    log = logging.getLogger(pyfocus.LOG)
    try:
        from glimix_core.lmm import LMM
        from numpy_sugar.linalg import economic_qs_linear
    except ImportError as ie:
        log.error(
            "Training submodule requires glimix-core>=2.0.0 and numpy-sugar to be installed."
        )
        raise

    from scipy.stats import norm
    from scipy.linalg import lstsq

    # Default design: intercept only.
    if X is None:
        X = np.ones((len(y), 1))

    # K_cis is rebound to its economic eigen decomposition (Q, S).
    K_cis = economic_qs_linear(K_cis)
    lmm = LMM(y, X, K_cis)
    lmm.fit(verbose=False)

    fixed_betas = lmm.beta
    logl_1 = lmm.lml()

    # Variance components: genetic (v0), residual (v1), fixed-effect variance.
    cis_scale = lmm.v0
    noise_scale = lmm.v1
    fe_scale = lmm.fixed_effects_variance

    if compute_lrt:
        n, p = X.shape
        # reduced model is just OLS regression for fixed-effects
        fixed_betas_0, sosqs, ranks, svals = lstsq(X, y)
        s2e = sosqs / len(
            y
        )  # LMM also uses MLE estimation, so don't adjust for bias right now

        logl_0 = np.sum(
            norm.logpdf(y, loc=np.dot(X, fixed_betas_0), scale=np.sqrt(s2e)))
        pval = _lrt_pvalue(logl_0, logl_1)
        log.debug("Estimated cis-h2g = {} (P = {})".format(
            cis_scale / (cis_scale + noise_scale + fe_scale), pval))
    else:
        pval = None
        log.debug("Estimated cis-h2g = {}".format(
            cis_scale / (cis_scale + noise_scale + fe_scale)))

    return fe_scale, cis_scale, noise_scale, logl_1, fixed_betas, pval
def test_bernoulli_exceptions():
    # NOTE(review): despite the name, there is no pytest.raises/assert here —
    # presumably the ExpFamEP construction itself is expected to raise on these
    # inputs; confirm the intended failure mode.
    from limix_inference.random import bernoulli_sample
    from limix_inference.glmm import ExpFamEP
    from limix_inference.lik import BernoulliProdLik
    from limix_inference.link import LogLink
    from numpy_sugar.linalg import economic_qs_linear
    from numpy.random import RandomState

    offset = 5
    G = [[1, -1], [2, 1]]
    (Q0, Q1), S0 = economic_qs_linear(G)
    y = bernoulli_sample(offset, G, random_state=RandomState(0))
    covariates = [[1.], [0.6]]
    # NOTE(review): LogLink is passed as a class rather than an instance
    # (LogLink()) — looks deliberate for an exception test, but verify.
    lik = BernoulliProdLik(LogLink)
    lik.outcome = y
    glmm = ExpFamEP(lik, covariates, Q0, Q1, S0)
def test_bernoulli_lml():
    """Log marginal likelihood of a tiny Bernoulli EP model at fixed parameters."""
    n = 3
    M = ones((n, 1)) * 1.
    G = array([[1.2, 3.4], [-.1, 1.2], [0.0, .2]])
    (Q, S0) = economic_qs_linear(G)

    lik = BernoulliProdLik(LogitLink())
    lik.outcome = array([1., 0., 1.])

    ep = ExpFamEP(lik, M, Q[0], Q[1], S0 + 1.0)
    ep.beta = array([1.])
    assert_almost_equal(ep.beta, array([1.]))
    ep.v = 1.
    ep.delta = 0.

    assert_almost_equal(ep.lml(), -2.3202659215368935)
    assert_almost_equal(ep.sigma2_epsilon, 0)
    assert_almost_equal(ep.sigma2_b, 1)
def test_fast_scan():
    """Fast-scan lmls must match per-marker refits of the FastLMM model."""
    rnd = np.random.RandomState(9458)
    N = 500

    # Standardized design matrix.
    X = rnd.randn(N, N + 1)
    X -= X.mean(0)
    X /= X.std(0)
    X /= np.sqrt(X.shape[1])

    mean = OffsetMean()
    mean.offset = 1.0
    mean.set_data(N, purpose='sample')

    cov_left = LinearCov()
    cov_left.scale = 1.5
    cov_left.set_data((X, X), purpose='sample')

    cov_right = EyeCov()
    cov_right.scale = 1.5
    cov_right.set_data((arange(N), arange(N)), purpose='sample')

    cov = SumCov([cov_left, cov_right])
    lik = DeltaProdLik()
    y = GLMMSampler(lik, mean, cov).sample(rnd)

    (Q0, Q1), S0 = economic_qs_linear(X)
    flmm = FastLMM(y, Q0, Q1, S0, covariates=ones((N, 1)))
    flmm.learn(progress=False)

    markers = rnd.randn(N, 2)

    # Reference lmls: refit a copy with each marker appended to the covariates.
    reference = []
    for j in range(2):
        refit = flmm.copy()
        refit.M = concatenate([flmm.M, markers[:, j][:, newaxis]], axis=1)
        reference.append(refit.lml())

    lmls = flmm.get_normal_likelihood_trick().fast_scan(markers)[0]
    assert_allclose(lmls, reference, rtol=1e-5)
def test_fast_scanner_redundant_candidates():
    """fast_scan must not fail when candidate markers duplicate the covariates."""
    rnd = RandomState(9458)
    n = 10
    X = _covariates_sample(rnd, n, n + 1)
    y = _outcome_sample(rnd, 1.0, X)
    QS = economic_qs_linear(X)
    M = ones((n, 5))

    lmm = LMM(y, M, QS, restricted=False)
    lmm.fit(verbose=False)

    # Smoke test: scanning the covariates themselves must run cleanly.
    lmm.get_fast_scanner().fast_scan(M.copy(), verbose=False)
def test_glmmexpfam_bernoulli_probit_assure_delta_fixed():
    """Probit GLMM fitting leaves logitdelta implicitly fixed."""
    rnd = RandomState(1)
    N = 10
    G = rnd.randn(N, N + 50)
    y = bernoulli_sample(0.0, G, random_state=rnd)

    # Standardize the genotype matrix.
    G = ascontiguousarray(G, dtype=float)
    _stdnorm(G, 0, out=G)
    G /= sqrt(G.shape[1])

    QS = economic_qs_linear(G)
    S0 = QS[1]
    S0 /= S0.mean()

    X = ones((len(y), 1))
    model = GLMMExpFam(y, "probit", X, QS=(QS[0], QS[1]))
    model.fit(verbose=False)

    assert_allclose(model.lml(), -6.108751595773174, rtol=RTOL)
    assert_allclose(model.delta, 1.4901161193847673e-08, atol=1e-5)
    assert_(model._isfixed("logitdelta"))
def calc_opt_rho(self):
    """Estimate rho = v_het / (v_comm + v_het) from an LMM whose random effect
    is the interaction term x * Env."""
    import scipy as sp
    from glimix_core.lmm import LMM
    from numpy_sugar.linalg import economic_qs_linear

    covariates = sp.concatenate([self.F, self.W, self.x], 1)
    xoE = self.x * self.Env
    QS = economic_qs_linear(xoE)

    model = LMM(self.y, covariates, QS, restricted=True)
    model.fit(verbose=False)

    # Heterogeneous (GxE) variance: v0 scaled by the empirical variance of xoE.
    var_xEEx = ((xoE - xoE.mean(0)) ** 2).sum()
    var_xEEx /= float(self.y.shape[0] - 1)
    v_het = model.v0 * var_xEEx

    # Persistent variance from the last fixed effect (x itself).
    v_comm = sp.var(model.beta[-1] * self.x)

    return v_het / (v_comm + v_het)
def test_glmmexpfam_bernoulli_probit_problematic():
    """Probit GLMM: heritability is invariant to starting from delta fixed at 0 vs free."""
    random = RandomState(1)
    N = 30
    G = random.randn(N, N + 50)
    y = bernoulli_sample(0.0, G, random_state=random)
    # Standardize the genotype matrix.
    G = ascontiguousarray(G, dtype=float)
    _stdnorm(G, 0, out=G)
    G /= sqrt(G.shape[1])
    QS = economic_qs_linear(G)
    S0 = QS[1]
    S0 /= S0.mean()
    X = ones((len(y), 1))
    model = GLMMExpFam(y, "probit", X, QS=(QS[0], QS[1]))
    # First fit with delta fixed at zero.
    model.delta = 0
    model.fix("delta")
    model.fit(verbose=False)
    assert_allclose(model.lml(), -20.725623168378615, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0.0001220703125, atol=1e-3)
    assert_allclose(model.scale, 0.33022865011938707, atol=ATOL, rtol=RTOL)
    assert_allclose(model.beta, [-0.002617161564786044], atol=ATOL, rtol=RTOL)
    h20 = model.scale * (1 - model.delta) / (model.scale + 1)
    # Refit with delta free from a different starting point.
    model.unfix("delta")
    model.delta = 0.5
    model.scale = 1.0
    model.fit(verbose=False)
    assert_allclose(model.lml(), -20.725623168378522, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0.5017852859580029, atol=1e-3)
    assert_allclose(model.scale, 0.9928931515372, atol=ATOL, rtol=RTOL)
    assert_allclose(model.beta, [-0.003203427206253548], atol=ATOL, rtol=RTOL)
    h21 = model.scale * (1 - model.delta) / (model.scale + 1)
    # Both fits must imply the same heritability.
    assert_allclose(h20, h21, atol=ATOL, rtol=RTOL)
def test_lmm_predict():
    """Predictive mean from a fitted LMM correlates strongly with the outcome."""
    rnd = RandomState(9458)
    n = 30

    # Standardized design matrix.
    X = rnd.randn(n, n + 1)
    X -= X.mean(0)
    X /= X.std(0)
    X /= sqrt(X.shape[1])

    mean = OffsetMean(n)
    mean.offset = 1.0

    cov_left = LinearCov(X)
    cov_left.scale = 1.5
    cov_right = EyeCov(n)
    cov_right.scale = 1.5
    cov = SumCov([cov_left, cov_right])

    lik = DeltaProdLik()
    y = GGPSampler(lik, mean, cov).sample(rnd)

    QS = economic_qs_linear(X)
    lmm = LMM(y, ones((n, 1)), QS)
    lmm.fit(verbose=False)

    plmm = LMMPredict(y, lmm.beta, lmm.v0, lmm.v1, lmm.mean(), lmm.covariance())
    K = dot(X, X.T)
    pm = plmm.predictive_mean(ones((n, 1)), K, K.diagonal())
    assert_allclose(corrcoef(y, pm)[0, 1], 0.8358820971891354)
def test_glmmexpfam_bernoulli_problematic():
    """Bernoulli GLMM with delta fixed at zero on a problematic dataset."""
    rnd = RandomState(1)
    N = 30
    G = rnd.randn(N, N + 50)
    y = bernoulli_sample(0.0, G, random_state=rnd)

    # Standardize the genotype matrix.
    G = ascontiguousarray(G, dtype=float)
    _stdnorm(G, 0, out=G)
    G /= sqrt(G.shape[1])

    QS = economic_qs_linear(G)
    S0 = QS[1]
    S0 /= S0.mean()

    X = ones((len(y), 1))
    model = GLMMExpFam(y, "bernoulli", X, QS=(QS[0], QS[1]))
    model.delta = 0
    model.fix("delta")
    model.fit(verbose=False)

    assert_allclose(model.lml(), -20.727007958026853, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0, atol=1e-3)
    assert_allclose(model.scale, 0.879915823030081, atol=ATOL, rtol=RTOL)
    assert_allclose(model.beta, [-0.00247856564728], atol=ATOL, rtol=RTOL)
def _genetic_preprocess(X, G, K, background):
    """Normalize genetic data in place and compute the economic eigen decomposition.

    Parameters
    ----------
    X : candidate markers (samples x markers); standardized in place.
    G : background genetic markers or None; standardized in place.
    K : background covariance matrix or None; Gower-normalized in place.
    background : bookkeeping object whose attributes are filled in.

    Returns
    -------
    (Q0, Q1, S0) : economic eigen decomposition of the background covariance
        (from G when available, otherwise from K).

    Raises
    ------
    ValueError : if both G and K are None.
    """
    logger = logging.getLogger(__name__)
    logger.info("Number of candidate markers to scan: %d", X.shape[1])

    # Validate before mutating any input or background bookkeeping.
    if G is None and K is None:
        raise ValueError('G and K cannot be both None.')

    if K is not None:
        background.provided_via_variants = False
        logger.info('Covariace matrix normalization.')
        gower_normalization(K, out=K)

    if G is not None:
        background.provided_via_variants = True
        background.nvariants = G.shape[1]
        # Markers with zero variance carry no information; record how many.
        background.constant_nvariants = sum(G.std(0) == 0)
        logger.info('Genetic markers normalization.')
        stdnorm(G, 0, out=G)
        G /= sqrt(G.shape[1])

    logger.info('Computing the economic eigen decomposition.')
    if K is None:
        QS = economic_qs_linear(G)
    else:
        QS = economic_qs(K)

    Q0, Q1 = QS[0]
    S0 = QS[1]
    background.background_rank = len(S0)

    logger.info('Genetic marker candidates normalization.')
    stdnorm(X, 0, out=X)
    X /= sqrt(X.shape[1])

    return (Q0, Q1, S0)
def test_binomial_get_normal_likelihood_trick():
    """The normal-likelihood trick's fast scan reproduces the expected per-marker lmls."""
    random = RandomState(139)
    nsamples = 30
    nfeatures = 31
    G = random.randn(nsamples, nfeatures) / sqrt(nfeatures)
    u = random.randn(nfeatures)
    z = 0.1 + 2 * dot(G, u) + random.randn(nsamples)
    ntrials = random.randint(10, 500, size=nsamples)
    # Simulate per-sample success counts from a logistic latent model.
    y = zeros(nsamples)
    for i in range(len(ntrials)):
        y[i] = sum(
            z[i] + random.logistic(scale=pi / sqrt(3), size=ntrials[i]) > 0)
    (Q, S0) = economic_qs_linear(G)
    M = ones((nsamples, 1))
    lik = BinomialProdLik(ntrials, LogitLink())
    lik.nsuccesses = y
    ep = ExpFamEP(lik, M, Q[0], Q[1], S0)
    ep.learn(progress=False)
    # One lml per scanned marker column of G.
    nlt = ep.get_normal_likelihood_trick()
    assert_allclose(nlt.fast_scan(G)[0], [
        -143.48903288, -144.32031587, -144.03889888, -144.31806561,
        -143.90248659, -144.303103, -144.47854112, -144.44469341, -144.285027,
        -144.31240175, -143.11590263, -142.81623878, -141.67554141,
        -144.4780024, -144.47780285, -144.10317082, -142.10043322,
        -143.0813298, -143.99841663, -143.345783, -144.45458683,
        -144.37877612, -142.56846859, -144.32923028, -144.44116855,
        -144.45082936, -144.40932741, -143.0212886, -144.47902176,
        -143.94188634, -143.72765373
    ], rtol=1e-5)
def test_bernoulli_gradient_over_v():
    """Analytical v-gradient agrees with a finite-difference estimate."""
    n = 3
    M = ones((n, 1)) * 1.
    G = array([[1.2, 3.4], [-.1, 1.2], [0.0, .2]])
    (Q, S0) = economic_qs_linear(G)

    lik = BernoulliProdLik(LogitLink())
    lik.outcome = array([1., 0., 1.])

    ep = ExpFamEP(lik, M, Q[0], Q[1], S0 + 1.0)
    ep.beta = array([1.])
    assert_almost_equal(ep.beta, array([1.]))
    ep.v = 1.
    ep.delta = 0.

    analytical = ep._gradient_over_v()

    # Forward finite difference in v.
    step = 1e-5
    lml0 = ep.lml()
    ep.v = ep.v + step
    lml1 = ep.lml()
    empirical = (lml1 - lml0) / step

    assert_almost_equal(empirical, analytical, decimal=4)
def test_poisson_optimize():
    """EP optimisation for a Poisson likelihood reaches the expected optimum."""
    rnd = RandomState(139)
    nsamples = 30
    nfeatures = 31

    G = rnd.randn(nsamples, nfeatures) / sqrt(nfeatures)
    u = rnd.randn(nfeatures)
    z = 0.1 + 2 * dot(G, u) + rnd.randn(nsamples)

    # Poisson counts from the latent log-rate.
    y = zeros(nsamples)
    for i in range(nsamples):
        y[i] = rnd.poisson(lam=exp(z[i]))

    (Q0, Q1), S0 = economic_qs_linear(G)
    M = ones((nsamples, 1))
    lik = PoissonProdLik(LogLink())
    lik.noccurrences = y

    ep = ExpFamEP(lik, M, Q0, Q1, S0)
    ep.learn()
    assert_almost_equal(ep.lml(), -77.90919831238075, decimal=2)
    assert_almost_equal(ep.beta[0], 0.314709077094, decimal=1)
    assert_almost_equal(ep.heritability, 0.797775054939, decimal=1)
def test_learn():
    """FastLMM learning recovers the expected beta and variance components."""
    rnd = np.random.RandomState(9458)
    N = 500

    # Standardized design matrix.
    X = rnd.randn(N, N + 1)
    X -= X.mean(0)
    X /= X.std(0)
    X /= np.sqrt(X.shape[1])

    mean = OffsetMean()
    mean.offset = 1.0
    mean.set_data(N, purpose='sample')

    cov_left = LinearCov()
    cov_left.scale = 1.5
    cov_left.set_data((X, X), purpose='sample')

    cov_right = EyeCov()
    cov_right.scale = 1.5
    cov_right.set_data((arange(N), arange(N)), purpose='sample')

    cov = SumCov([cov_left, cov_right])
    lik = DeltaProdLik()
    y = GLMMSampler(lik, mean, cov).sample(rnd)

    (Q0, Q1), S0 = economic_qs_linear(X)
    flmm = FastLMM(y, Q0, Q1, S0, covariates=ones((N, 1)))
    flmm.learn(progress=False)

    assert_allclose(flmm.beta[0], 0.8997652129631661, rtol=1e-5)
    assert_allclose(flmm.genetic_variance, 1.7303981309775553, rtol=1e-5)
    assert_allclose(flmm.environmental_variance, 1.2950028351268132, rtol=1e-5)
def _test_lmm(random, y, X, G, mvn, restricted):
    """Compare LMM against a reference multivariate-normal log-density `mvn`
    under free parameters and with each of beta/delta/scale fixed in turn."""
    c = X.shape[1]
    QS = economic_qs_linear(G)
    lmm = LMM(y, X, QS, restricted=restricted)
    beta = lmm.beta
    v0 = lmm.v0
    v1 = lmm.v1
    K0 = G @ G.T
    # lml must agree with mvn at the initial parameters ...
    assert_allclose(lmm.lml(), mvn(beta, v0, v1, y, X, K0))
    # ... and after setting each parameter individually.
    beta = random.randn(c)
    lmm.beta = beta
    assert_allclose(lmm.lml(), mvn(beta, v0, v1, y, X, K0))
    delta = random.rand(1).item()
    lmm.delta = delta
    v0 = lmm.v0
    v1 = lmm.v1
    assert_allclose(lmm.lml(), mvn(beta, v0, v1, y, X, K0))
    scale = random.rand(1).item()
    lmm.scale = scale
    v0 = lmm.v0
    v1 = lmm.v1
    assert_allclose(lmm.lml(), mvn(beta, v0, v1, y, X, K0))

    # Full fit must match numerical optimisation of the reference likelihood.
    def fun(x):
        beta = x[:c]
        v0 = exp(x[c])
        v1 = exp(x[c + 1])
        return -mvn(beta, v0, v1, y, X, K0)

    res = minimize(fun, [0] * c + [0, 0])
    lmm.fit(verbose=False)
    assert_allclose(lmm.lml(), -res.fun, rtol=1e-3, atol=1e-6)
    assert_allclose(lmm.beta, res.x[:c], rtol=1e-3, atol=1e-6)
    assert_allclose(lmm.v0, exp(res.x[c]), rtol=1e-3, atol=1e-6)
    assert_allclose(lmm.v1, exp(res.x[c + 1]), rtol=1e-3, atol=1e-6)

    # Fit with beta fixed.
    lmm = LMM(y, X, QS, restricted=restricted)
    beta = random.randn(c)
    lmm.beta = beta
    lmm.delta = random.rand(1).item()
    lmm.scale = random.rand(1).item()
    lmm.fix("beta")

    def fun(x):
        v0 = exp(x[0])
        v1 = exp(x[1])
        return -mvn(beta, v0, v1, y, X, K0)

    res = minimize(fun, [0, 0])
    lmm.fit(verbose=False)
    assert_allclose(lmm.lml(), -res.fun, rtol=1e-3, atol=1e-6)
    assert_allclose(lmm.v0, exp(res.x[0]), rtol=1e-3, atol=1e-6)
    assert_allclose(lmm.v1, exp(res.x[1]), rtol=1e-3, atol=1e-6)

    # Fit with delta fixed.
    lmm = LMM(y, X, QS, restricted=restricted)
    lmm.beta = random.randn(c)
    delta = random.rand(1).item()
    lmm.delta = delta
    lmm.scale = random.rand(1).item()
    lmm.fix("delta")

    def fun(x):
        beta = x[:c]
        scale = exp(x[c])
        v0 = scale * (1 - delta)
        v1 = scale * delta
        return -mvn(beta, v0, v1, y, X, K0)

    res = minimize(fun, [0] * c + [0])
    lmm.fit(verbose=False)
    assert_allclose(lmm.lml(), -res.fun, rtol=1e-5, atol=1e-6)
    assert_allclose(lmm.beta, res.x[:c], rtol=1e-5, atol=1e-6)
    assert_allclose(lmm.scale, exp(res.x[c]), rtol=1e-5, atol=1e-6)

    # Fit with scale fixed; delta is parameterised through a logistic transform.
    lmm = LMM(y, X, QS, restricted=restricted)
    lmm.beta = random.randn(c)
    lmm.delta = random.rand(1).item()
    scale = random.rand(1).item()
    lmm.scale = scale
    lmm.fix("scale")

    def fun(x):
        beta = x[:c]
        delta = 1 / (1 + exp(-x[c]))
        v0 = scale * (1 - delta)
        v1 = scale * delta
        return -mvn(beta, v0, v1, y, X, K0)

    res = minimize(fun, [0] * c + [0])
    lmm.fit(verbose=False)
    assert_allclose(lmm.lml(), -res.fun, rtol=1e-5, atol=1e-6)
    assert_allclose(lmm.beta, res.x[:c], rtol=1e-3, atol=1e-6)
    assert_allclose(lmm.delta, 1 / (1 + exp(-res.x[c])), rtol=1e-3, atol=1e-6)