def test_glmmexpfam_optimize():
    """Check lml values before fitting, with fixed parameters, and fully free."""
    nsamples = 10
    rnd = RandomState(0)
    covariates = rnd.randn(nsamples, 5)
    K = linear_eye_cov().value()
    z = rnd.multivariate_normal(0.2 * ones(nsamples), K)
    QS = economic_qs(K)

    # Simulate binomial outcomes from the latent values.
    ntrials = rnd.randint(1, 30, nsamples)
    successes = zeros(nsamples, dtype=int)
    for idx, nt in enumerate(ntrials):
        successes[idx] += sum(z[idx] + 0.2 * rnd.randn(nt) > 0)
    ntrials = ascontiguousarray(ntrials)

    glmm = GLMMExpFam(successes, ("binomial", ntrials), covariates, QS)
    assert_allclose(glmm.lml(), -29.102168129099287, atol=ATOL, rtol=RTOL)

    # With beta and scale pinned, only the remaining parameters move.
    glmm.fix("beta")
    glmm.fix("scale")
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -27.635788105778012, atol=ATOL, rtol=RTOL)

    # Releasing everything should reach a higher marginal likelihood.
    glmm.unfix("beta")
    glmm.unfix("scale")
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -19.68486269551159, atol=ATOL, rtol=RTOL)
def test_glmmexpfam_predict():
    # Out-of-sample prediction for a binomial GLMM: train on 4/5 of the
    # samples, predict the held-out 1/5, and check the predictive mean
    # correlates with observed success rates.
    random = RandomState(4)

    n = 100
    p = n + 1

    X = ones((n, 2))
    X[:, 1] = random.randn(n)

    # Column-standardized genotype matrix and its linear kinship K = GG'.
    G = random.randn(n, p)
    G /= G.std(0)
    G -= G.mean(0)
    G /= sqrt(p)
    K = dot(G, G.T)

    # Random 80/20 train/test split.
    i = asarray(arange(0, n), int)
    si = random.choice(i, n, replace=False)
    ntest = int(n // 5)
    itrain = si[:-ntest]
    itest = si[-ntest:]

    Xtrain = X[itrain, :]
    Ktrain = K[itrain, :][:, itrain]

    Xtest = X[itest, :]

    # Latent variable combining fixed effects and the K-correlated component.
    beta = random.randn(2)
    z = random.multivariate_normal(dot(X, beta), 0.9 * K + 0.1 * eye(n))

    ntri = random.randint(1, 100, n)
    nsuc = zeros(n, dtype=int)
    # NOTE(review): the loop variable shadows the index array `i` above;
    # harmless here because `i` is not used as an array afterwards.
    for (i, ni) in enumerate(ntri):
        nsuc[i] += sum(z[i] + 0.2 * random.randn(ni) > 0)

    ntri = ascontiguousarray(ntri)

    QStrain = economic_qs(Ktrain)
    nsuc_train = ascontiguousarray(nsuc[itrain])
    ntri_train = ascontiguousarray(ntri[itrain])

    nsuc_test = ascontiguousarray(nsuc[itest])
    ntri_test = ascontiguousarray(ntri[itest])

    glmm = GLMMExpFam(nsuc_train, ("binomial", ntri_train), Xtrain, QStrain)
    glmm.fit(verbose=False)
    # Cross-covariances (test x train) and test variances for prediction.
    ks = K[itest, :][:, itrain]
    kss = asarray([K[i, i] for i in itest])
    pm = glmm.predictive_mean(Xtest, ks, kss)
    pk = glmm.predictive_covariance(Xtest, ks, kss)
    r = nsuc_test / ntri_test
    # Predicted means should track observed success rates closely.
    assert_(corrcoef([pm, r])[0, 1] > 0.8)
    assert_allclose(pk[0], 54.263705682514846)
def _fit_glmm_simple_model(self, verbose):
    """Fit a GLMMExpFam on the simple model and store variances and model."""
    from numpy import asarray
    from numpy_sugar.linalg import economic_qs
    from glimix_core.glmm import GLMMExpFam

    K = self._get_matrix_simple_model()
    outcome = asarray(self._y, float).ravel()

    # Decompose the covariance only when one was provided.
    QS = economic_qs(K) if K is not None else None

    glmm = GLMMExpFam(outcome, self._lik, self._M, QS)
    glmm.fit(verbose=verbose)

    self._set_simple_model_variances(glmm.v0, glmm.v1)
    self._glmm = glmm
def test_glmmexpfam_poisson():
    """Poisson-likelihood GLMMExpFam: lml at the start point and after fitting."""
    from numpy import ones, stack, exp, zeros
    from numpy.random import RandomState
    from numpy_sugar.linalg import economic_qs
    from pandas import DataFrame

    random = RandomState(1)

    # sample size
    n = 30

    # covariates: a constant offset plus age, as a labelled DataFrame.
    # (Removed a dead assignment that built M as a plain ndarray and then
    # immediately overwrote it with the DataFrame below.)
    offset = ones(n) * random.randn()
    age = random.randint(16, 75, n)
    M = DataFrame(stack([offset, age], axis=1), columns=["offset", "age"])
    M["sample"] = [f"sample{i}" for i in range(n)]
    M = M.set_index("sample")

    # genetic variants
    G = random.randn(n, 4)

    # sampling the latent phenotype from covariate and variant effects
    alpha = random.randn(2)
    beta = random.randn(4)
    eps = random.randn(n)
    y = M @ alpha + G @ beta + eps

    # Whole genotype of each sample.
    X = random.randn(n, 50)
    # Estimate a kinship relationship between samples (regularized).
    X_ = (X - X.mean(0)) / X.std(0) / sqrt(X.shape[1])
    K = X_ @ X_.T + eye(n) * 0.1

    # Add a polygenic, K-correlated component and standardize.
    y += random.multivariate_normal(zeros(n), K)
    y = (y - y.mean()) / y.std()

    # Draw Poisson counts from the latent variable.
    z = y.copy()
    y = random.poisson(exp(z))

    M = M - M.mean(0)
    QS = economic_qs(K)

    glmm = GLMMExpFam(y, "poisson", M, QS)
    assert_allclose(glmm.lml(), -52.479557279193585)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -34.09720756737648)
def _glmm(y, lik, M, QS, verbose):
    """Fit an EP GLMM, then wrap a fast scanner built from its normal surrogate.

    Returns a (scanner, v0, v1) triple.
    """
    from glimix_core.glmm import GLMMExpFam, GLMMNormal

    ep_model = GLMMExpFam(y.ravel(), lik, M, QS)
    ep_model.fit(verbose=verbose)
    v0, v1 = ep_model.v0, ep_model.v1
    sys.stdout.flush()

    # The EP site parameters define the pseudo-data of the Gaussian surrogate.
    site = ep_model.site
    gnormal = GLMMNormal(site.eta, site.tau, M, QS)
    gnormal.fit(verbose=verbose)

    return ScannerWrapper(gnormal.get_fast_scanner()), v0, v1
def test_glmmexpfam_binomial_large_ntrials():
    """Binomial GLMM stays numerically sound with very large trial counts."""
    rnd = RandomState(0)
    n = 10
    covariates = rnd.randn(n, 2)
    G = rnd.randn(n, 100)
    K = dot(G, G.T)

    ntrials = rnd.randint(1, 100000, n)
    z = dot(G, rnd.randn(100)) / sqrt(100)

    # One Bernoulli draw per trial, accumulated into success counts.
    successes = zeros(len(ntrials), int)
    for idx, nt in enumerate(ntrials):
        for _ in range(nt):
            successes[idx] += int(z[idx] + 0.1 * rnd.randn() > 0)

    QS = economic_qs(K)
    model = GLMMExpFam(successes, ("binomial", ntrials), covariates, QS)
    model.fit(verbose=False)
    assert_allclose(model.lml(), -43.067433588125446)
def test_glmmexpfam_optimize_low_rank():
    """Optimization works when the covariance K = XX' is low-rank."""
    nsamples = 10
    rnd = RandomState(0)
    covariates = rnd.randn(nsamples, 5)
    K = dot(covariates, covariates.T)  # rank-5 covariance
    z = dot(covariates, 0.2 * rnd.randn(5))
    QS = economic_qs(K)

    # Binomial outcomes simulated from the latent values.
    ntrials = rnd.randint(1, 30, nsamples)
    successes = zeros(nsamples, dtype=int)
    for idx, nt in enumerate(ntrials):
        successes[idx] += sum(z[idx] + 0.2 * rnd.randn(nt) > 0)
    ntrials = ascontiguousarray(ntrials)

    model = GLMMExpFam(successes, ("binomial", ntrials), covariates, QS)
    assert_allclose(model.lml(), -18.60476792256323, atol=ATOL, rtol=RTOL)
    model.fit(verbose=False)
    assert_allclose(model.lml(), -7.800621320491801, atol=ATOL, rtol=RTOL)
def test_glmmexpfam_bernoulli_probit_assure_delta_fixed():
    """Probit link with near-zero delta: logitdelta must end up fixed."""
    rnd = RandomState(1)
    nsamples = 10

    G = rnd.randn(nsamples, nsamples + 50)
    y = bernoulli_sample(0.0, G, random_state=rnd)

    # Standardize columns and scale so the kinship has unit diagonal on average.
    G = ascontiguousarray(G, dtype=float)
    _stdnorm(G, 0, out=G)
    G /= sqrt(G.shape[1])

    QS = economic_qs_linear(G)
    S0 = QS[1]
    S0 /= S0.mean()

    X = ones((len(y), 1))
    model = GLMMExpFam(y, "probit", X, QS=(QS[0], QS[1]))
    model.fit(verbose=False)

    assert_allclose(model.lml(), -6.108751595773174, rtol=RTOL)
    assert_allclose(model.delta, 1.4901161193847673e-08, atol=1e-5)
    # The optimizer should have pinned logitdelta at the boundary.
    assert_(model._isfixed("logitdelta"))
def _perform_glmm(y, lik, M, K, QS, G, verbose):
    # Fit an EP GLMM, approximate it with a Gaussian model, and fast-scan
    # every candidate in G.  Returns a QTLModel holding the null lml,
    # per-candidate alternative lmls, effect sizes, and covariate effects.
    # NOTE(review): parameter ``K`` is unused in this function.
    from glimix_core.glmm import GLMMExpFam, GLMMNormal
    from pandas import Series
    from xarray import DataArray

    glmm = GLMMExpFam(y.ravel(), lik, M.values, QS)
    glmm.fit(verbose=verbose)
    sys.stdout.flush()

    # EP site parameters become the pseudo-data of the normal surrogate.
    eta = glmm.site.eta
    tau = glmm.site.tau

    gnormal = GLMMNormal(eta, tau, M.values, QS)
    gnormal.fit(verbose=verbose)

    beta = gnormal.beta

    covariates = list(M.coords["covariate"].values)
    ncov_effsizes = Series(beta, covariates)

    flmm = gnormal.get_fast_scanner()
    flmm.set_scale(1.0)
    null_lml = flmm.null_lml()

    # G may expose raw values via ``data`` (e.g. dask-backed xarray) or
    # via ``values`` (pandas/xarray eager arrays).
    if hasattr(G, "data"):
        values = G.data
    else:
        values = G.values
    alt_lmls, effsizes = flmm.fast_scan(values, verbose=verbose)

    # Carry candidate-dimension coordinates over into the result arrays.
    coords = {
        k: ("candidate", G.coords[k].values)
        for k in G.coords.keys()
        if G.coords[k].dims[0] == "candidate"
    }

    alt_lmls = DataArray(alt_lmls, dims=["candidate"], coords=coords)
    effsizes = DataArray(effsizes, dims=["candidate"], coords=coords)

    return QTLModel(null_lml, alt_lmls, effsizes, ncov_effsizes)
def test_glmmexpfam_bernoulli_probit_problematic():
    # A problematic probit fit: optimizing with delta clamped near zero and
    # with delta free (restarted at 0.5) must agree on the lml and on the
    # implied heritability h2.
    random = RandomState(1)
    N = 30
    G = random.randn(N, N + 50)
    y = bernoulli_sample(0.0, G, random_state=random)

    # Standardize columns and normalize so the kinship is well scaled.
    G = ascontiguousarray(G, dtype=float)
    _stdnorm(G, 0, out=G)
    G /= sqrt(G.shape[1])

    QS = economic_qs_linear(G)
    S0 = QS[1]
    S0 /= S0.mean()

    X = ones((len(y), 1))
    model = GLMMExpFam(y, "probit", X, QS=(QS[0], QS[1]))

    # First fit: delta fixed at (numerically) zero.
    model.delta = 0
    model.fix("delta")
    model.fit(verbose=False)

    assert_allclose(model.lml(), -20.725623168378615, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0.0001220703125, atol=1e-3)
    assert_allclose(model.scale, 0.33022865011938707, atol=ATOL, rtol=RTOL)
    assert_allclose(model.beta, [-0.002617161564786044], atol=ATOL, rtol=RTOL)

    # Heritability implied by the first fit.
    h20 = model.scale * (1 - model.delta) / (model.scale + 1)

    # Second fit: delta free, restarted from a neutral parameter point.
    model.unfix("delta")
    model.delta = 0.5
    model.scale = 1.0
    model.fit(verbose=False)

    assert_allclose(model.lml(), -20.725623168378522, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0.5017852859580029, atol=1e-3)
    assert_allclose(model.scale, 0.9928931515372, atol=ATOL, rtol=RTOL)
    assert_allclose(model.beta, [-0.003203427206253548], atol=ATOL, rtol=RTOL)

    h21 = model.scale * (1 - model.delta) / (model.scale + 1)
    # Both parameterizations must imply the same heritability.
    assert_allclose(h20, h21, atol=ATOL, rtol=RTOL)
def test_glmmexpfam_poisson():
    """Poisson GLMM with an ndarray covariate matrix: lml before/after fitting."""
    rnd = RandomState(1)

    # sample size
    n = 30

    # covariates: a constant offset plus age
    offset = ones(n) * rnd.randn()
    age = rnd.randint(16, 75, n)
    M = stack((offset, age), axis=1)

    # genetic variants
    G = rnd.randn(n, 4)

    # sample the latent phenotype from covariate and variant effects
    alpha = rnd.randn(2)
    beta = rnd.randn(4)
    eps = rnd.randn(n)
    y = M @ alpha + G @ beta + eps

    # whole genotype of each sample
    X = rnd.randn(n, 50)
    # estimated kinship between samples (regularized)
    X_ = (X - X.mean(0)) / X.std(0) / sqrt(X.shape[1])
    K = X_ @ X_.T + eye(n) * 0.1

    # add a polygenic component, standardize, and draw Poisson counts
    y += rnd.multivariate_normal(zeros(n), K)
    y = (y - y.mean()) / y.std()
    z = y.copy()
    y = rnd.poisson(exp(z))

    M = M - M.mean(0)
    QS = economic_qs(K)

    model = GLMMExpFam(y, "poisson", M, QS)
    assert_allclose(model.lml(), -52.479557279193585)
    model.fit(verbose=False)
    assert_allclose(model.lml(), -34.09720756737648)
def _st_glmm(y, lik, M, QS, verbose):
    """Fit a single-trait GLMM and return (fast_scanner, v0, v1).

    ``v0`` is NaN when no covariance decomposition is supplied.
    """
    from numpy import nan
    from glimix_core.glmm import GLMMExpFam, GLMMNormal

    model = GLMMExpFam(y, lik, M, QS)
    model.fit(verbose=verbose)

    v0 = nan if QS is None else model.v0
    v1 = model.v1
    sys.stdout.flush()

    # Gaussian surrogate built from the EP site parameters.
    site = model.site
    gnormal = GLMMNormal(site.eta, site.tau, M, QS)
    gnormal.fit(verbose=verbose)

    return gnormal.get_fast_scanner(), v0, v1
def test_glmmexpfam_bernoulli_problematic():
    """Bernoulli GLMM with delta fixed at zero on a problematic dataset."""
    rnd = RandomState(1)
    nsamples = 30

    G = rnd.randn(nsamples, nsamples + 50)
    y = bernoulli_sample(0.0, G, random_state=rnd)

    # Standardize columns and scale the genotype matrix.
    G = ascontiguousarray(G, dtype=float)
    _stdnorm(G, 0, out=G)
    G /= sqrt(G.shape[1])

    QS = economic_qs_linear(G)
    S0 = QS[1]
    S0 /= S0.mean()

    X = ones((len(y), 1))
    model = GLMMExpFam(y, "bernoulli", X, QS=(QS[0], QS[1]))
    model.delta = 0
    model.fix("delta")
    model.fit(verbose=False)

    assert_allclose(model.lml(), -20.727007958026853, atol=ATOL, rtol=RTOL)
    assert_allclose(model.delta, 0, atol=1e-3)
    assert_allclose(model.scale, 0.879915823030081, atol=ATOL, rtol=RTOL)
    assert_allclose(model.beta, [-0.00247856564728], atol=ATOL, rtol=RTOL)
def test_glmmexpfam_copy():
    """Copies are independent of the original yet converge to the same optimum."""
    nsamples = 10
    rnd = RandomState(0)
    covariates = rnd.randn(nsamples, 5)
    K = linear_eye_cov().value()
    z = rnd.multivariate_normal(0.2 * ones(nsamples), K)
    QS = economic_qs(K)

    # Simulate binomial outcomes from the latent values.
    ntrials = rnd.randint(1, 30, nsamples)
    successes = zeros(nsamples, dtype=int)
    for idx, nt in enumerate(ntrials):
        successes[idx] += sum(z[idx] + 0.2 * rnd.randn(nt) > 0)
    ntrials = ascontiguousarray(ntrials)

    glmm0 = GLMMExpFam(successes, ("binomial", ntrials), covariates, QS)
    assert_allclose(glmm0.lml(), -29.10216812909928, atol=ATOL, rtol=RTOL)
    glmm0.fit(verbose=False)

    v = -19.575736562427252
    assert_allclose(glmm0.lml(), v)

    glmm1 = glmm0.copy()
    assert_allclose(glmm1.lml(), v)

    # Mutating the copy must not affect the original.
    glmm1.scale = 0.92
    assert_allclose(glmm0.lml(), v, atol=ATOL, rtol=RTOL)
    assert_allclose(glmm1.lml(), -30.832831740038056, atol=ATOL, rtol=RTOL)

    # Refitting both should land on the same optimum again.
    glmm0.fit(verbose=False)
    glmm1.fit(verbose=False)
    v = -19.575736562378573
    assert_allclose(glmm0.lml(), v)
    assert_allclose(glmm1.lml(), v)
import numpy as np
import numpy_sugar as ns
from glimix_core.glmm import GLMMExpFam
from time import time

# Benchmark: fit a binomial GLMM on pre-saved null-simulation data and
# record the elapsed fitting time.
G = np.load('null_G.npy')
ntri = np.load('null_ntri.npy')
nsuc = np.load('null_nsuc.npy')

N, P = G.shape
QS = ns.linalg.economic_qs(G.dot(G.T))
X = np.ones((N, 1))

ntri = np.asarray(ntri, float)
nsuc = np.asarray(nsuc, float)

start = time()
# BUG FIX: GLMMExpFam takes (successes, (lik_name, ntrials), X, QS), as in
# every other call site; the previous call passed ((nsuc, ntri), "binomial",
# ...), swapping the outcome and likelihood arguments.
glmm = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)
glmm.fit(verbose=True)
stop = time()

elapsed = stop - start
print("Elapsed: {}".format(elapsed))
np.save("out/fastglmm_N{}".format(N), elapsed)