def __init__(self, snps_test, phenotype, K=None, snps_K=None, covariates=None, h2=None, interact_with_snp=None, nGridH2=10, standardizer=None, add_bias=True, normalize_K=True, blocksize=10000):
    """Set up a mixed-model GWAS: phenotype/covariates, kernel K (given directly
    or computed from snps_K), optional bias column, and the underlying lmm_cov.LMM.

    Args:
        snps_test: SNPs to be tested for association (stored, not read here).
        phenotype: phenotype values; passed through GWAS.check_pheno_format.
        K: precomputed [N x N] kinship matrix (mutually exclusive with snps_K).
        snps_K: SNP data from which to compute the kinship kernel.
        covariates: covariate values; passed through GWAS.check_pheno_format.
        h2: NOTE(review): accepted but never used in this constructor — verify intent.
        interact_with_snp: stored for later use (not read here).
        nGridH2: grid size for the h2 search (stored; used via self._find_h2()).
        standardizer: SNP standardizer; defaults to pysnptools.standardizer.Unit().
        add_bias: if True, add a constant 'bias' column to the covariates.
        normalize_K: if True, scale K so its diagonal mean is 1.0.
        blocksize: block size for the kernel computation from snps_K.

    Raises:
        NotImplementedError: if both K and snps_K are given, or if add_bias is
            False and no covariates are provided.
    """
    if standardizer is None:
        standardizer = pysnptools.standardizer.Unit()
    self.standardizer = standardizer
    # Both phenotype and covariates go through the same format check/conversion.
    self.phenotype = GWAS.check_pheno_format(phenotype=phenotype)
    self.covariates = GWAS.check_pheno_format(phenotype=covariates)
    self.snps_test = snps_test
    self.snps_K = snps_K
    self.K = K
    self.linreg = False
    if (self.K is None) and (self.snps_K is None):
        # No kinship at all: fall back to plain linear regression by using a
        # zero-column background-SNP matrix.
        # NOTE(review): self.sample_count is not set in this constructor —
        # presumably a property defined elsewhere on the class; confirm.
        G = np.zeros((self.sample_count, 0))
        self.linreg = True
    elif self.K is None:
        # Kinship computed from SNP data.
        G = None
        self.K = snps_K.kernel(standardizer=standardizer, blocksize=blocksize)
        if normalize_K:
            # In-place division is safe here: self.K was freshly computed above.
            self.K /= self.K.diagonal().mean()
    elif snps_K is None:
        # Kinship supplied by the caller.
        G = None
        if normalize_K:
            # Out-of-place division so the caller's K array is not mutated.
            self.K = self.K / self.K.diagonal().mean()
    else:
        raise NotImplementedError("either K or snps_K has to be None")
    if add_bias and (self.covariates is None):
        pass  # this case is treated in LMM()
    elif add_bias:
        # Append a constant column of ones aligned to the covariate index.
        bias = pd.Series(np.ones((self.covariates.shape[0])), index=self.covariates.index)
        self.covariates['bias'] = bias
    elif (add_bias == False) and self.covariates is None:
        raise NotImplementedError(
            "currently a model with neither bias term, nor covariates is not supported."
        )
    self.interact_with_snp = interact_with_snp
    self.nGridH2 = nGridH2
    # Build the LMM after the bias column (if any) has been added.
    if self.covariates is not None:
        self.lmm = lmm_cov.LMM(X=self.covariates.values, Y=None, G=G, K=self.K, inplace=True)
    else:
        self.lmm = lmm_cov.LMM(X=None, Y=None, G=G, K=self.K, inplace=True)
    self._find_h2()
def __init__(self, Y, X=None, appendbias=False, forcefullrank=False, G0=None, K0=None, nullModel=None, altModel=None):
    """Initialize a variance-component test: fit the single-kernel null model.

    Args:
        Y: phenotype vector; handled by the association.varcomp_test base class.
        X: covariates; handled by the base class.
        appendbias: whether the base class appends a bias column to X.
        forcefullrank: force the full-rank code path in lmm.LMM.
        G0: background SNP matrix for the null-model kernel.
        K0: precomputed kinship matrix for the null model.
        nullModel: description of the null model (stored for later use).
        altModel: description of the alternative model (stored for later use).
    """
    association.varcomp_test.__init__(self, Y=Y, X=X, appendbias=appendbias)
    # Fix: removed unused local `N = self.Y.shape[0]` (assigned, never read).
    self.forcefullrank = forcefullrank
    self.nullModel = nullModel
    self.altModel = altModel
    self.G0 = G0
    self.K0 = K0
    self.__testGcalled = False
    # self.X / self.Y are set by the base-class __init__ above.
    self.lmm = lmm.LMM(forcefullrank=self.forcefullrank, X=self.X, linreg=None, Y=self.Y[:, np.newaxis], G=self.G0, K=self.K0, regressX=True)
    self.model0 = self.lmm.findH2()  # The null model only has a single kernel and only needs to find h2
    self.model1 = None  # alternative model is fit lazily, elsewhere
def est_h2(Y, K, covariates=None, nGridH2=10000, plot=True, verbose=True):
    """
    This function implements the Bayesian heritability estimate from
    Furlotte et al., 2014

    Furlotte, Nicholas A., David Heckerman, and Christoph Lippert.
    "Quantifying the uncertainty in heritability."
    Journal of human genetics 59.5 (2014): 269-275.

    Args:
        Y:           [N x 1] np.ndarray of phenotype values
        K:           [N x N] np.ndarray of kinship values
        covariates:  [N x D] np.ndarray of covariate values [default: None]
        nGridH2:     number of h^2 grid points for the posterior [default: 10000]
        plot:        Boolean, create a plot? [default: True]
        verbose:     print results? [default: True]

    returns:
        REML estimate of h^2 (as in Yang et al. 2010)
        posterior mean of h^2
        posterior variance of h^2
        h2 values on a grid
        posterior for h2 values on a grid
    """
    lmm = lmm_cov.LMM(forcefullrank=False, X=covariates, linreg=None, Y=Y, G=None, K=K, regressX=True, inplace=False)
    h2 = lmm.findH2()
    h2_posterior = lmm.posterior_h2(nGridH2=nGridH2)
    logp = -h2_posterior[2]
    grid = h2_posterior[1]
    # Hoist the repeated softmax-style weights (fix: was recomputed 5 times).
    # Subtracting logp.max() keeps the exponentials numerically stable.
    weights = np.exp(logp - logp.max())
    weight_sum = weights.sum()
    # Posterior density on the grid, scaled so it integrates to 1 over [0, 1].
    post_h2 = weights / weight_sum * logp.shape[0]
    h2_mean = (weights * grid[:, np.newaxis]).sum() / weight_sum
    h2_var = (weights * (grid[:, np.newaxis] - h2_mean) ** 2.0).sum() / weight_sum
    if plot:
        import pylab as plt
        plt.figure()
        # REML point estimate (red) vs posterior mean (green).
        plt.plot([h2['h2'], h2['h2']], [0, 1], "r")
        plt.plot([h2_mean, h2_mean], [0, 1], "g")
        plt.legend([
            "REML-estimate = %.3f" % h2['h2'],
            "posterior mean = %.3f" % h2_mean
        ])
        plt.plot(grid.flatten(), post_h2.flatten())
        plt.xlabel("$h^2$")
        plt.ylabel("$p( h^2 | Data)$")
    if verbose:
        print("max[h^2] = %.5f E[h^2] = %.5f +- %.5f" % (h2['h2'], h2_mean, np.sqrt(h2_var)))
    return h2, h2_mean, h2_var, grid, post_h2
def assoc_scan(Y, X, covariates=None, K=None):
    """Single-kernel LMM association scan over the SNPs in X.

    Fits the heritability on a grid, evaluates per-SNP effect sizes, and
    converts Wald statistics (beta^2 / var(beta)) to chi2(1) p-values.

    Args:
        Y:          phenotype values
        X:          SNPs to test (also used as the background G)
        covariates: covariate matrix [default: None]
        K:          kinship matrix [default: None]

    Returns:
        (p_values, h2): per-SNP p-values and the estimated heritability.
    """
    model = lmm_cov.LMM(X=covariates, Y=Y, G=X, K=K)
    h2 = model.findH2(nGridH2=100)['h2']
    fit = model.nLLeval(h2=h2, delta=None, dof=None, scale=1.0, penalty=0.0, snps=X)
    betas = fit['beta']
    chi2stats = betas * betas / fit['variance_beta']
    p_values = st.chi2.sf(chi2stats, 1)[:, 0]
    return p_values, h2