from chiscore import optimal_davies_pvalue


def main():
    # Smoke test with made-up inputs; the positional arguments line up with
    # the quantities computed in the association tests below.
    q = [1.5, 3.0]             # per-rho quantiles (qmin)
    mu = -0.5                  # mean of the null mixture (MuQ)
    var = 1.0                  # variance (VarQ)
    kur = 3.0                  # kurtosis (KerQ)
    w = [10.0, 0.2, 0.1, 0.3]  # eigenvalue weights (eigh)
    remain_var = 0.5           # remaining variance term (vareta)
    df = 3.4                   # matched degrees of freedom (Df)
    trho = [5.1, 0.2]          # per-rho tau coefficients (tau_rho)
    grid = [0.0, 0.01]         # rho grid
    print(optimal_davies_pvalue(q, mu, var, kur, w, remain_var, df, trho, grid))
def score_2dof_assoc(self, X):
    from numpy import trace, sum, where, empty
    from numpy.linalg import eigvalsh

    Q_rho = self._score_stats(X.ravel(), self._rhos)
    null_lambdas = self._score_stats_null_dist(X.ravel())
    pliumod = self._score_stats_pvalue(Q_rho, null_lambdas)
    qmin = self._qmin(pliumod)

    # 3. Calculate quantities that occur in the null distribution
    Px1 = self._P(X)
    m = 0.5 * (X.T @ Px1)
    xoE = X * self._E
    PxoE = self._P(xoE)
    ETxPxE = 0.5 * (xoE.T @ PxoE)
    ETxPx1 = xoE.T @ Px1
    ETxPx11xPxE = 0.25 / m * (ETxPx1 @ ETxPx1.T)
    ZTIminusMZ = ETxPxE - ETxPx11xPxE
    eigh = eigvalsh(ZTIminusMZ)

    eta = ETxPx11xPxE @ ZTIminusMZ
    vareta = 4 * trace(eta)

    OneZTZE = 0.5 * (X.T @ PxoE)
    tau_top = OneZTZE @ OneZTZE.T
    tau_rho = empty(len(self._rhos))
    for i in range(len(self._rhos)):
        tau_rho[i] = self._rhos[i] * m + (1 - self._rhos[i]) / m * tau_top

    MuQ = sum(eigh)
    VarQ = sum(eigh**2) * 2 + vareta
    KerQ = sum(eigh**4) / (sum(eigh**2) ** 2) * 12
    Df = 12 / KerQ

    # 4. Integration
    T = pliumod[:, 0].min()
    pvalue = optimal_davies_pvalue(
        qmin, MuQ, VarQ, KerQ, eigh, vareta, Df, tau_rho, self._rhos, T
    )

    # Final correction to make sure that the p-value returned is sensible
    multi = 3
    if len(self._rhos) < 3:
        multi = 2
    idx = where(pliumod[:, 0] > 0)[0]
    pval = pliumod[:, 0].min() * multi
    if pvalue <= 0 or len(idx) < len(self._rhos):
        pvalue = pval
    if pvalue == 0:
        if len(idx) > 0:
            pvalue = pliumod[:, 0][idx].min()

    return pvalue
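`score_2dof_assoc` delegates the quantile step to `self._qmin`, which is not shown here. Below is a minimal sketch of that step, assuming it performs the same moment-matching computation spelled out explicitly in `score_2_dof` further below; the helper body is an assumption, and only the `pliumod` column layout (p-value, mean, sd, df) comes from these snippets.

```python
import numpy as np
from scipy.stats import chi2


def qmin_sketch(pliumod):
    # Hypothetical stand-in for self._qmin (an assumption, not library code).
    # For each rho, take the chi2 quantile at the smallest Liu p-value and
    # map it back to the Q scale using the moment-matched mean and sd.
    T = pliumod[:, 0].min()
    qmin = np.zeros(pliumod.shape[0])
    for i in range(pliumod.shape[0]):
        q = chi2.ppf(1 - T, pliumod[i, 3])  # quantile at matched df
        qmin[i] = (
            (q - pliumod[i, 3]) / (2 * pliumod[i, 3]) ** 0.5 * pliumod[i, 2]
            + pliumod[i, 1]
        )
    return qmin


# Made-up pliumod rows (columns: p-value, mean, sd, df):
pliumod = np.array([[0.01, 1.0, 1.4, 2.0], [0.05, 2.0, 2.8, 4.0]])
print(qmin_sketch(pliumod))
```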
from chiscore import optimal_davies_pvalue
from numpy import load
from numpy.testing import assert_allclose


def test_optimal_davies_pvalue_bound():
    # data_file is a local test helper yielding the path to a bundled fixture.
    with data_file("bound.npz") as filepath:
        data = dict(load(filepath, allow_pickle=True))
        pval = optimal_davies_pvalue(
            data["qmin"],
            data["MuQ"],
            data["VarQ"],
            data["KerQ"],
            data["eigh"],
            data["vareta"],
            data["Df"],
            data["tau_rho"],
            data["rho_list"],
        )
    assert_allclose(pval, 0.22029543318607503)
def test_optimal_davies_pvalue_nan():
    with data_file("danilo_nan.npz") as filepath:
        data = dict(load(filepath))
        pval = optimal_davies_pvalue(
            data["qmin"],
            data["MuQ"],
            data["VarQ"],
            data["KerQ"],
            data["eigh"],
            data["vareta"],
            data["Df"],
            data["tau_rho"],
            data["rho_list"],
        )
    assert_allclose(pval, 0.39344574097360585)
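The two fixtures above store each positional argument under its own key. Here is a sketch of how such a fixture could be written; the file name is mine, the keys mirror the tests, and the values are made up.

```python
import numpy as np

# Write a fixture in the same layout the tests above expect (made-up values).
np.savez(
    "example_fixture.npz",
    qmin=np.array([1.5, 3.0]),
    MuQ=-0.5,
    VarQ=1.0,
    KerQ=3.0,
    eigh=np.array([10.0, 0.2, 0.1, 0.3]),
    vareta=0.5,
    Df=3.4,
    tau_rho=np.array([5.1, 0.2]),
    rho_list=np.array([0.0, 0.01]),
)
```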
def score_2dof_assoc(self, X, return_rho=False):
    """
    Association test.

    Parameters
    ----------
    X : 1d-array
        Genetic variant.
    return_rho : bool (optional)
        ``True`` to return the optimal ρ; ``False`` otherwise (default).

    Returns
    -------
    float
        P-value.
    float
        Optimal ρ. Returned only if ``return_rho == True``.
    """
    from numpy import empty, sum, trace, where
    from numpy.linalg import eigvalsh

    Q_rho = self._score_stats(X.ravel(), self._rhos)
    null_lambdas = self._score_stats_null_dist(X.ravel())
    pliumod = self._score_stats_pvalue(Q_rho, null_lambdas)
    # index of the smallest per-rho Liu p-value over the rho grid
    optimal_rho = pliumod[:, 0].argmin()
    qmin = self._qmin(pliumod)

    # 3. Calculate quantities that occur in the null distribution
    Px1 = self._P(X)
    m = 0.5 * (X.T @ Px1)
    xoE = X * self._E
    PxoE = self._P(xoE)
    ETxPxE = 0.5 * (xoE.T @ PxoE)
    ETxPx1 = xoE.T @ Px1
    ETxPx11xPxE = 0.25 / m * (ETxPx1 @ ETxPx1.T)
    ZTIminusMZ = ETxPxE - ETxPx11xPxE
    eigh = eigvalsh(ZTIminusMZ)

    eta = ETxPx11xPxE @ ZTIminusMZ
    vareta = 4 * trace(eta)

    OneZTZE = 0.5 * (X.T @ PxoE)
    tau_top = OneZTZE @ OneZTZE.T
    tau_rho = empty(len(self._rhos))
    for i in range(len(self._rhos)):
        tau_rho[i] = self._rhos[i] * m + (1 - self._rhos[i]) / m * tau_top

    MuQ = sum(eigh)
    VarQ = sum(eigh**2) * 2 + vareta
    KerQ = sum(eigh**4) / (sum(eigh**2) ** 2) * 12
    Df = 12 / KerQ

    # 4. Integration
    T = pliumod[:, 0].min()
    pvalue = optimal_davies_pvalue(
        qmin, MuQ, VarQ, KerQ, eigh, vareta, Df, tau_rho, self._rhos, T
    )

    # Final correction to make sure that the p-value returned is sensible
    multi = 3
    if len(self._rhos) < 3:
        multi = 2
    idx = where(pliumod[:, 0] > 0)[0]
    pval = pliumod[:, 0].min() * multi
    if pvalue <= 0 or len(idx) < len(self._rhos):
        pvalue = pval
    if pvalue == 0:
        if len(idx) > 0:
            pvalue = pliumod[:, 0][idx].min()

    if return_rho:
        return pvalue, optimal_rho
    return pvalue
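The moment-matching block above (MuQ, VarQ, KerQ, Df) is self-contained given the eigenvalues and the eta variance term; here is a runnable illustration with made-up values.

```python
import numpy as np

# Made-up eigenvalues and remaining-variance term, for illustration only.
eigh = np.array([10.0, 0.2, 0.1, 0.3])
vareta = 0.5

MuQ = eigh.sum()                                        # mean of the null mixture
VarQ = 2 * (eigh ** 2).sum() + vareta                   # variance, including the eta term
KerQ = 12 * (eigh ** 4).sum() / (eigh ** 2).sum() ** 2  # kurtosis of the mixture
Df = 12 / KerQ                                          # matched degrees of freedom

print(MuQ, VarQ, KerQ, Df)
```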
def score_2_dof(self, X, snp_dim="col", debug=False):
    """
    Parameters
    ----------
    X : (`N`, `1`) ndarray
        Genotype vector (TODO: X should be small).
    snp_dim : str (optional)
        Dimension along which SNPs are stored (unused here).
    debug : bool (optional)
        If ``True``, also return a dict of intermediate quantities.

    Returns
    -------
    pvalue : float
        P-value.
    """
    # numpy provides all the array ops used below; scipy removed its numpy
    # aliases (sp.zeros, sp.dot, ...) in SciPy >= 1.9.
    import numpy as sp
    import scipy.linalg as la
    import scipy.stats as st

    # 1. Calculate Qs and p-values
    Q_rho = sp.zeros(len(self.rho_list))
    Py = P(self.gp, self.y)
    for i in range(len(self.rho_list)):
        rho = self.rho_list[i]
        LT = sp.vstack((rho ** 0.5 * self.vec_ones, (1 - rho) ** 0.5 * self.Env.T))
        LTxoPy = sp.dot(LT, X * Py)
        Q_rho[i] = 0.5 * sp.dot(LTxoPy.T, LTxoPy)

    # Calculating the p-values is split into two cases.
    # A single value of rho is equivalent to SKAT and is used for the
    # interaction test.
    if len(self.rho_list) == 1:
        rho = self.rho_list[0]
        L = sp.hstack((rho ** 0.5 * self.vec_ones.T, (1 - rho) ** 0.5 * self.Env))
        xoL = X * L
        PxoL = P(self.gp, xoL)
        LToxPxoL = 0.5 * sp.dot(xoL.T, PxoL)
        try:
            pval = davies_pvalue(Q_rho[0], LToxPxoL)
        except AssertionError:
            eighQ, UQ = la.eigh(LToxPxoL)
            pval = mod_liu_corrected(Q_rho[0], eighQ)
        # Script ends here for the interaction test.
        return pval

    # Multiple values of rho are equivalent to SKAT-O and are used for the
    # association test.
    pliumod = sp.zeros((len(self.rho_list), 4))
    for i in range(len(self.rho_list)):
        rho = self.rho_list[i]
        L = sp.hstack((rho ** 0.5 * self.vec_ones.T, (1 - rho) ** 0.5 * self.Env))
        xoL = X * L
        PxoL = P(self.gp, xoL)
        LToxPxoL = 0.5 * sp.dot(xoL.T, PxoL)
        eighQ, UQ = la.eigh(LToxPxoL)
        pliumod[i, :] = mod_liu_corrected(Q_rho[i], eighQ)
    T = pliumod[:, 0].min()

    # 2. Calculate qmin
    qmin = sp.zeros(len(self.rho_list))
    percentile = 1 - T
    for i in range(len(self.rho_list)):
        # chi2 quantile at the matched degrees of freedom, mapped back to the
        # Q scale via the moment-matched mean and sd (modified Liu method).
        q = st.chi2.ppf(percentile, pliumod[i, 3])
        qmin[i] = (
            (q - pliumod[i, 3]) / (2 * pliumod[i, 3]) ** 0.5 * pliumod[i, 2]
            + pliumod[i, 1]
        )

    # 3. Calculate quantities that occur in the null distribution
    Px1 = P(self.gp, X)
    m = 0.5 * sp.dot(X.T, Px1)
    xoE = X * self.Env
    PxoE = P(self.gp, xoE)
    ETxPxE = 0.5 * sp.dot(xoE.T, PxoE)
    ETxPx1 = sp.dot(xoE.T, Px1)
    ETxPx11xPxE = 0.25 / m * sp.dot(ETxPx1, ETxPx1.T)
    ZTIminusMZ = ETxPxE - ETxPx11xPxE
    eigh, vecs = la.eigh(ZTIminusMZ)

    eta = sp.dot(ETxPx11xPxE, ZTIminusMZ)
    vareta = 4 * sp.trace(eta)

    OneZTZE = 0.5 * sp.dot(X.T, PxoE)
    tau_top = sp.dot(OneZTZE, OneZTZE.T)
    tau_rho = sp.zeros(len(self.rho_list))
    for i in range(len(self.rho_list)):
        tau_rho[i] = self.rho_list[i] * m + (1 - self.rho_list[i]) / m * tau_top

    MuQ = sp.sum(eigh)
    VarQ = sp.sum(eigh ** 2) * 2 + vareta
    KerQ = sp.sum(eigh ** 4) / (sp.sum(eigh ** 2) ** 2) * 12
    Df = 12 / KerQ

    # 4. Integration
    pvalue = optimal_davies_pvalue(
        qmin, MuQ, VarQ, KerQ, eigh, vareta, Df, tau_rho, self.rho_list, T
    )

    # Final correction to make sure that the p-value returned is sensible
    multi = 3
    if len(self.rho_list) < 3:
        multi = 2
    idx = sp.where(pliumod[:, 0] > 0)[0]
    pval = pliumod[:, 0].min() * multi
    if pvalue <= 0 or len(idx) < len(self.rho_list):
        pvalue = pval
    if pvalue == 0:
        if len(idx) > 0:
            pvalue = pliumod[:, 0][idx].min()

    if debug:
        info = {
            "Qs": Q_rho,
            "pvs_liu": pliumod,
            "qmin": qmin,
            "MuQ": MuQ,
            "VarQ": VarQ,
            "KerQ": KerQ,
            "lambd": eigh,
            "VarXi": vareta,
            "Df": Df,
            "tau": tau_rho,
        }
        return pvalue, info
    return pvalue
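The same "final correction" block appears verbatim in `score_2dof_assoc` and `score_2_dof` above. Factored out as a standalone helper it reads as follows; this is a sketch, and the function name is mine.

```python
import numpy as np


def sanitize_pvalue(pvalue, liu_pvalues, n_rhos):
    """Fallback for when optimal_davies_pvalue returns a non-sensible value.

    Replaces a non-positive Davies p-value with a Bonferroni-style bound
    built from the per-rho Liu p-values (multiplier 2 or 3, matching the
    snippets above).
    """
    multi = 3 if n_rhos >= 3 else 2
    idx = np.where(liu_pvalues > 0)[0]
    bound = liu_pvalues.min() * multi
    if pvalue <= 0 or len(idx) < n_rhos:
        pvalue = bound
    if pvalue == 0 and len(idx) > 0:
        pvalue = liu_pvalues[idx].min()
    return pvalue


# Example with made-up Liu p-values: a non-positive Davies p-value is
# replaced by min(0.03, 0.01, 0.02) * 3 = 0.03.
print(sanitize_pvalue(-1e-12, np.array([0.03, 0.01, 0.02]), 3))
```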
def test(self, return_rho=False):
    """Tests for allelic imbalance.

    Args:
        return_rho: If True, return the optimal rho.

    Returns:
        P-value, and the optimal rho if return_rho is True.
    """
    # compute the score statistic for each rho
    Q_rho = self._compute_score()

    # compute the parameters of the score distribution
    Fs, null_lambdas = self._compute_score_dist_parameters()

    # approximate the score distribution for each rho
    if len(self.rhos) == 1:
        # approximate the null distribution using the Davies method
        pvalue = davies_pval(Q_rho[0], Fs[0])
        if return_rho:
            return pvalue, self.rhos[0]
        return pvalue

    # approximate the distributions using Liu's method
    approx_out = self._approximate_score_dist(Q_rho, null_lambdas)
    if approx_out[:, 0].min() < 4e-14:
        # beyond the Liu method's precision; use Davies + Bonferroni
        pvalues = [davies_pval(Q_rho[i], Fs[i]) for i in range(len(self.rhos))]
        pvalues = np.asarray(pvalues)
        min_idx = pvalues.argmin()
        pvalue = pvalues[min_idx] * len(self.rhos)
        if return_rho:
            return pvalue, self.rhos[min_idx]
        return pvalue

    # the smallest p-value is the combined test statistic over all rhos
    T = approx_out[:, 0].min()
    optimal_rho = self.rhos[approx_out[:, 0].argmin()]

    # compute the elements of the null distribution for T
    qmin = self._compute_qmin(approx_out)
    null_params = self._compute_null_parameters()

    # compute the final p-value
    pvalue = optimal_davies_pvalue(2 * qmin, *null_params, self.rhos, T)

    # resort to Bonferroni in case of numerical issues
    # TODO: find a more robust estimate
    if pvalue <= 0:
        pvalue = T * len(self.rhos)

    if return_rho:
        return pvalue, optimal_rho
    return pvalue
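Both fallback branches above reduce to a Bonferroni correction of the smallest per-rho p-value; here is a runnable illustration of that combination step with made-up values.

```python
import numpy as np

rhos = [0.0, 0.1, 0.5, 1.0]
# Made-up per-rho Davies p-values, as if Liu's approximation had underflowed:
pvalues = np.array([2e-15, 8e-15, 5e-16, 3e-15])

min_idx = pvalues.argmin()
combined = pvalues[min_idx] * len(rhos)  # Bonferroni over the rho grid
print(combined, rhos[min_idx])           # 2e-15 and optimal rho 0.5
```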
def test_optimal_davies_pvalue():
    with data_file("optimal_davies_pvalue.npz") as filepath:
        data = load(filepath, allow_pickle=True)
        pval = optimal_davies_pvalue(*data["args"])
    assert_allclose(pval, 0.9547608685218306)