Example #1
0
def davies_pval(Q, F):
    """Return ``davies_pvalue(Q, F)``, or 0 if that call fails an assertion.

    When ``davies_pvalue`` raises ``AssertionError`` the error is not
    propagated: a warning line is printed to standard output and ``0`` is
    returned in place of the p-value.
    """
    try:
        return davies_pvalue(Q, F)
    except AssertionError:
        print('Warning - Davies pvalue assertion error: zero p-value')
        return 0
Example #2
0
def test_pval_calibration2():
    """Check that Davies p-values have median 0.5 for large weights.

    Seeds the random generator, calls ``_sample(w, dof, loc, n=5000)`` and
    asserts that the median of ``davies_pvalue`` over the returned samples
    equals 0.5 within a relative tolerance of 1e-2.
    """
    w = [128372.0, 23720.11, 528372.0]
    dof = [1] * 3
    loc = [0.0] * 3

    random.seed(2)
    samples = _sample(w, dof, loc, n=5000)

    pvals = []
    for sample in samples:
        pvals.append(davies_pvalue(sample, diag(w)))

    assert_allclose(median(pvals), 0.5, rtol=1e-2)
Example #3
0
def test_pval_calibration1():
    """Check that Liu and Davies p-values both have median 0.5.

    Seeds the random generator, calls ``_sample(w, dof, loc, n=5000)`` and
    asserts, with relative tolerance 1e-2, that the median p-value is 0.5 for
    ``liu_sf`` (with the ``kurtosis`` flag off, then on) and for
    ``davies_pvalue``.
    """
    w = [0.5, 0.4, 0.1]
    dof = [1] * 3
    loc = [0.0] * 3

    random.seed(1)
    samples = _sample(w, dof, loc, n=5000)

    # Same check for both settings of the ``kurtosis`` flag; the first
    # element of the ``liu_sf`` result holds the p-values.
    for use_kurtosis in (False, True):
        liu_pvals = liu_sf(samples, w, dof, loc, kurtosis=use_kurtosis)[0]
        assert_allclose(median(liu_pvals), 0.5, rtol=1e-2)

    davies_pvals = [davies_pvalue(sample, diag(w)) for sample in samples]
    assert_allclose(median(davies_pvals), 0.5, rtol=1e-2)
Example #4
0
    def score_2dof_inter(self, X):
        """
        Compute a Davies p-value for the flattened input ``X``.

        The test statistic is the first element returned by
        ``self._score_stats(X.ravel(), [0])``. It is evaluated with
        ``davies_pvalue`` against a symmetric matrix assembled from
        ``self._E`` and ``self._P`` (see the inline comments).

        Parameters
        ----------
        X : array
            Input that is flattened with ``ravel`` before use.

        Returns
        -------
        Result of ``davies_pvalue`` for the statistic and the matrix.
        """
        from numpy import empty
        from numpy_sugar import ddot

        stats = self._score_stats(X.ravel(), [0])

        flat = X.ravel()
        # NOTE(review): ddot with a 1-d left operand presumably multiplies
        # each row of self._E by the matching entry of `flat` -- confirm
        # against numpy_sugar.
        prod = ddot(flat, self._E)
        projected = self._P(prod)

        size = prod.shape[1] + 1
        F = empty((size, size))

        # Every entry of F is written below, so starting from an
        # uninitialised array is safe.
        border = flat.T @ projected
        F[0, 0] = 0
        F[0, 1:] = border
        F[1:, 0] = F[0, 1:]
        F[1:, 1:] = prod.T @ projected
        F /= 2

        return davies_pvalue(stats[0], F)
Example #5
0
    def score_2dof_inter(self, X):
        """
        Interaction test for one variant.

        Parameters
        ----------
        X : 1d-array
            Genetic variant; it is flattened with ``ravel`` before use.

        Returns
        -------
        float
            P-value, as returned by ``davies_pvalue``.
        """
        from numpy import empty
        from numpy_sugar import ddot

        score = self._score_stats(X.ravel(), [0])

        variant = X.ravel()
        variant_E = ddot(variant, self._E)
        P_variant_E = self._P(variant_E)

        lower_right = variant_E.T @ P_variant_E
        first_row = variant.T @ P_variant_E

        # One extra leading row/column on top of the square lower-right block.
        dim = variant_E.shape[1] + 1
        F = empty((dim, dim))

        # Fill the matrix block by block; the first row and first column
        # mirror each other and the corner is zero.
        F[1:, 1:] = lower_right
        F[0, 1:] = first_row
        F[1:, 0] = F[0, 1:]
        F[0, 0] = 0
        F /= 2

        return davies_pvalue(score[0], F)
Example #6
0
def test_davies_pvalue():
    """Compare ``davies_pvalue`` with the reference in ``davies_pvalue.npz``.

    The archive's ``"args"`` entry is unpacked as the positional arguments of
    ``davies_pvalue`` and the result must match its ``"pval"`` entry.
    """
    with data_file("davies_pvalue.npz") as filepath:
        # NOTE(review): ``load`` is presumably ``numpy.load``. For an ``.npz``
        # file it returns a lazy archive that keeps the file open and reads
        # members on access, so pull both arrays out while the path provided
        # by ``data_file`` is still valid, and close the archive afterwards.
        with load(filepath, allow_pickle=True) as data:
            args = data["args"]
            expected = data["pval"]

    assert_allclose(davies_pvalue(*args), expected)
Example #7
0
    def score_2_dof(self, X, snp_dim="col", debug=False):
        """
        Score-test p-value for the genotype vector ``X``.

        When ``self.rho_list`` holds exactly one value, the p-value comes
        from ``davies_pvalue`` and, if that raises ``AssertionError``, from
        ``mod_liu_corrected``. Otherwise one ``mod_liu_corrected`` p-value is
        computed per rho, the smallest is used as the test statistic, and
        ``optimal_davies_pvalue`` produces the final p-value.

        Parameters
        ----------
        X : (`N`, `1`) ndarray
            genotype vector (TODO: X should be small)
        snp_dim : str, optional
            Not used by this method.
        debug : bool, optional
            If True, and ``self.rho_list`` does not hold exactly one value,
            a dict of intermediate quantities is returned alongside the
            p-value. It has no effect in the single-rho branch.

        Returns
        -------
        pvalue : float
            P value
        info : dict
            Intermediate quantities; only returned in the multi-rho branch
            when ``debug`` is True.
        """
        import numpy as np
        import scipy.linalg as la
        import scipy.stats as st

        n_rho = len(self.rho_list)

        # 1. calculate Qs and pvs
        Q_rho = np.zeros(n_rho)
        Py = P(self.gp, self.y)
        for i, rho in enumerate(self.rho_list):
            LT = np.vstack(
                (rho ** 0.5 * self.vec_ones, (1 - rho) ** 0.5 * self.Env.T)
            )
            LTxoPy = np.dot(LT, X * Py)
            Q_rho[i] = 0.5 * np.dot(LTxoPy.T, LTxoPy)

        # Calculating pvs is split into 2 steps
        # If we only consider one value of rho i.e. equivalent to SKAT and
        # used for interaction test
        if n_rho == 1:
            rho = self.rho_list[0]
            L = np.hstack(
                (rho ** 0.5 * self.vec_ones.T, (1 - rho) ** 0.5 * self.Env)
            )
            xoL = X * L
            PxoL = P(self.gp, xoL)
            LToxPxoL = 0.5 * np.dot(xoL.T, PxoL)
            try:
                pval = davies_pvalue(Q_rho[0], LToxPxoL)
            except AssertionError:
                eighQ, _ = la.eigh(LToxPxoL)
                # The multi-rho branch below stores the result of
                # mod_liu_corrected as a 4-column row whose first column is
                # the p-value; keep only that entry so this branch returns a
                # scalar like the Davies path does.
                pval = mod_liu_corrected(Q_rho[0], eighQ)[0]
            # Script ends here for interaction test
            return pval

        # or if we consider multiple values of rho i.e. equivalent to SKAT-O
        # and used for association test
        # One row per rho. Column 0 is the p-value; column 3 is used below as
        # a chi-square degrees of freedom, columns 2 and 1 as scale and shift.
        pliumod = np.zeros((n_rho, 4))
        for i, rho in enumerate(self.rho_list):
            L = np.hstack(
                (rho ** 0.5 * self.vec_ones.T, (1 - rho) ** 0.5 * self.Env)
            )
            xoL = X * L
            PxoL = P(self.gp, xoL)
            LToxPxoL = 0.5 * np.dot(xoL.T, PxoL)
            eighQ, _ = la.eigh(LToxPxoL)
            pliumod[i,] = mod_liu_corrected(Q_rho[i], eighQ)
        # Test statistic: the smallest per-rho p-value.
        T = pliumod[:, 0].min()

        # 2. Calculate qmin
        qmin = np.zeros(n_rho)
        percentile = 1 - T
        for i in range(n_rho):
            q = st.chi2.ppf(percentile, pliumod[i, 3])
            # Recalculate p-value for each Q rho of seeing values at least as
            # extreme as q again using the modified matching moments method
            qmin[i] = (q - pliumod[i, 3]) / (2 * pliumod[i, 3]) ** 0.5 * pliumod[
                i, 2
            ] + pliumod[i, 1]

        # 3. Calculate quantities that occur in null distribution
        Px1 = P(self.gp, X)
        m = 0.5 * np.dot(X.T, Px1)
        xoE = X * self.Env
        PxoE = P(self.gp, xoE)
        ETxPxE = 0.5 * np.dot(xoE.T, PxoE)
        ETxPx1 = np.dot(xoE.T, Px1)
        ETxPx11xPxE = 0.25 / m * np.dot(ETxPx1, ETxPx1.T)
        ZTIminusMZ = ETxPxE - ETxPx11xPxE
        eigvals, _ = la.eigh(ZTIminusMZ)

        eta = np.dot(ETxPx11xPxE, ZTIminusMZ)
        vareta = 4 * np.trace(eta)

        OneZTZE = 0.5 * np.dot(X.T, PxoE)
        tau_top = np.dot(OneZTZE, OneZTZE.T)
        tau_rho = np.zeros(n_rho)
        for i, rho in enumerate(self.rho_list):
            tau_rho[i] = rho * m + (1 - rho) / m * tau_top

        MuQ = np.sum(eigvals)
        VarQ = np.sum(eigvals ** 2) * 2 + vareta
        KerQ = np.sum(eigvals ** 4) / (np.sum(eigvals ** 2) ** 2) * 12
        Df = 12 / KerQ

        # 4. Integration
        pvalue = optimal_davies_pvalue(
            qmin, MuQ, VarQ, KerQ, eigvals, vareta, Df, tau_rho, self.rho_list, T
        )

        # Final correction to make sure that the p-value returned is sensible
        multi = 2 if n_rho < 3 else 3
        idx = np.where(pliumod[:, 0] > 0)[0]
        pval = pliumod[:, 0].min() * multi
        # Fall back to the scaled minimum p-value when the integration result
        # is non-positive or any per-rho p-value is not strictly positive.
        if pvalue <= 0 or len(idx) < n_rho:
            pvalue = pval
        # If that is still exactly zero, use the smallest positive per-rho
        # p-value when one exists.
        if pvalue == 0 and len(idx) > 0:
            pvalue = pliumod[:, 0][idx].min()

        if debug:
            info = {
                "Qs": Q_rho,
                "pvs_liu": pliumod,
                "qmin": qmin,
                "MuQ": MuQ,
                "VarQ": VarQ,
                "KerQ": KerQ,
                "lambd": eigvals,
                "VarXi": vareta,
                "Df": Df,
                "tau": tau_rho,
            }
            return pvalue, info
        return pvalue