예제 #1
0
def test_structlmm_assoc():
    """Check the ``score_2dof_assoc`` p-value on a small seeded dataset."""
    n_samples, n_envs = 20, 4
    rng = RandomState(1)

    # The reference p-value below is tied to this exact sequence of draws
    # from the seeded generator, so the order must not change.
    outcome = rng.randn(n_samples, 1)
    env = rng.randn(n_samples, n_envs)
    intercept = ones((n_samples, 1))
    uniform_draws = rng.rand(n_samples, 1)
    # Turn the boolean mask into a 0.0/1.0 column vector.
    variant = 1.0 * (uniform_draws < 0.2)

    model = StructLMM(outcome, intercept, env, W=env)
    model.fit(verbose=False)

    expected = 0.8470039620073695
    observed = model.score_2dof_assoc(variant)
    assert_allclose([observed], [expected], rtol=1e-5)
예제 #2
0
def test_structlmm_inter():
    """Check the ``score_2dof_inter`` p-value on a small seeded dataset."""
    n_samples, n_envs = 20, 4
    rng = RandomState(1)

    # The reference p-value below is tied to this exact sequence of draws
    # from the seeded generator, so the order must not change.
    outcome = rng.randn(n_samples, 1)
    env = rng.randn(n_samples, n_envs)
    # Turn the boolean mask into a 0.0/1.0 vector of length n_samples.
    variant = 1.0 * (rng.rand(n_samples) < 0.2)

    # Two-column design: a column of ones next to the tested vector itself.
    design = stack([ones(n_samples), variant], axis=1)

    model = StructLMM(outcome, design, env, W=env)
    model.fit(verbose=False)

    expected = 0.6781070640353783
    observed = model.score_2dof_inter(variant)
    assert_allclose([observed], [expected], rtol=1e-5)
예제 #3
0
def struct_lmm(geno_df,
               pheno,
               env,
               covs=None,
               rhos=None,
               no_association_test=False,
               no_interaction_test=False):
    """
    Utility function to run StructLMM

    Parameters
    ----------
    geno_df: (`N`, `S`) pandas.DataFrame
        genotype matrix for `N` samples, `S` SNPs
    pheno : (`N`, 1) ndarray
        phenotype vector
    env : (`N`, `K`)
          Environmental matrix (individuals by number of environments)
    covs : (`N`, L) ndarray
        fixed effect design for covariates `N` samples and `L` covariates.
        By default, a single column of ones with ``env.shape[0]`` rows.
    rhos : list
        list of ``rho`` values.
        ``rho=0`` corresponds to no persistent effect (only GxE);
        ``rho=1`` corresponds to only persistent effect (no GxE);
        By default, ``rho=[0, 0.2, 0.4, 0.6, 0.8, 1.]``
    no_association_test : bool
        if True the association test is not considered.
        The default value is False.
    no_interaction_test : bool
        if True the interaction test is not considered.
        The default value is False.

    Returns
    -------
    res : *:class:`pandas.DataFrame`*
        indexed by the column labels of ``geno_df``; contains column
        ``pv`` with the pv of the joint test (if no_association_test is
        False) and column ``pv_int`` with the pv of the interaction test
        (if no_interaction_test is False).
    """
    # Function-scope import: the NumPy aliases exposed on the top-level
    # SciPy namespace (``sp.ones``, ``sp.zeros``, ``sp.hstack``) are
    # deprecated, so the array helpers are taken from NumPy directly.
    import numpy as np

    run_assoc = not no_association_test
    run_inter = not no_interaction_test

    if covs is None:
        covs = np.ones((env.shape[0], 1))
    if rhos is None:
        rhos = [0, .2, .4, .6, .8, 1.]

    if run_assoc:
        # The null model of the association test does not depend on the
        # SNP, so it is fitted once, before the loop.
        slmm = StructLMM(pheno, env, W=env, rho_list=rhos)
        slmm.fit_null(F=covs, verbose=False)
    if run_inter:
        # The interaction model is refitted per SNP inside the loop because
        # the SNP itself is appended to the covariates.
        slmm_int = StructLMM(pheno, env, W=env, rho_list=[0])

    n_snps = geno_df.shape[1]
    genotypes = geno_df.values  # hoisted: avoid re-reading .values per SNP
    res = pd.DataFrame(data=[], index=geno_df.columns.values)
    pv = np.zeros(n_snps)
    pv_int = np.zeros(n_snps)

    # ``range`` (not ``xrange``) so the loop runs on Python 2 and Python 3.
    for snp in range(n_snps):
        # Index with a list to keep the SNP as an (N, 1) column.
        x = genotypes[:, [snp]]
        if run_assoc:
            # association test; the second returned value (optimal rho)
            # is not used
            _p, _ = slmm.score_2_dof(x)
            pv[snp] = _p
        if run_inter:
            # interaction test
            covs1 = np.hstack((covs, x))
            slmm_int.fit_null(F=covs1, verbose=False)
            _p, _ = slmm_int.score_2_dof(x)
            pv_int[snp] = _p

    # add pvalues to res
    if run_assoc:
        res['pv'] = pv
    if run_inter:
        res['pv_int'] = pv_int
    return res
예제 #4
0
def st_sscan(G, y, E, M=None, tests=None, verbose=True, rhos=None):
    """Mixed-model with genetic effect heterogeneity.

    Parameters
    ----------
    G : (`N`, `S`) array-like
        candidate matrix; each of its columns (after normalisation by
        ``conform_dataset``) is tested in turn.
    y : (`N`, 1) ndarray
        phenotype data
    E : (`N`, `E`) ndarray
        environments data.
    M : (`N`, `D`) ndarray, optional
        covariate design matrix.
        When ``None``, the default is whatever ``conform_dataset`` produces
        (per the original documentation, a (`N`, `1`) array of ones).
    tests : list
        Which tests are performed.
        Element list values are ``'inter'`` and ``'assoc'``.
        By default, only the interaction test is considered.
    verbose : (bool, optional):
        if True, details such as runtime are displayed.
    rhos : list
        for the association test, a list of ``rho`` values must be specified.
        The choice of ``rho`` affects the statistical power of the test
        (for more information see the StructLMM paper).
        By default, ``rho=[0, 0.1**2, 0.2**2, 0.3**2, 0.4**2, 0.5**2, 0.5, 1.]``

    Returns
    -------
    pandas.DataFrame
        one row per column of ``G`` with columns ``pvi`` (interaction test)
        and ``pva`` (association test). NOTE: the column of a test that was
        not requested is left filled with zeros; it does not hold p-values.
    """
    from struct_lmm import StructLMM
    from numpy import zeros, hstack, asarray
    from pandas import DataFrame

    if rhos is None:
        rhos = [0.0, 0.1**2, 0.2**2, 0.3**2, 0.4**2, 0.5**2, 0.5, 1.0]

    with session_block("struct-lmm analysis", disable=not verbose):

        with session_line("Normalising input... ", disable=not verbose):
            data = conform_dataset(y, M, G=G, K=None)

        y = data["y"]
        M = data["M"]
        G = data["G"]

        if tests is None:
            tests = ["inter"]

        # Resolve the requested tests once instead of on every iteration.
        run_inter = "inter" in tests
        run_assoc = "assoc" in tests

        if run_inter:
            # Refitted per candidate inside the loop, because the candidate
            # itself is appended to the covariates.
            slmi = StructLMM(asarray(y, float), E, W=E, rho_list=[0])

        if run_assoc:
            # The null model does not depend on the candidate: fit it once.
            slmm = StructLMM(asarray(y, float), E, W=E, rho_list=rhos)
            slmm.fit_null(F=asarray(M, float), verbose=False)

        n_candidates = G.shape[1]
        _pvi = zeros(n_candidates)
        _pva = zeros(n_candidates)
        for snp in range(n_candidates):
            # Index with a list to keep the candidate as an (N, 1) column.
            x = asarray(G[:, [snp]], float)

            if run_inter:
                # interaction test
                M1 = hstack((M, x))
                slmi.fit_null(F=M1, verbose=False)
                _pvi[snp] = slmi.score_2_dof(x)

            if run_assoc:
                # association test
                _pva[snp] = slmm.score_2_dof(x)

    data = OrderedDict()
    data["pvi"] = _pvi
    data["pva"] = _pva
    return DataFrame(data)