Example #1
def run_dynamic_eqtl_one_test_lmm(expression, genotype, covariates, groups,
                                  environmental_variable):
    num_cov = covariates.shape[1]
    # Covariate matrix
    X = np.vstack((expression, groups, genotype, environmental_variable,
                   environmental_variable * genotype, covariates.T)).T
    # Create column names
    cov_names = ['cov' + str(i) for i in range(num_cov)]
    col_names = ['y', 'group', 'g', 'e', 'gXe'] + cov_names

    # Make df
    df = pd.DataFrame(X, columns=col_names)
    # Make formula for LMM
    if num_cov > 0:
        formula = 'y ~ g + e + gXe + ' + ' + '.join(
            cov_names) + ' + (1 | group)'
    else:
        formula = 'y ~ g + e + gXe + ' + '(1 | group)'

    model = Lmer(formula, data=df)
    model.fit()

    beta = model.coefs['Estimate'][3]
    standard_error = model.coefs['SE'][3]
    pvalue = model.coefs['P-val'][3]
    #t_value = fit['T-stat'][1]
    #normal_approx_p = 2.0*(1.0 - scipy.stats.norm.cdf(abs(t_value)))
    #residual_scale = model.ranef_var.Std[1]
    return pvalue
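
A minimal usage sketch with synthetic inputs (all shapes and names here are hypothetical; the real pipeline supplies per-cell expression, genotype, covariates, and donor labels):

# hypothetical toy call: 200 cells from 20 donors, 3 covariates
n = 200
expression = np.random.randn(n)
genotype = np.random.binomial(2, 0.4, n).astype(float)
covariates = np.random.randn(n, 3)
groups = np.repeat(np.arange(20), 10)        # donor label per cell
environmental_variable = np.random.randn(n)  # e.g. pseudotime
pval = run_dynamic_eqtl_one_test_lmm(expression, genotype, covariates,
                                     groups, environmental_variable)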
Example #2

def run_linear_mixed_model_for_initialization(Y, G, cov, z):
    num_tests = Y.shape[1]
    F_betas = []
    C_betas = []
    residuals = []
    model_eq = 'y ~ g'
    for cov_num in range(cov.shape[1]):
        model_eq = model_eq + ' + x' + str(cov_num)
    model_eq = model_eq + ' + (1|z)'
    # 119, 103
    for test_number in range(num_tests):
        print(test_number)
        y_vec = Y[:, test_number]
        g_vec = G[:, test_number]
        dd = {'y': y_vec, 'z': z, 'g': g_vec}
        num_covs = cov.shape[1]
        for cov_num in range(num_covs):
            dd['x' + str(cov_num)] = cov[:, cov_num]
        df = pd.DataFrame(dd)
        model = Lmer(model_eq, data=df)
        model.fit()
        # pdb.set_trace()  # debug breakpoint (would pause execution each iteration)
        residuals.append(model.residuals)
        print(
            np.mean(model.residuals / g_vec) / np.std(model.residuals / g_vec))
        print('\n')
        # no_re_pred = np.dot(cov[:,1:],model.coefs['Estimate'][2:]) + model.coefs['Estimate'][0] + model.coefs['Estimate'][1]*g_vec
    residuals = np.transpose(np.asarray(residuals))
    return residuals
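
A minimal invocation sketch (all shapes and names hypothetical): Y and G are cells-by-tests matrices, cov a cells-by-covariates matrix, and z a per-cell grouping label.

# hypothetical toy inputs: 500 cells, 2 tests, 2 covariates, 50 donors
Y = np.random.randn(500, 2)
G = np.random.binomial(2, 0.3, size=(500, 2)).astype(float)
cov = np.random.randn(500, 2)
z = np.repeat(np.arange(50), 10)  # 50 donors x 10 cells each
residuals = run_linear_mixed_model_for_initialization(Y, G, cov, z)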
Example #3
def run_bootstrapped_eqtl_lmm_stability_one_test(expression, genotype,
                                                 covariates, individuals,
                                                 individual_to_cells,
                                                 num_bootstraps,
                                                 sampling_fraction):
    num_cov = covariates.shape[1]

    # Covariate matrix
    X = np.vstack((expression, individuals, genotype, covariates.T)).T
    # Create column names
    cov_names = ['cov' + str(i) for i in range(num_cov)]
    col_names = ['y', 'group', 'g'] + cov_names

    # Make df
    df = pd.DataFrame(X, columns=col_names)
    # Make formula for LMM
    if num_cov > 0:
        formula = 'y ~ g + ' + ' + '.join(cov_names) + ' + (1 | group)'
    else:
        formula = 'y ~ g + ' + '(1 | group)'

    bootstrapped_betas = []
    for bootstrap_num in range(num_bootstraps):
        print(bootstrap_num)
        indices = get_bootstrapped_indices(individuals, individual_to_cells,
                                           sampling_fraction)
        model = Lmer(formula, data=df.iloc[indices, :])
        model.fit()
        bootstrapped_beta = model.coefs['Estimate'][1]
        #bootstrapped_beta, bootstrapped_std_err, bootstrapped_pvalue = run_eqtl_one_test_lmm(expression[indices], genotype[indices], covariates[indices,:], individuals[indices])
        bootstrapped_betas.append(bootstrapped_beta)
    return np.asarray(bootstrapped_betas)
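
get_bootstrapped_indices is not defined in this example. A plausible sketch of such a helper, assuming individual_to_cells maps each individual ID to an array of that individual's cell indices:

def get_bootstrapped_indices(individuals, individual_to_cells,
                             sampling_fraction):
    # hypothetical cluster bootstrap: sample a fraction of individuals with
    # replacement and keep all cells belonging to each sampled individual
    unique_indis = np.unique(individuals)
    n_sampled = int(np.ceil(len(unique_indis) * sampling_fraction))
    sampled = np.random.choice(unique_indis, size=n_sampled, replace=True)
    return np.concatenate([individual_to_cells[indi] for indi in sampled])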
Example #4
def test_gaussian_lmm():

    df = pd.read_csv(os.path.join(get_resource_path(), 'sample_data.csv'))
    model = Lmer('DV ~ IV3 + IV2 + (IV2|Group) + (1|IV3)', data=df)
    model.fit(summarize=False)

    assert model.coefs.shape == (3, 8)
    estimates = np.array([12.04334602, -1.52947016, 0.67768509])
    assert np.allclose(model.coefs['Estimate'], estimates, atol=.001)

    assert isinstance(model.fixef, list)
    assert model.fixef[0].shape == (47, 3)
    assert model.fixef[1].shape == (3, 3)

    assert isinstance(model.ranef, list)
    assert model.ranef[0].shape == (47, 2)
    assert model.ranef[1].shape == (3, 1)

    assert model.ranef_corr.shape == (1, 3)
    assert model.ranef_var.shape == (4, 3)

    assert np.allclose(model.coefs.loc[:, 'Estimate'],
                       model.fixef[0].mean(),
                       atol=.01)

    # Test prediction
    assert np.allclose(model.predict(model.data, use_rfx=True),
                       model.data.fits)
Example #5
def test_glmer_opt_passing():
    np.random.seed(1)
    df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))
    df["DV_int"] = np.random.randint(1, 10, df.shape[0])
    m = Lmer("DV_int ~ IV3 + (1|Group)", data=df, family="poisson")
    m.fit(summarize=False,
          control="optCtrl = list(FtolAbs=1e-1, FtolRel=1e-1, maxfun=10)")
    assert len(m.warnings) >= 1
Example #6
def test_anova():

    np.random.seed(1)
    data = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))
    data["DV_l2"] = np.random.randint(0, 4, data.shape[0])
    model = Lmer("DV ~ IV3*DV_l2 + (IV3|Group)", data=data)
    model.fit(summarize=False)
    out = model.anova()
    assert out.shape == (3, 7)
Example #7
def test_inverse_gaussian_lmm():

    np.random.seed(1)
    df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))
    df["DV_g"] = np.random.uniform(1, 2, size=df.shape[0])
    m = Lmer("DV_g ~ IV3 + (1|Group)", data=df, family="inverse_gaussian")
    m.fit(summarize=False)
    assert m.family == "inverse_gaussian"
    assert m.coefs.shape == (2, 7)
Example #8
def test_gamma_lmm():

    np.random.seed(1)
    df = pd.read_csv(os.path.join(get_resource_path(), 'sample_data.csv'))
    df['DV_g'] = np.random.uniform(1, 2, size=df.shape[0])
    m = Lmer('DV_g ~ IV3 + (1|Group)', data=df, family='gamma')
    m.fit(summarize=False)
    assert m.family == 'gamma'
    assert m.coefs.shape == (2, 7)
Example #9
def test_poisson_lmm():
    np.random.seed(1)
    df = pd.read_csv(os.path.join(get_resource_path(), 'sample_data.csv'))
    df['DV_int'] = np.random.randint(1, 10, df.shape[0])
    m = Lmer('DV_int ~ IV3 + (1|Group)', data=df, family='poisson')
    m.fit(summarize=False)
    assert m.family == 'poisson'
    assert m.coefs.shape == (2, 7)
    assert 'Z-stat' in m.coefs.columns
Example #10
def mixeff_multinteraction2level_model(dataframe):
    """
    Multi-level model_5_sci includes intercept, multiple interactions and
    fixed effects,
     and setting ESCS as random on country level.

        :param dataframe: a data frame with student ID, school ID, country ID,
        science, math, reading, and other five selected variables as
        predictors.
        :return: the model results
    """
    # one random effect and multiple interactions between gender and factors
    model_5_sci = Lmer(
        'log_science ~ IBTEACH + WEALTH + ESCS + female + '
        'Sch_science_resource '
        '+ female*ESCS '
        '+ female*WEALTH + female*IBTEACH + (ESCS | CountryID)',
        data=dataframe)
    # model must be fitted in order to get estimate results
    model_5_sci.fit(REML=False)
    # print summary since auto-generated result doesn't include fixed effects
    print(model_5_sci.summary())
    model_5_sci.plot_summary()
    # Visualizing random effect of a predictor
    model_5_sci.plot('ESCS', plot_ci=True, ylabel='Predicted log_science')

    sns.regplot(x='ESCS', y='residuals', data=model_5_sci.data, fit_reg=False)
    # Inspecting overall fit
    sns.regplot(x='fits',
                y='log_science',
                units='CountryID',
                data=model_5_sci.data,
                fit_reg=True)
    return model_5_sci
Example #11
def random_effect_2level_model(dataframe):
    """
    Multi-level model_1_sci includes intercept, variable as fixed and the
    interaction term
    random on country level.

        :param dataframe: a data frame with student ID, school ID, country ID,
        science, math, reading, and other five selected variables as
        predictors.
        :return: the model results
    """
    # Random intercept and slope two-level model:
    model_1_sci = Lmer('Science ~ female + (female*ESCS | CountryID)',
                       data=dataframe)
    # model must be fitted in order to get estimate results
    model_1_sci.fit(REML=False)
    # print summary since auto-generated result doesn't include fixed effects
    print(model_1_sci.summary())
    model_1_sci.plot_summary()
    # Visualizing random effect of a predictor
    model_1_sci.plot('female', plot_ci=True, ylabel='Predicted log_science')

    sns.regplot(x='female',
                y='residuals',
                data=model_1_sci.data,
                fit_reg=False)
    # Inspecting overall fit
    sns.regplot(x='fits',
                y='log_science',
                units='CountryID',
                data=model_1_sci.data,
                fit_reg=True)
    return model_1_sci
Example #12
def run_bootstrapped_eqtl_stability_with_residuals_one_test_v2(
        expression, genotype, covariates, individuals, individual_to_cells,
        num_bootstraps, sampling_fraction, seed):
    np.random.seed(seed)
    #residual_expression = regress_out_covariates(expression, covariates)
    #residual_genotype = regress_out_covariates(genotype, covariates)

    # Covariate matrix
    num_cov = covariates.shape[1]
    X = np.vstack(
        (expression, individuals.astype(str), genotype, covariates.T)).T
    # Create column names
    cov_names = ['cov' + str(i) for i in range(num_cov)]
    col_names = ['y', 'group', 'g'] + cov_names

    # Make df (np.vstack coerced every column to string above because of
    # astype(str), so cast the numeric columns back to float)
    df = pd.DataFrame(X, columns=col_names)
    df[['y', 'g'] + cov_names] = df[['y', 'g'] + cov_names].astype(float)
    # Make formula for LMM
    if num_cov > 0:
        formula = 'y ~ g + ' + ' + '.join(cov_names) + ' + (1 | group)'
    else:
        formula = 'y ~ g + ' + '(1 | group)'

    model = Lmer(formula, data=df)
    model.fit()

    beta = model.coefs['Estimate'][1]
    standard_error = model.coefs['SE'][1]
    eqtl_pvalue = model.coefs['P-val'][1]
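    # het_breuschpagan returns (LM stat, LM p-value, F stat, F p-value);
    # bp_test[3] in the return below is therefore the F-test p-value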
    bp_test = het_breuschpagan(model.residuals, np.vstack(genotype))
    # pdb.set_trace()  # debug breakpoint (would pause execution here)
    #X2 = sm.add_constant(X)
    #reg = LinearRegression().fit(X, expression)
    #est = sm.MixedLM(endog=expression, exog=X2, groups=individuals).fit()
    #est = sm.OLS(expression,X2).fit()
    #eqtl_pvalue = est.pvalues[1]
    #bp_test = het_breuschpagan(est.resid, np.vstack(genotype))
    #bp_test = het_breuschpagan(est.resid, X)
    #white_test = het_white(est.resid,X)
    #print(white_test)
    #model = ols(expression, X)
    #for bootstrap_num in range(num_bootstraps):
    #	indices = get_bootstrapped_indices(individuals, individual_to_cells, sampling_fraction)
    #	bootstrapped_beta = run_eqtl_on_residual_expression_one_test_lm(residual_expression[indices], genotype[indices])
    #bootstrapped_betas.append(bootstrapped_beta)
    #bootstrapped_perm_beta = run_eqtl_on_residual_expression_one_test_lm(residual_expression[indices], np.random.permutation(genotype[indices]))
    #bootstrapped_perm_betas.append(bootstrapped_perm_beta)
    #print(np.max(bootstrapped_betas) - np.min(bootstrapped_betas))
    #print(np.max(bootstrapped_perm_betas) - np.min(bootstrapped_perm_betas))
    #print(np.var(bootstrapped_betas))
    #print(np.var(bootstrapped_perm_betas))
    #print(np.mean(bootstrapped_betas))
    #print(np.mean(bootstrapped_perm_betas))
    return eqtl_pvalue, bp_test[3]
Example #13
def test_install():
    """
    Quick function to test installation by import a lmm object and fitting a quick model.
    """
    try:
        from pymer4.models import Lmer
        from pymer4.utils import get_resource_path
        import os
        import pandas as pd
        import warnings
        warnings.filterwarnings("ignore")
        df = pd.read_csv(os.path.join(get_resource_path(), 'sample_data.csv'))
        model = Lmer('DV ~ IV3 + (1|Group)', data=df)
        model.fit(summarize=False)
        print("Pymer4 installation working successfully!")
    except Exception as e:
        print("Error! {}".format(e))
Example #14
def fixed_effect_3level_model(dataframe):
    """
    Multi-level model_2_sci includes intercept, variables as fixed effect.

        :param dataframe: a data frame with student ID, school ID, country ID,
        science, math, reading, and other five selected variables as
        predictors.
        :return: the model results
    """
    # Fixed effects three-level model
    model_2_sci = Lmer(
        'log_science ~ IBTEACH + WEALTH '
        '+ ESCS + female + Sch_science_resource '
        '+ (1 | SchoolID/CountryID)',
        data=dataframe)
    # model must be fitted in order to get estimate results
    model_2_sci.fit(REML=False)
    # print summary since auto-generated result doesn't include fixed effects
    print(model_2_sci.summary())
    model_2_sci.plot_summary()
    sns.regplot(x='Sch_science_resource',
                y='residuals',
                data=model_2_sci.data,
                fit_reg=False)
    # Inspecting overall fit
    sns.regplot(x='fits',
                y='log_science',
                units='CountryID',
                data=model_2_sci.data,
                fit_reg=True)

    return model_2_sci
Example #15
def random_intercept_3level_model(dataframe):
    """
    Multi-level model_0_sci includes grand-mean intercept and setting outcome
    of log science
    scores as random.

        :param dataframe: a data frame with student ID, school ID, country ID,
        science, math, reading, and other five selected variables as
        predictors.
        :return: the model results
    """
    # Random Intercept-only three-level model
    model_0_sci = Lmer('log_science ~ 1 + (1 | SchoolID/CountryID)',
                       data=dataframe)
    # model must be fitted in order to get estimate results
    model_0_sci.fit(REML=False)
    # print summary since auto-generated result doesn't include fixed effects
    print(model_0_sci.summary())
    # plot summary
    model_0_sci.plot_summary()
    # Inspecting overall fit
    sns.regplot(x='fits',
                y='log_science',
                units='CountryID',
                data=model_0_sci.data,
                fit_reg=True)
    return model_0_sci
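
A minimal sketch of how these PISA-style model helpers might be driven (hypothetical: assumes a prepared DataFrame pisa_df containing the columns named in the docstrings, and that the fitted pymer4 models expose an AIC attribute):

# hypothetical driver for the model functions above
baseline = random_intercept_3level_model(pisa_df)
full = fixed_effect_3level_model(pisa_df)
# compare the intercept-only and fixed-effects fits
print(baseline.AIC, full.AIC)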
Example #16
def test_lmer_opt_passing():
    df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))
    model = Lmer("DV ~ IV2 + (IV2|Group)", data=df)
    opt_opts = "optCtrl = list(ftol_abs=1e-8, xtol_abs=1e-8)"
    model.fit(summarize=False, control=opt_opts)
    estimates = np.array([10.301072, 0.682124])
    assert np.allclose(model.coefs["Estimate"], estimates, atol=0.001)
    assert len(model.warnings) == 0

    df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))
    model = Lmer("DV ~ IV2 + (IV2|Group)", data=df)
    opt_opts = "optCtrl = list(ftol_abs=1e-4, xtol_abs=1e-4)"
    model.fit(summarize=False, control=opt_opts)
    assert len(model.warnings) >= 1
Example #17
def test_logistic_lmm():

    df = pd.read_csv(os.path.join(get_resource_path(), 'sample_data.csv'))
    model = Lmer('DV_l ~ IV1+ (IV1|Group)', data=df, family='binomial')
    model.fit(summarize=False)

    assert model.coefs.shape == (2, 13)
    estimates = np.array([-0.16098421, 0.00296261])
    assert np.allclose(model.coefs['Estimate'], estimates, atol=.001)

    assert isinstance(model.fixef, pd.core.frame.DataFrame)
    assert model.fixef.shape == (47, 2)

    assert isinstance(model.ranef, pd.core.frame.DataFrame)
    assert model.ranef.shape == (47, 2)

    assert np.allclose(model.coefs.loc[:, 'Estimate'],
                       model.fixef.mean(),
                       atol=.01)

    # Test prediction
    assert np.allclose(model.predict(model.data, use_rfx=True),
                       model.data.fits)
    assert np.allclose(
        model.predict(model.data, use_rfx=True, pred_type='link'),
        logit(model.data.fits))
Example #18
def test_gaussian_lmm():

    df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))
    model = Lmer("DV ~ IV3 + IV2 + (IV2|Group) + (1|IV3)", data=df)
    opt_opts = "optimizer='Nelder_Mead', optCtrl = list(FtolAbs=1e-8, XtolRel=1e-8)"
    model.fit(summarize=False, control=opt_opts)

    assert model.coefs.shape == (3, 8)
    estimates = np.array([12.04334602, -1.52947016, 0.67768509])
    assert np.allclose(model.coefs["Estimate"], estimates, atol=0.001)

    assert isinstance(model.fixef, list)
    assert model.fixef[0].shape == (47, 3)
    assert model.fixef[1].shape == (3, 3)

    assert isinstance(model.ranef, list)
    assert model.ranef[0].shape == (47, 2)
    assert model.ranef[1].shape == (3, 1)
    assert (model.ranef[1].index == ["0.5", "1", "1.5"]).all()

    assert model.ranef_corr.shape == (1, 3)
    assert model.ranef_var.shape == (4, 3)

    assert np.allclose(model.coefs.loc[:, "Estimate"],
                       model.fixef[0].mean(),
                       atol=0.01)

    # Test prediction
    assert np.allclose(model.predict(model.data, use_rfx=True),
                       model.data.fits)

    # Test simulate
    out = model.simulate(2)
    assert isinstance(out, pd.DataFrame)
    assert out.shape == (model.data.shape[0], 2)

    out = model.simulate(2, use_rfx=True)
    assert isinstance(out, pd.DataFrame)
    assert out.shape == (model.data.shape[0], 2)

    # Smoketest for old_optimizer
    model.fit(summarize=False, old_optimizer=True)
Example #19
def get_tvals(measure, features, reverse=False):
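    # `stats` is assumed to be a module-level list of network-statistic
    # column names and `it` to be itertools; note that p_matrix is filled
    # below but only the t-value matrix is returned.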
    t_matrix = np.zeros((len(measure), len(stats)))
    p_matrix = np.zeros((len(measure), len(stats)))

    method_count = len(set(features['method']))

    for measure_index, net_index in list(
            it.product(range(len(measure)), range(len(stats)))):
        measure_stat = measure[measure_index]
        net_stat = stats[net_index]

        # create a smaller dataframe

        df = features[['userID', 'topic', 'method', measure_stat, net_stat]]
        df = df.rename(columns={
            measure_stat: 'measure_stat',
            net_stat: 'net_stat'
        })

        # run model
        if method_count > 1:  # if methods to compare
            model = Lmer(
                'measure_stat ~ net_stat  + (1 | topic ) + (1 | method)',
                data=df)
            model.fit(no_warnings=True, summarize=False)

        else:  # no method comparison
            model = Lmer('measure_stat ~ net_stat  + (1 | topic )', data=df)
            model.fit(no_warnings=True, summarize=False)

        # get t-vals
        t_val = model.coefs['T-stat']['net_stat']

        if np.isnan(t_val):
            t_val = 0
            print('Warning: no t-val found for measure %s, stat %s. '
                  'Correlation estimated at 0.' % (measure_stat, net_stat))

        t_matrix[measure_index][net_index] = t_val

        # get p-val
        p_val = model.coefs['P-val']['net_stat']
        p_matrix[measure_index][net_index] = p_val

    corr = pd.DataFrame(t_matrix.T, index=stats, columns=measure)

    return corr
Example #20
def test_contrasts():
    df = sns.load_dataset("gammas").rename(columns={"BOLD signal": "bold"})
    grouped_means = df.groupby("ROI")["bold"].mean()
    model = Lmer("bold ~ ROI + (1|subject)", data=df)

    custom_contrast = grouped_means["AG"] - np.mean(
        [grouped_means["IPS"], grouped_means["V1"]])
    grand_mean = grouped_means.mean()

    con1 = grouped_means["V1"] - grouped_means["IPS"]
    con2 = grouped_means["AG"] - grouped_means["IPS"]
    intercept = grouped_means["IPS"]

    # Treatment contrasts with non-alphabetic order
    model.fit(factors={"ROI": ["IPS", "V1", "AG"]}, summarize=False)

    assert np.allclose(model.coefs.loc["(Intercept)", "Estimate"], intercept)
    assert np.allclose(model.coefs.iloc[1, 0], con1)
    assert np.allclose(model.coefs.iloc[2, 0], con2)

    # Polynomial contrasts
    model.fit(factors={"ROI": ["IPS", "V1", "AG"]},
              ordered=True,
              summarize=False)

    assert np.allclose(model.coefs.loc["(Intercept)", "Estimate"], grand_mean)
    assert np.allclose(model.coefs.iloc[1, 0], 0.870744)  # From R
    assert np.allclose(model.coefs.iloc[2, 0], 0.609262)  # From R

    # Custom contrasts
    model.fit(factors={"ROI": {
        "AG": 1,
        "IPS": -0.5,
        "V1": -0.5
    }},
              summarize=False)

    assert np.allclose(model.coefs.loc["(Intercept)", "Estimate"], grand_mean)
    assert np.allclose(model.coefs.iloc[1, 0], custom_contrast)
Example #21
def test_post_hoc():
    np.random.seed(1)
    df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))
    model = Lmer("DV ~ IV1*IV3*DV_l + (IV1|Group)", data=df, family="gaussian")
    model.fit(
        factors={"IV3": ["0.5", "1.0", "1.5"], "DV_l": ["0", "1"]}, summarize=False
    )

    marginal, contrasts = model.post_hoc(marginal_vars="IV3", p_adjust="dunnet")
    assert marginal.shape[0] == 3
    assert contrasts.shape[0] == 3

    marginal, contrasts = model.post_hoc(marginal_vars=["IV3", "DV_l"])
    assert marginal.shape[0] == 6
    assert contrasts.shape[0] == 15
Example #23
df_two_groups = df.query("IV3 in [0.5, 1.0]").reset_index(drop=True)

# Fit a new model using a categorical predictor with unequal variances (WLS)
model = Lm("DV ~ IV3", data=df_two_groups)
print(model.fit(weights="IV3"))

###############################################################################
# Multi-level models
# ----------------------------
# Fitting a multi-level model works similarly and actually just calls :code:`lmer` or :code:`glmer` in R behind the scenes. The corresponding output is also formatted to be very similar to output of :code:`summary()` in R.

# Import the lmm model class
from pymer4.models import Lmer

# Initialize model instance using 1 predictor with random intercepts and slopes
model = Lmer("DV ~ IV2 + (IV2|Group)", data=df)

# Fit it
print(model.fit())

###############################################################################
# Similar to :code:`Lm` models, :code:`Lmer` models save details in model attributes and have additional methods that can be called using the same syntax as described above.

# Get population level coefficients
print(model.coefs)

###############################################################################

# Get group level coefficients (just the first 5)
# Each row here is a unique intercept and slope
# which vary because we parameterized our rfx that way above
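# a sketch of the elided line (hypothetical, assuming .fixef is a single
# DataFrame here because the model has one grouping factor):
print(model.fixef.head(5))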
Example #24
# Because ANOVA is just regression, :code:`pymer4` can estimate ANOVA tables with F-results using the :code:`.anova()` method on a fitted model. This will compute a Type-III SS table given the coding scheme provided when the model was initially fit. Based on the distribution of data across factor levels and the specific coding-scheme used, this may produce invalid Type-III SS computations. For this reason the :code:`.anova()` method has a :code:`force_orthogonal=True` argument that will reparameterize and refit the model using orthogonal polynomial contrasts prior to computing an ANOVA table.
#
# Here we first estimate a model with dummy-coded categories and suppress the summary output of :code:`.fit()`. Then we use :code:`.anova()` to examine the F-test results.

# import basic libraries and sample data
import os
import pandas as pd
from pymer4.utils import get_resource_path
from pymer4.models import Lmer

# IV3 is a categorical predictor with 3 levels in the sample data
df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))

# We're going to fit a multi-level regression using the
# categorical predictor (IV3) which has 3 levels
model = Lmer("DV ~ IV3 + (1|Group)", data=df)

# Using dummy-coding; suppress summary output
model.fit(factors={"IV3": ["1.0", "0.5", "1.5"]}, summarize=False)

# Get ANOVA table
print(model.anova())

################################################################################
# Type III SS inferences will only be valid if data are fully balanced across levels or if contrasts between levels are orthogonally coded and sum to 0. Below we tell :code:`pymer4` to respecify our contrasts to ensure this before estimating the ANOVA. :code:`pymer4` also saves the last set of contrasts used prior to forcing orthogonality.
#
# Because the sample data is balanced across factor levels and there are no interaction terms, orthogonal contrast coding doesn't change the results in this case.

# Get ANOVA table, but this time force orthogonality
# for valid SS III inferences
# In this case the data are balanced so nothing changes
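# a sketch of the elided call, using the force_orthogonal argument
# described above (assuming the installed pymer4 version exposes it)
print(model.anova(force_orthogonal=True))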
Example #25
import numpy as np
from statsmodels.stats.api import anova_lm
import scipy
from dm_test import dm_test
import pdb
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr

if __name__ == "__main__":
    df_uid = pd.read_csv("./cross-linguistic-data-cleaned-uid.csv")
    df_rig = pd.read_csv("./cross-linguistic-data-cleaned-rig.csv")
    df_uid["rig_b_a_logit"] = df_rig["rig_b_a_logit"]

    # Load and checkout sample data
    model_uid = Lmer(
        "base_atom_order ~ 1.0 + uid_b_a_logit + (1.0|language_family) + (1.0|Subfamily)",
        data=df_uid,
        family="binomial")
    model_rig = Lmer(
        "base_atom_order ~ 1.0 + rig_b_a_logit + (1.0|language_family) + (1.0|Subfamily)",
        data=df_uid,
        family="binomial")
    model_total = Lmer(
        "base_atom_order ~ 1.0 + uid_b_a_logit + rig_b_a_logit + (1.0|language_family) + (1.0|Subfamily)",
        data=df_uid,
        family="binomial")

    #model = Lmer("base_atom_order ~ rig_b_a_prob + (rig_b_a_prob|language_family) + (rig_b_a_prob|Subfamily)", data=df)
    model_uid_fit = model_uid.fit()
    model_rig_fit = model_rig.fit()
    model_total_fit = model_total.fit()
    print(model_total_fit)
Example #26

# (fragment: this listing begins mid-loop; `curve`, `y_df`, `sg`
# (scipy.signal) and `linregress` (scipy.stats) come from code cut off
# above. The surviving lines normalize each envelope curve by its
# regression intercept before appending it to y_df.)
                    #curve = sg.filtfilt(np.ones(3)/3, [1, 0], curve)
                    x0 = linregress(np.linspace(0, 1, 30), curve).intercept
                    #x0 = curve[:15].mean()
                    curve = curve/x0 - 1
                    y_df = y_df.append(pd.DataFrame({'metric_type': metric_type, 'fb_type': fb_type, 'subj_id': 's'+str(subj_id), 'channel': ch, 'k': np.linspace(0, 1, 30), 'env': curve+0.0001, 'band': band}), ignore_index=True)

from pymer4.models import Lm, Lmer
from pymer4.utils import get_resource_path

for b, band in enumerate(['alpha', 'beta', 'theta']):
    for c, ch in enumerate(CHANNELS):
        for m, metric_type in enumerate(['magnitude', 'n_spindles', 'duration', 'amplitude']):
            data = y_df.query('metric_type=="{}" & channel=="{}" & band=="{}"'.format(metric_type, ch, band))
            model = Lmer('env ~ k:fb_type + (1 |subj_id)', data=data, )
            model.fit(factors={'fb_type': ['FB0', 'FB250', 'FB500', 'FBMock']})
            a = model.post_hoc('k', 'fb_type')[1]
            a['channel'] = ch
            a['metric_type'] = metric_type
            a['band'] = band
            a['P-val-full'] = stats.t.sf(a['T-stat'], 9)
            if c==0 and m==0 and b==0:
                all_stats_df = a.copy()
            else:
                all_stats_df = all_stats_df.append(a, ignore_index=True)
            print(ch, metric_type)

from mne.stats import fdr_correction
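
# a plausible continuation (hypothetical): FDR-correct the pooled p-values
reject, p_corrected = fdr_correction(all_stats_df['P-val-full'].values)
all_stats_df['P-val-fdr'] = p_corrected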

Example #27

# (the listing begins mid-call: the opening of the data-simulation call
# that produced `data`, `blups` and `b` below was cut off in the source)
                              num_grps,
                              coef_vals=coef_vals,
                              mus=mus,
                              corrs=corrs)

print(f"True coefficients:\n{b}\n")
print(f"BLUPs:\n{blups.head()}\n")
print(f"Data:\n{data.head()}\n")

###############################################################################
# Again here are some checks you might do to make sure the data were correctly generated (by default lmm data will generally be a bit noisier due to within and across group/cluster variance; see the API for how to customize this):

# Group the data before running checks
group_data = data.groupby("Group")

###############################################################################
# Check mean of predictors within each group
print(group_data.apply(lambda grp: grp.iloc[:, 1:-1].mean(axis=0)))

###############################################################################
# Check correlations between predictors within each group
print(group_data.apply(lambda grp: grp.iloc[:, 1:-1].corr()))

###############################################################################
# Check coefficient recovery
from pymer4.models import Lmer

model = Lmer("DV ~ IV1+IV2+IV3 + (1|Group)", data=data)
model.fit(summarize=False)
print(model.coefs.loc[:, "Estimate"])
Example #28
unique_blocks = list(stats_df['block_number'].unique())
stats_df['k'] = stats_df['block_number'].apply(lambda x: unique_blocks.index(x))
stats_df['subj_id_str'] = 's' + stats_df['subj_id'].astype('str')
stats_df = stats_df.query('k < 15')

import seaborn as sns

sns.catplot('fb_type', 'metric', kind='box', col='metric_type', sharey='col',
            data=stats_df.query('threshold_factor==2.75').groupby(['subj_id', 'fb_type', 'metric_type']).mean().reset_index())




stats_df = pd.read_pickle('release/data/{}.pkl'.format('channels1_bands1_splitedTrue_thresholds17'))
stats_df = stats_df.query('block_number==4 | block_number==36')
stats_df['block_name'] = stats_df['block_number'].apply(lambda x: 0 if x ==4 else 1)
stats_df['subj_id_str'] = 's' + stats_df['subj_id'].astype('str')

sns.catplot('block_name', 'metric', 'fb_type', kind='point', col='metric_type', sharey='col', data=stats_df.query('threshold_factor==2.75'), dodge=True)


from pymer4.models import Lm, Lmer
metric_type = 'duration'
threshold_factor = 2.75
data = stats_df.query('metric_type=="{}" & threshold_factor=={}'.format(metric_type, threshold_factor)).copy()
print(len(data))
data = data.replace([np.inf, -np.inf], np.nan)
data.loc[:, 'metric'] = data['metric'].fillna(data['metric'].min()).values
model = Lmer('metric ~ block_name:fb_type + (1 |subj_id_str)', data=data)
model.fit(factors={'fb_type': ['FB0', 'FB250', 'FB500', 'FBMock']})
model.post_hoc('block_name', 'fb_type')
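
# post_hoc returns (marginal estimates, pairwise contrasts); a variant that
# captures them instead of discarding the return value:
marginal, contrasts = model.post_hoc('block_name', 'fb_type')
print(contrasts)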
Example #29
def test_poisson_lmm():
    np.random.seed(1)
    df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))
    df["DV_int"] = np.random.randint(1, 10, df.shape[0])
    m = Lmer("DV_int ~ IV3 + (1|Group)", data=df, family="poisson")
    m.fit(summarize=False)
    assert m.family == "poisson"
    assert m.coefs.shape == (2, 7)
    assert "Z-stat" in m.coefs.columns

    # Test RFX only
    model = Lmer("DV_int ~ 0 + (IV1|Group)", data=df, family="poisson")
    model.fit(summarize=False)
    assert model.fixef.shape == (47, 2)

    model = Lmer("DV_int ~ 0 + (IV1|Group) + (1|IV3)",
                 data=df,
                 family="poisson")
    model.fit(summarize=False)
    assert isinstance(model.fixef, list)
    assert model.fixef[0].shape == (47, 2)
    assert model.fixef[1].shape == (3, 2)
Example #30
def test_logistic_lmm():

    df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))
    model = Lmer("DV_l ~ IV1+ (IV1|Group)", data=df, family="binomial")
    model.fit(summarize=False)

    assert model.coefs.shape == (2, 13)
    estimates = np.array([-0.16098421, 0.00296261])
    assert np.allclose(model.coefs["Estimate"], estimates, atol=0.001)

    assert isinstance(model.fixef, pd.core.frame.DataFrame)
    assert model.fixef.shape == (47, 2)

    assert isinstance(model.ranef, pd.core.frame.DataFrame)
    assert model.ranef.shape == (47, 2)

    assert np.allclose(model.coefs.loc[:, "Estimate"],
                       model.fixef.mean(),
                       atol=0.01)

    # Test prediction
    assert np.allclose(model.predict(model.data, use_rfx=True),
                       model.data.fits)
    assert np.allclose(
        model.predict(model.data, use_rfx=True, pred_type="link"),
        logit(model.data.fits),
    )

    # Test RFX only
    model = Lmer("DV_l ~ 0 + (IV1|Group)", data=df, family="binomial")
    model.fit(summarize=False)
    assert model.fixef.shape == (47, 2)

    model = Lmer("DV_l ~ 0 + (IV1|Group) + (1|IV3)",
                 data=df,
                 family="binomial")
    model.fit(summarize=False)
    assert isinstance(model.fixef, list)
    assert model.fixef[0].shape == (47, 2)
    assert model.fixef[1].shape == (3, 2)