コード例 #1
0
def fixed_effect_3level_model(dataframe):
    """
    Fit the fixed-effects multi-level model for ``log_science`` and plot it.

    The formula uses IBTEACH, WEALTH, ESCS, female and Sch_science_resource
    as fixed effects plus a random intercept over ``SchoolID/CountryID``.
    The summary is printed, the summary plot drawn, and two seaborn
    scatter plots are produced from the fitted model's data.

        :param dataframe: a data frame that is expected to contain every
        column named in the formula.
        :return: the fitted model object
    """
    fixed_terms = ['IBTEACH', 'WEALTH', 'ESCS', 'female',
                   'Sch_science_resource']
    # NOTE(review): in lme4 syntax ``a/b`` nests b within a -- confirm that
    # SchoolID/CountryID (rather than CountryID/SchoolID) is intended.
    random_term = '(1 | SchoolID/CountryID)'
    formula = 'log_science ~ ' + ' + '.join(fixed_terms + [random_term])

    fitted = Lmer(formula, data=dataframe)
    # Estimates are only available once fit() has been called.
    fitted.fit(REML=False)
    print(fitted.summary())
    fitted.plot_summary()

    # Residuals against the school science resource predictor.
    sns.regplot(data=fitted.data,
                x='Sch_science_resource',
                y='residuals',
                fit_reg=False)
    # Overall fit: fitted values against the outcome, with a regression line.
    sns.regplot(data=fitted.data,
                x='fits',
                y='log_science',
                units='CountryID',
                fit_reg=True)

    return fitted
コード例 #2
0
def random_effect_2level_model(dataframe):
    """
    Fit the two-level random intercept-and-slope model and plot diagnostics.

    ``female`` enters as a fixed effect and ``female*ESCS`` varies by
    ``CountryID``. The summary is printed, then the summary plot, the
    per-predictor plot for ``female`` and two seaborn scatter plots are drawn.

        :param dataframe: a data frame that is expected to contain every
        column named in the formula and in the plots.
        :return: the fitted model object
    """
    # NOTE(review): the outcome here is ``Science`` while the plots below
    # refer to ``log_science`` -- confirm which outcome is intended.
    formula = 'Science ~ female + (female*ESCS | CountryID)'
    fitted = Lmer(formula, data=dataframe)
    # Estimates are only available once fit() has been called.
    fitted.fit(REML=False)
    print(fitted.summary())
    fitted.plot_summary()

    # Visualise the effect of the predictor across groups.
    fitted.plot('female', plot_ci=True, ylabel='Predicted log_science')

    # Residuals against the predictor, without a regression line.
    sns.regplot(data=fitted.data, x='female', y='residuals', fit_reg=False)
    # Overall fit: fitted values against the outcome.
    sns.regplot(data=fitted.data,
                x='fits',
                y='log_science',
                units='CountryID',
                fit_reg=True)
    return fitted
def run_linear_mixed_model_for_initialization(Y, G, cov, z):
    """
    Fit one mixed model per column of ``Y`` and collect the residuals.

    For every column index ``t`` the model
    ``y ~ g + x0 + ... + x{k-1} + (1|z)`` is fitted, where ``y`` is
    ``Y[:, t]``, ``g`` is ``G[:, t]`` and ``x{i}`` is ``cov[:, i]``.

    :param Y: 2-D array; one response per column.
    :param G: 2-D array indexed like ``Y``; column ``t`` is the ``g`` term.
    :param cov: 2-D array of covariates, one covariate per column.
    :param z: grouping values used for the random intercept.
    :return: array of residuals, transposed so that each column corresponds
        to one fitted model (an empty array when ``Y`` has no columns).
    """
    num_covs = cov.shape[1]
    cov_names = ['x' + str(cov_num) for cov_num in range(num_covs)]
    # The formula is identical for every test, so build it once.
    model_eq = ' + '.join(['y ~ g'] + cov_names + ['(1|z)'])

    residuals = []
    for test_number in range(Y.shape[1]):
        print(test_number)
        g_vec = G[:, test_number]
        columns = {'y': Y[:, test_number], 'z': z, 'g': g_vec}
        for cov_num, cov_name in enumerate(cov_names):
            columns[cov_name] = cov[:, cov_num]

        model = Lmer(model_eq, data=pd.DataFrame(columns))
        model.fit()
        residuals.append(model.residuals)
        # Diagnostic only: mean/std of the residuals scaled by g.
        scaled = model.residuals / g_vec
        print(np.mean(scaled) / np.std(scaled))
        print('\n')
    return np.transpose(np.asarray(residuals))
コード例 #4
0
def mixeff_multinteraction2level_model(dataframe):
    """
    Fit the two-level model with several ``female`` interactions.

    Fixed effects are IBTEACH, WEALTH, ESCS, female and
    Sch_science_resource together with the interactions of ``female`` with
    ESCS, WEALTH and IBTEACH; ESCS additionally varies by ``CountryID``.
    The summary is printed and the diagnostic plots are drawn.

        :param dataframe: a data frame that is expected to contain every
        column named in the formula.
        :return: the fitted model object
    """
    main_effects = ['IBTEACH', 'WEALTH', 'ESCS', 'female',
                    'Sch_science_resource']
    interactions = ['female*ESCS', 'female*WEALTH', 'female*IBTEACH']
    random_part = ['(ESCS | CountryID)']
    formula = 'log_science ~ ' + ' + '.join(
        main_effects + interactions + random_part)

    fitted = Lmer(formula, data=dataframe)
    # Estimates are only available once fit() has been called.
    fitted.fit(REML=False)
    print(fitted.summary())
    fitted.plot_summary()

    # Visualise the effect of ESCS across groups.
    fitted.plot('ESCS', plot_ci=True, ylabel='Predicted log_science')

    # Residuals against ESCS, without a regression line.
    sns.regplot(data=fitted.data, x='ESCS', y='residuals', fit_reg=False)
    # Overall fit: fitted values against the outcome.
    sns.regplot(data=fitted.data,
                x='fits',
                y='log_science',
                units='CountryID',
                fit_reg=True)
    return fitted
コード例 #5
0
def random_intercept_3level_model(dataframe):
    """
    Fit the intercept-only multi-level model for ``log_science``.

    The only term is a random intercept over ``SchoolID/CountryID``. The
    summary is printed, the summary plot drawn, and fitted values are
    plotted against the outcome.

        :param dataframe: a data frame that is expected to contain
        log_science, SchoolID and CountryID.
        :return: the fitted model object
    """
    formula = 'log_science ~ 1 | SchoolID/CountryID'
    fitted = Lmer(formula, data=dataframe)
    # Estimates are only available once fit() has been called.
    fitted.fit(REML=False)
    print(fitted.summary())
    fitted.plot_summary()

    # Overall fit: fitted values against the outcome, with a regression line.
    overall_fit = dict(x='fits', y='log_science', units='CountryID')
    sns.regplot(data=fitted.data, fit_reg=True, **overall_fit)
    return fitted
コード例 #6
0
ファイル: test_models.py プロジェクト: miguel-uicab/pymer4
def test_logistic_lmm():
    """Fit a binomial model on the sample data and check its outputs."""
    sample_csv = os.path.join(get_resource_path(), 'sample_data.csv')
    df = pd.read_csv(sample_csv)
    model = Lmer('DV_l ~ IV1+ (IV1|Group)', data=df, family='binomial')
    model.fit(summarize=False)

    # Fixed-effect table: shape and reference estimates.
    expected_estimates = np.array([-0.16098421, 0.00296261])
    assert model.coefs.shape == (2, 13)
    assert np.allclose(model.coefs['Estimate'], expected_estimates, atol=.001)

    # With a single random-effect term both tables are plain DataFrames.
    frame_type = pd.core.frame.DataFrame
    assert isinstance(model.fixef, frame_type)
    assert model.fixef.shape == (47, 2)
    assert isinstance(model.ranef, frame_type)
    assert model.ranef.shape == (47, 2)

    population = model.coefs.loc[:, 'Estimate']
    assert np.allclose(population, model.fixef.mean(), atol=.01)

    # Predictions must reproduce the stored fits, on both scales.
    response_pred = model.predict(model.data, use_rfx=True)
    assert np.allclose(response_pred, model.data.fits)
    link_pred = model.predict(model.data, use_rfx=True, pred_type='link')
    assert np.allclose(link_pred, logit(model.data.fits))
コード例 #7
0
def run_bootstrapped_eqtl_lmm_stability_one_test(expression, genotype,
                                                 covariates, individuals,
                                                 individual_to_cells,
                                                 num_bootstraps,
                                                 sampling_fraction):
    """
    Re-fit the model ``y ~ g + covariates + (1 | group)`` on bootstrap samples.

    :param expression: 1-D array used as the response ``y``.
    :param genotype: 1-D array used as the ``g`` term.
    :param covariates: 2-D array, one covariate per column (may have zero
        columns).
    :param individuals: 1-D array used as the grouping column ``group``.
    :param individual_to_cells: passed through to
        ``get_bootstrapped_indices``.
    :param num_bootstraps: number of bootstrap fits to run.
    :param sampling_fraction: passed through to ``get_bootstrapped_indices``.
    :return: array with the second coefficient estimate of each fit (the
        row after the first in ``model.coefs``; presumably the ``g`` term).
    """
    num_cov = covariates.shape[1]
    cov_names = ['cov' + str(i) for i in range(num_cov)]

    # One row per observation: response, grouping, genotype, covariates.
    X = np.vstack((expression, individuals, genotype, covariates.T)).T
    df = pd.DataFrame(X, columns=['y', 'group', 'g'] + cov_names)

    formula = 'y ~ ' + ' + '.join(['g'] + cov_names + ['(1 | group)'])

    bootstrapped_betas = []
    for bootstrap_num in range(num_bootstraps):
        print(bootstrap_num)
        indices = get_bootstrapped_indices(individuals, individual_to_cells,
                                           sampling_fraction)
        model = Lmer(formula, data=df.iloc[indices, :])
        model.fit()
        # Explicit positional access: integer keys through [] on a
        # label-indexed Series rely on a deprecated pandas fallback.
        bootstrapped_betas.append(model.coefs['Estimate'].iloc[1])
    return np.asarray(bootstrapped_betas)
コード例 #8
0
def run_dynamic_eqtl_one_test_lmm(expression, genotype, covariates, groups,
                                  environmental_variable):
    """
    Fit ``y ~ g + e + gXe + covariates + (1 | group)`` and return a p-value.

    ``gXe`` is the element-wise product of ``environmental_variable`` and
    ``genotype``.

    :param expression: 1-D array used as the response ``y``.
    :param genotype: 1-D array used as the ``g`` term.
    :param covariates: 2-D array, one covariate per column (may have zero
        columns).
    :param groups: 1-D array used as the grouping column ``group``.
    :param environmental_variable: 1-D array used as the ``e`` term.
    :return: the ``P-val`` entry of the fourth row of ``model.coefs``
        (presumably the ``gXe`` interaction, given the formula order).
    """
    num_cov = covariates.shape[1]
    cov_names = ['cov' + str(i) for i in range(num_cov)]

    # One row per observation, columns in the order named below.
    X = np.vstack((expression, groups, genotype, environmental_variable,
                   environmental_variable * genotype, covariates.T)).T
    df = pd.DataFrame(X, columns=['y', 'group', 'g', 'e', 'gXe'] + cov_names)

    formula = 'y ~ ' + ' + '.join(
        ['g', 'e', 'gXe'] + cov_names + ['(1 | group)'])

    model = Lmer(formula, data=df)
    model.fit()

    # Explicit positional access: integer keys through [] on a
    # label-indexed Series rely on a deprecated pandas fallback.
    return model.coefs['P-val'].iloc[3]
コード例 #9
0
ファイル: test_models.py プロジェクト: miguel-uicab/pymer4
def test_gaussian_lmm():
    """Fit a gaussian model with two random-effect terms and check outputs."""
    sample_csv = os.path.join(get_resource_path(), 'sample_data.csv')
    df = pd.read_csv(sample_csv)
    model = Lmer('DV ~ IV3 + IV2 + (IV2|Group) + (1|IV3)', data=df)
    model.fit(summarize=False)

    # Fixed-effect table: shape and reference estimates.
    expected_estimates = np.array([12.04334602, -1.52947016, 0.67768509])
    assert model.coefs.shape == (3, 8)
    assert np.allclose(model.coefs['Estimate'], expected_estimates, atol=.001)

    # fixef / ranef are lists holding one table per random-effect term.
    assert isinstance(model.fixef, list)
    for position, shape in enumerate([(47, 3), (3, 3)]):
        assert model.fixef[position].shape == shape

    assert isinstance(model.ranef, list)
    for position, shape in enumerate([(47, 2), (3, 1)]):
        assert model.ranef[position].shape == shape

    assert model.ranef_corr.shape == (1, 3)
    assert model.ranef_var.shape == (4, 3)

    population = model.coefs.loc[:, 'Estimate']
    assert np.allclose(population, model.fixef[0].mean(), atol=.01)

    # Predictions with random effects must reproduce the stored fits.
    predictions = model.predict(model.data, use_rfx=True)
    assert np.allclose(predictions, model.data.fits)
コード例 #10
0
def test_gaussian_lmm():
    """Fit a gaussian model with optimizer options and check its outputs."""
    sample_csv = os.path.join(get_resource_path(), "sample_data.csv")
    df = pd.read_csv(sample_csv)
    model = Lmer("DV ~ IV3 + IV2 + (IV2|Group) + (1|IV3)", data=df)
    model.fit(
        summarize=False,
        control="optimizer='Nelder_Mead', optCtrl = list(FtolAbs=1e-8, XtolRel=1e-8)",
    )

    # Fixed-effect table: shape and reference estimates.
    expected_estimates = np.array([12.04334602, -1.52947016, 0.67768509])
    assert model.coefs.shape == (3, 8)
    assert np.allclose(model.coefs["Estimate"], expected_estimates, atol=0.001)

    # fixef / ranef are lists holding one table per random-effect term.
    assert isinstance(model.fixef, list)
    for position, shape in enumerate([(47, 3), (3, 3)]):
        assert model.fixef[position].shape == shape

    assert isinstance(model.ranef, list)
    for position, shape in enumerate([(47, 2), (3, 1)]):
        assert model.ranef[position].shape == shape

    assert model.ranef_corr.shape == (1, 3)
    assert model.ranef_var.shape == (4, 3)

    population = model.coefs.loc[:, "Estimate"]
    assert np.allclose(population, model.fixef[0].mean(), atol=0.01)

    # Predictions with random effects must reproduce the stored fits.
    predictions = model.predict(model.data, use_rfx=True)
    assert np.allclose(predictions, model.data.fits)

    # Smoke tests: these calls only need to complete without raising.
    model.simulate(2)
    model.simulate(2, use_rfx=True)
    model.fit(summarize=False, old_optimizer=True)
コード例 #11
0
ファイル: test_models.py プロジェクト: evgeni-nikolaev/pymer4
def test_glmer_opt_passing():
    """Check that loose optimizer controls on a poisson fit yield warnings."""
    np.random.seed(1)
    sample_csv = os.path.join(get_resource_path(), "sample_data.csv")
    df = pd.read_csv(sample_csv)
    n_rows = df.shape[0]
    df["DV_int"] = np.random.randint(1, 10, n_rows)

    loose_control = "optCtrl = list(FtolAbs=1e-1, FtolRel=1e-1, maxfun=10)"
    m = Lmer("DV_int ~ IV3 + (1|Group)", data=df, family="poisson")
    m.fit(summarize=False, control=loose_control)

    # At least one warning must have been recorded on the model.
    assert len(m.warnings) >= 1
コード例 #12
0
ファイル: test_models.py プロジェクト: miguel-uicab/pymer4
def test_gamma_lmm():
    """Fit a gamma-family model on a random positive outcome."""
    np.random.seed(1)
    sample_csv = os.path.join(get_resource_path(), 'sample_data.csv')
    df = pd.read_csv(sample_csv)
    n_rows = df.shape[0]
    # Uniform draws in [1, 2) serve as the outcome column.
    df['DV_g'] = np.random.uniform(1, 2, size=n_rows)

    m = Lmer('DV_g ~ IV3 + (1|Group)', data=df, family='gamma')
    m.fit(summarize=False)

    assert m.family == 'gamma'
    assert m.coefs.shape == (2, 7)
コード例 #13
0
def test_anova():
    """Check the shape of the ANOVA table for an interaction model."""
    np.random.seed(1)
    sample_csv = os.path.join(get_resource_path(), "sample_data.csv")
    data = pd.read_csv(sample_csv)
    n_rows = data.shape[0]
    # Random integers in [0, 4) form the second term of the interaction.
    data["DV_l2"] = np.random.randint(0, 4, n_rows)

    model = Lmer("DV ~ IV3*DV_l2 + (IV3|Group)", data=data)
    model.fit(summarize=False)

    anova_table = model.anova()
    assert anova_table.shape == (3, 7)
コード例 #14
0
ファイル: test_models.py プロジェクト: evgeni-nikolaev/pymer4
def test_inverse_gaussian_lmm():
    """Fit an inverse-gaussian-family model on a random positive outcome."""
    np.random.seed(1)
    sample_csv = os.path.join(get_resource_path(), "sample_data.csv")
    df = pd.read_csv(sample_csv)
    n_rows = df.shape[0]
    # Uniform draws in [1, 2) serve as the outcome column.
    df["DV_g"] = np.random.uniform(1, 2, size=n_rows)

    m = Lmer("DV_g ~ IV3 + (1|Group)", data=df, family="inverse_gaussian")
    m.fit(summarize=False)

    assert m.family == "inverse_gaussian"
    assert m.coefs.shape == (2, 7)
コード例 #15
0
ファイル: test_models.py プロジェクト: miguel-uicab/pymer4
def test_poisson_lmm():
    """Fit a poisson-family model on a random integer outcome."""
    np.random.seed(1)
    sample_csv = os.path.join(get_resource_path(), 'sample_data.csv')
    df = pd.read_csv(sample_csv)
    n_rows = df.shape[0]
    # Random integers in [1, 10) serve as the count outcome.
    df['DV_int'] = np.random.randint(1, 10, n_rows)

    m = Lmer('DV_int ~ IV3 + (1|Group)', data=df, family='poisson')
    m.fit(summarize=False)

    assert m.family == 'poisson'
    assert m.coefs.shape == (2, 7)
    # The coefficient table must expose a Z statistic column.
    assert 'Z-stat' in m.coefs.columns
コード例 #16
0
def run_bootstrapped_eqtl_stability_with_residuals_one_test_v2(
        expression, genotype, covariates, individuals, individual_to_cells,
        num_bootstraps, sampling_fraction, seed):
    """
    Fit ``y ~ g + covariates + (1 | group)`` and test residual variance.

    The data frame is assembled column by column so that ``y``, ``g`` and
    the covariates keep their own dtypes; only ``group`` is converted to
    strings. (Stacking the string-converted ``individuals`` together with
    the numeric arrays would turn every column into strings.)

    :param expression: 1-D array used as the response ``y``.
    :param genotype: 1-D array used as the ``g`` term.
    :param covariates: 2-D array, one covariate per column (may have zero
        columns).
    :param individuals: 1-D numpy array; converted to strings and used as
        the grouping column ``group``.
    :param individual_to_cells: unused; kept for interface compatibility.
    :param num_bootstraps: unused; kept for interface compatibility.
    :param sampling_fraction: unused; kept for interface compatibility.
    :param seed: value passed to ``np.random.seed`` before fitting.
    :return: tuple ``(eqtl_pvalue, bp_test[3])`` -- the ``P-val`` entry of
        the second coefficient row and the fourth element of the
        ``het_breuschpagan`` result.
    """
    np.random.seed(seed)

    num_cov = covariates.shape[1]
    cov_names = ['cov' + str(i) for i in range(num_cov)]

    columns = {
        'y': expression,
        'group': individuals.astype(str),
        'g': genotype,
    }
    for cov_index, cov_name in enumerate(cov_names):
        columns[cov_name] = covariates[:, cov_index]
    df = pd.DataFrame(columns)

    formula = 'y ~ ' + ' + '.join(['g'] + cov_names + ['(1 | group)'])

    model = Lmer(formula, data=df)
    model.fit()

    # Explicit positional access: integer keys through [] on a
    # label-indexed Series rely on a deprecated pandas fallback.
    eqtl_pvalue = model.coefs['P-val'].iloc[1]

    # NOTE(review): genotype is passed as a single column without a
    # constant term -- confirm this matches het_breuschpagan's expectations.
    bp_test = het_breuschpagan(model.residuals, np.vstack(genotype))
    # Index 3 is presumably the F-test p-value -- TODO confirm against the
    # statsmodels return order.
    return eqtl_pvalue, bp_test[3]
コード例 #17
0
def test_lmer_opt_passing():
    """Check that optimizer controls are forwarded to the fit.

    Tight tolerances must reproduce the reference estimates without
    warnings; loose tolerances must record at least one warning.
    """
    formula = "DV ~ IV2 + (IV2|Group)"
    sample_csv = os.path.join(get_resource_path(), "sample_data.csv")

    # Tight tolerances.
    df = pd.read_csv(sample_csv)
    model = Lmer(formula, data=df)
    model.fit(summarize=False,
              control="optCtrl = list(ftol_abs=1e-8, xtol_abs=1e-8)")
    expected_estimates = np.array([10.301072, 0.682124])
    assert np.allclose(model.coefs["Estimate"], expected_estimates, atol=0.001)
    assert len(model.warnings) == 0

    # Loose tolerances, on a freshly loaded copy of the data.
    df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))
    model = Lmer(formula, data=df)
    model.fit(summarize=False,
              control="optCtrl = list(ftol_abs=1e-4, xtol_abs=1e-4)")
    assert len(model.warnings) >= 1
コード例 #18
0
def test_post_hoc():
    """Check row counts of post-hoc marginal estimates and contrasts."""
    np.random.seed(1)
    sample_csv = os.path.join(get_resource_path(), "sample_data.csv")
    df = pd.read_csv(sample_csv)
    model = Lmer("DV ~ IV1*IV3*DV_l + (IV1|Group)", data=df, family="gaussian")
    factor_levels = {"IV3": ["0.5", "1.0", "1.5"], "DV_l": ["0", "1"]}
    model.fit(factors=factor_levels, summarize=False)

    # One factor with three levels: 3 estimates, 3 contrasts.
    iv3_means, iv3_contrasts = model.post_hoc(marginal_vars="IV3",
                                              p_adjust="dunnet")
    assert iv3_means.shape[0] == 3
    assert iv3_contrasts.shape[0] == 3

    # Two factors (3 x 2 levels): 6 estimates, 15 contrasts.
    cell_means, cell_contrasts = model.post_hoc(marginal_vars=["IV3", "DV_l"])
    assert cell_means.shape[0] == 6
    assert cell_contrasts.shape[0] == 15
コード例 #19
0
def get_tvals(measure, features, reverse=False):
    """
    Build a table of t-statistics relating each measure to each net stat.

    For every pair (measure column, net-stat column) a mixed model
    ``measure_stat ~ net_stat + (1 | topic)`` is fitted, with an extra
    ``(1 | method)`` term when ``features['method']`` holds more than one
    distinct value. The ``T-stat`` of ``net_stat`` is stored; a NaN
    statistic is replaced by 0 and a warning is printed.

    Relies on the module-level names ``stats`` (sequence of net-stat
    column names) and ``it`` (presumably ``itertools``), both defined
    outside this block.

    :param measure: sequence of column names in ``features`` used as
        outcomes.
    :param features: data frame with the columns ``userID``, ``topic``,
        ``method`` plus every measure and net-stat column.
    :param reverse: accepted for interface compatibility; not used.
    :return: data frame of t-statistics with ``stats`` as index and
        ``measure`` as columns.
    """
    t_matrix = np.zeros((len(measure), len(stats)))

    # Only model the method as a random effect when there is more than one.
    formula = 'measure_stat ~ net_stat  + (1 | topic )'
    if len(set(features['method'])) > 1:
        formula += ' + (1 | method)'

    index_pairs = it.product(range(len(measure)), range(len(stats)))
    for measure_index, net_index in index_pairs:
        measure_stat = measure[measure_index]
        net_stat = stats[net_index]

        # Work on a small frame with fixed column names for the formula.
        df = features[['userID', 'topic', 'method', measure_stat, net_stat]]
        df = df.rename(columns={
            measure_stat: 'measure_stat',
            net_stat: 'net_stat'
        })

        model = Lmer(formula, data=df)
        model.fit(no_warnings=True, summarize=False)

        t_val = model.coefs['T-stat']['net_stat']
        if np.isnan(t_val):
            t_val = 0
            # The original message left its %s placeholders unformatted;
            # fill them with the pair being processed.
            print('Warning: no t_val found for measure %s, feature %s. '
                  'Correlation estimated at 0.' % (measure_stat, net_stat))

        t_matrix[measure_index, net_index] = t_val

    return pd.DataFrame(t_matrix.T, index=stats, columns=measure)
コード例 #20
0
def test_install():
    """
    Smoke-test the installation by importing ``Lmer`` and fitting one model.

    Any exception raised along the way is caught and reported on stdout
    instead of propagating.
    """
    try:
        from pymer4.models import Lmer
        from pymer4.utils import get_resource_path
        import os
        import pandas as pd
        import warnings

        # Silence warnings before loading the data and fitting.
        warnings.filterwarnings("ignore")
        sample_csv = os.path.join(get_resource_path(), 'sample_data.csv')
        sample = pd.read_csv(sample_csv)
        quick_model = Lmer('DV ~ IV3 + (1|Group)', data=sample)
        quick_model.fit(summarize=False)
        print("Pymer4 installation working successfully!")
    except Exception as err:
        print(f"Error! {err}")
コード例 #21
0
ファイル: test_models.py プロジェクト: miguel-uicab/pymer4
def test_post_hoc():
    """Check row counts of post-hoc marginal estimates and contrasts."""
    np.random.seed(1)
    sample_csv = os.path.join(get_resource_path(), 'sample_data.csv')
    df = pd.read_csv(sample_csv)
    model = Lmer('DV ~ IV1*IV3*DV_l + (IV1|Group)', data=df, family='gaussian')
    factor_levels = {'IV3': ['0.5', '1.0', '1.5'], 'DV_l': ['0', '1']}
    model.fit(factors=factor_levels, summarize=False)

    # One factor with three levels: 3 estimates, 3 contrasts.
    iv3_means, iv3_contrasts = model.post_hoc(marginal_vars='IV3',
                                              p_adjust='dunnet')
    assert iv3_means.shape[0] == 3
    assert iv3_contrasts.shape[0] == 3

    # Two factors (3 x 2 levels): 6 estimates, 15 contrasts.
    cell_means, cell_contrasts = model.post_hoc(marginal_vars=['IV3', 'DV_l'])
    assert cell_means.shape[0] == 6
    assert cell_contrasts.shape[0] == 15
コード例 #22
0
def test_logistic_lmm():
    """Fit binomial models on the sample data and check their outputs.

    Covers a model with a fixed slope and two models that only contain
    random-effect terms.
    """
    sample_csv = os.path.join(get_resource_path(), "sample_data.csv")
    df = pd.read_csv(sample_csv)
    model = Lmer("DV_l ~ IV1+ (IV1|Group)", data=df, family="binomial")
    model.fit(summarize=False)

    # Fixed-effect table: shape and reference estimates.
    expected_estimates = np.array([-0.16098421, 0.00296261])
    assert model.coefs.shape == (2, 13)
    assert np.allclose(model.coefs["Estimate"], expected_estimates, atol=0.001)

    # With a single random-effect term both tables are plain DataFrames.
    frame_type = pd.core.frame.DataFrame
    assert isinstance(model.fixef, frame_type)
    assert model.fixef.shape == (47, 2)
    assert isinstance(model.ranef, frame_type)
    assert model.ranef.shape == (47, 2)

    population = model.coefs.loc[:, "Estimate"]
    assert np.allclose(population, model.fixef.mean(), atol=0.01)

    # Predictions must reproduce the stored fits, on both scales.
    response_pred = model.predict(model.data, use_rfx=True)
    assert np.allclose(response_pred, model.data.fits)
    link_pred = model.predict(model.data, use_rfx=True, pred_type="link")
    assert np.allclose(link_pred, logit(model.data.fits))

    # Random effects only, single term.
    model = Lmer("DV_l ~ 0 + (IV1|Group)", data=df, family="binomial")
    model.fit(summarize=False)
    assert model.fixef.shape == (47, 2)

    # Random effects only, two terms: fixef becomes a list of tables.
    model = Lmer("DV_l ~ 0 + (IV1|Group) + (1|IV3)", data=df, family="binomial")
    model.fit(summarize=False)
    assert isinstance(model.fixef, list)
    for position, shape in enumerate([(47, 2), (3, 2)]):
        assert model.fixef[position].shape == shape
コード例 #23
0
def test_poisson_lmm():
    """Fit poisson models on a random integer outcome and check outputs.

    Covers a model with a fixed slope and two models that only contain
    random-effect terms.
    """
    np.random.seed(1)
    sample_csv = os.path.join(get_resource_path(), "sample_data.csv")
    df = pd.read_csv(sample_csv)
    n_rows = df.shape[0]
    # Random integers in [1, 10) serve as the count outcome.
    df["DV_int"] = np.random.randint(1, 10, n_rows)

    m = Lmer("DV_int ~ IV3 + (1|Group)", data=df, family="poisson")
    m.fit(summarize=False)
    assert m.family == "poisson"
    assert m.coefs.shape == (2, 7)
    assert "Z-stat" in m.coefs.columns

    # Random effects only, single term.
    model = Lmer("DV_int ~ 0 + (IV1|Group)", data=df, family="poisson")
    model.fit(summarize=False)
    assert model.fixef.shape == (47, 2)

    # Random effects only, two terms: fixef becomes a list of tables.
    model = Lmer("DV_int ~ 0 + (IV1|Group) + (1|IV3)", data=df, family="poisson")
    model.fit(summarize=False)
    assert isinstance(model.fixef, list)
    for position, shape in enumerate([(47, 2), (3, 2)]):
        assert model.fixef[position].shape == shape
コード例 #24
0
ファイル: test_models.py プロジェクト: evgeni-nikolaev/pymer4
def test_contrasts():
    """Check coefficient estimates under three factor codings of ``ROI``.

    Expected values are derived from the per-ROI means of ``bold`` in the
    seaborn ``gammas`` dataset.
    """
    df = sns.load_dataset("gammas").rename(columns={"BOLD signal": "bold"})
    roi_means = df.groupby("ROI")["bold"].mean()
    model = Lmer("bold ~ ROI + (1|subject)", data=df)

    ag_mean = roi_means["AG"]
    ips_mean = roi_means["IPS"]
    v1_mean = roi_means["V1"]
    expected_custom = ag_mean - np.mean([ips_mean, v1_mean])
    expected_grand_mean = roi_means.mean()

    # Treatment contrasts with IPS listed first (non-alphabetic order).
    roi_order = ["IPS", "V1", "AG"]
    model.fit(factors={"ROI": roi_order}, summarize=False)
    assert np.allclose(model.coefs.loc["(Intercept)", "Estimate"], ips_mean)
    assert np.allclose(model.coefs.iloc[1, 0], v1_mean - ips_mean)
    assert np.allclose(model.coefs.iloc[2, 0], ag_mean - ips_mean)

    # Polynomial contrasts; the two reference values come from R.
    model.fit(factors={"ROI": ["IPS", "V1", "AG"]},
              ordered=True,
              summarize=False)
    assert np.allclose(model.coefs.loc["(Intercept)", "Estimate"],
                       expected_grand_mean)
    assert np.allclose(model.coefs.iloc[1, 0], 0.870744)
    assert np.allclose(model.coefs.iloc[2, 0], 0.609262)

    # Custom contrast: AG against the average of IPS and V1.
    custom_weights = {"AG": 1, "IPS": -0.5, "V1": -0.5}
    model.fit(factors={"ROI": custom_weights}, summarize=False)
    assert np.allclose(model.coefs.loc["(Intercept)", "Estimate"],
                       expected_grand_mean)
    assert np.allclose(model.coefs.iloc[1, 0], expected_custom)
コード例 #25
0
ファイル: logistic_glm.py プロジェクト: nightingal3/uid-rig
    # Load and checkout sample data
    model_uid = Lmer(
        "base_atom_order ~ 1.0 + uid_b_a_logit + (1.0|language_family) + (1.0|Subfamily)",
        data=df_uid,
        family="binomial")
    model_rig = Lmer(
        "base_atom_order ~ 1.0 + rig_b_a_logit + (1.0|language_family) + (1.0|Subfamily)",
        data=df_uid,
        family="binomial")
    model_total = Lmer(
        "base_atom_order ~ 1.0 + uid_b_a_logit + rig_b_a_logit + (1.0|language_family) + (1.0|Subfamily)",
        data=df_uid,
        family="binomial")

    #model = Lmer("base_atom_order ~ rig_b_a_prob + (rig_b_a_prob|language_family) + (rig_b_a_prob|Subfamily)", data=df)
    model_uid_fit = model_uid.fit()
    model_rig_fit = model_rig.fit()
    model_total_fit = model_total.fit()
    print(model_total_fit)
    model_total_fit.plot_summary()
    assert False
    #table = anova_lm(model_uid.model_obj, model_rig.model_obj)
    #print(table)
    #assert False

    model_preds_uid = model_uid.predict(df_uid)
    model_preds_rig = model_rig.predict(df_rig)

    error_rig = model_preds_rig - df_rig["base_atom_order"]
    error_uid = model_preds_uid - df_uid["base_atom_order"]
コード例 #26
0
ファイル: example_03_posthoc.py プロジェクト: turbach/pymer4
# import basic libraries and sample data
import os
import pandas as pd
from pymer4.utils import get_resource_path
from pymer4.models import Lmer

# IV3 is a categorical predictor with 3 levels in the sample data
df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))

# We're going to fit a multi-level regression using the
# categorical predictor (IV3) which has 3 levels
model = Lmer("DV ~ IV3 + (1|Group)", data=df)

# Using dummy-coding; suppress summary output
model.fit(factors={"IV3": ["1.0", "0.5", "1.5"]}, summarize=False)

# Get ANOVA table
print(model.anova())

################################################################################
# Type III SS inferences will only be valid if data are fully balanced across levels or if contrasts between levels are orthogonally coded and sum to 0. Below we tell :code:`pymer4` to respecify our contrasts to ensure this before estimating the ANOVA. :code:`pymer4` also saves the last set of contrasts used prior to forcing orthogonality.
#
# Because the sample data is balanced across factor levels and there are no interaction terms, in this case orthogonal contrast coding doesn't change the results.

# Get ANOVA table, but this time force orthogonality
# for valid SS III inferences
# In this case the data are balanced so nothing changes
print(model.anova(force_orthogonal=True))

################################################################################
コード例 #27
0
def test_simulate_lmm():
    """Check ``simulate_lmm`` output and that ``Lmer`` recovers its parameters.

    Simulates grouped data with known coefficients, means and
    correlations, verifies the structure of the generated data, then fits
    a model and checks that the estimates are close to the generating
    values.
    """
    # Simulate some data
    num_obs = 50
    num_coef = 3
    num_grps = 100
    mus = [10.0, 30.0, 2.0]
    coef_vals = [4.0, 1.8, -2, 10]
    corrs = 0.15
    data, blups, b = simulate_lmm(
        num_obs,
        num_coef,
        num_grps,
        coef_vals=coef_vals,
        mus=mus,
        corrs=corrs,
        noise_params=(0, 0.25),
        seed=4,
    )

    # Check data shape (add 2 for DV and group columns)
    assert data.shape == (num_obs * num_grps, num_coef + 2)

    # Check group shapes
    group_data = data.groupby("Group")
    assert group_data.ngroups == num_grps
    assert (group_data.apply(lambda grp: grp.shape == (num_obs, num_coef + 2))
            ).all()

    # Check coefficients are as specified
    assert np.allclose(b, coef_vals)

    # Check blups are close to population values
    # True - Generated < .25
    # The comparison was previously evaluated without ``assert`` and its
    # result discarded, so this check could never fail.
    assert np.allclose(coef_vals, blups.mean(axis=0), atol=0.25)

    # Check column means within groups, i.e. random intercepts
    # True - Generated < 1.1
    assert (group_data.apply(lambda grp: np.allclose(
        grp.iloc[:, 1:-1].mean(axis=0), mus, atol=1.1))).all()

    # Check correlations within group
    # True - Generated < .5
    def grp_corr(grp):
        # Upper triangle (without the diagonal) of the predictor
        # correlation matrix for one group.
        corr = grp.iloc[:, 1:-1].corr().values
        corr = corr[np.triu_indices(corr.shape[0], k=1)]
        return corr

    assert (group_data.apply(
        lambda grp: (np.abs(grp_corr(grp) - corrs) < 0.5).all())).all()

    # Model simulated data
    m = Lmer("DV ~ IV1+IV2+IV3 + (IV1+IV2+IV3|Group)", data=data)
    m.fit(summarize=False)

    # Check random effects variance
    # True - Generated < .25
    assert np.allclose(m.ranef_var.iloc[1:-1, -1], corrs, atol=0.25)

    # Check parameter recovery
    # True - Recovered < .15 for params and < 1 for intercept
    assert (np.abs(m.coefs.iloc[1:, 0] - b[1:]) < 0.15).all()
    assert (np.abs(m.coefs.iloc[0, 0] - b[0]) < 1).all()

    # Check BLUP recovery
    # mean(True - Generated) < .5 (sigma)
    assert np.abs((m.fixef.values - blups.values).ravel()).mean() < 0.5
コード例 #28
0
def run_models(
        model_data=r'C:\Users\K1774755\Downloads\phd\mmse_rebecca\mmse_synthetic_data_20190919.xlsx',
        to_predict='score_combined',
        key='brcid',
        covariates=None,
        timestamps=('score_date_centered', ),
        complete_case=False,
        models=('linear_rdn_int', 'linear_rdn_all_no_intercept',
                'linear_rdn_all', 'quadratic_rdn_int'),
        output_file_path=None):
    """
    Fit a set of mixed models per patient group and collect the outputs.

    For every distinct value of ``patient_diagnosis_super_class``, every
    timestamp and every model type, a formula is built with
    ``lmer_formula``, fitted with ``Lmer`` and rendered with
    ``print_r_model_output``. Results are either written to an Excel file
    or returned as a list.

    :param model_data: data frame, or a path containing 'xlsx' that is
        loaded with ``pd.read_excel``.
    :param to_predict: name passed to ``lmer_formula`` as ``regressor``.
    :param key: grouping column name passed to ``lmer_formula`` as
        ``group``; also used to count patients in the log output.
    :param covariates: optional iterable of column names; all of them must
        exist in ``model_data``.
    :param timestamps: iterable of timestamp column names.
    :param complete_case: when true, common "unknown" markers are replaced
        by NaN and rows missing any covariate are dropped. Without
        covariates no rows are dropped.
    :param models: iterable of model type names understood by
        ``lmer_formula``.
    :param output_file_path: optional '.xlsx' path; a timestamp is
        inserted before the extension and results are written there.
    :return: a one-row failure data frame when a covariate is missing;
        otherwise a list of per-model result frames (empty when results
        were written to ``output_file_path``).
    """
    if isinstance(model_data, str) and 'xlsx' in model_data:
        model_data = pd.read_excel(model_data, index_col=None)

    if covariates is not None:
        # Refuse to run when a requested covariate is not a column.
        if not all(elem in model_data.columns for elem in list(covariates)):
            print('covariates entered do not exist in input data')
            return pd.DataFrame(
                {'output': 'failure - covariates not in input data'},
                index=[0])

    if complete_case:
        print('all cases:', len(model_data), 'observations, ',
              len(model_data[key].unique()), 'patients')
        model_data = model_data.replace({
            'not known': np.nan,
            'Not Known': np.nan,
            'unknown': np.nan,
            'Unknown': np.nan,
            '[nan-nan]': np.nan
        })
        # list(None) would raise; with no covariates there is nothing to
        # filter on.
        if covariates is not None:
            model_data = model_data.dropna(subset=list(covariates),
                                           how='any')
        print('only complete cases:', len(model_data), 'observations, ',
              len(model_data[key].unique()), 'patients')

    writer = None
    if output_file_path is not None:
        st = datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y%m%d-%Hh%M')
        writer = pd.ExcelWriter(output_file_path.replace(
            '.xlsx', st + '.xlsx'),
                                engine='xlsxwriter')

    res = []
    col_num = 0
    for patient_group in list(
            model_data.patient_diagnosis_super_class.unique()):
        df_tmp = model_data[model_data.patient_diagnosis_super_class == patient_group] \
            if patient_group != 'all' else model_data
        row_num = 0
        # Stays None when no model ran for this group.
        to_print = None
        for ts in timestamps:
            for m in models:
                print('running model:', m, '(patient group:', patient_group,
                      ', timestamp:', ts, ')')
                formula = lmer_formula(model_type=m,
                                       regressor=to_predict,
                                       timestamp=ts,
                                       covariates=covariates,
                                       group=key)
                print('using formula', formula)
                model = Lmer(formula, data=df_tmp)
                try:
                    model.fit(REML=True)
                    # Retry with REML=False only when warnings were
                    # actually recorded; an empty collection means none.
                    if model.warnings:
                        model.fit(REML=False)
                    to_print = print_r_model_output(model)
                except Exception as err:
                    print('something went wrong with model fitting', err)
                    to_print = pd.DataFrame({'output': 'failure'}, index=[0])
                to_print = pd.concat([to_print],
                                     keys=[patient_group],
                                     names=[m])

                if writer is not None:
                    to_print.to_excel(writer,
                                      startrow=row_num,
                                      startcol=col_num)
                    row_num += 2 + len(to_print)
                else:
                    # list.append returns None, so its result must not be
                    # assigned back to ``res``.
                    res.append(to_print)

        # Shift right so the next group's tables do not overlap.
        if writer is not None and to_print is not None:
            col_num += to_print.shape[1] + 3

    if writer is not None:
        # close() saves the workbook; ExcelWriter.save() no longer exists
        # in pandas 2.
        writer.close()
    return res
コード例 #29
0
                    x0 = linregress(np.linspace(0, 1, 30), curve).intercept
                    #x0 = curve[:15].mean()
                    curve = curve/x0 - 1
                    y_df = y_df.append(pd.DataFrame({'metric_type':metric_type, 'fb_type': fb_type, 'subj_id': 's'+str(subj_id), 'channel': ch, 'k': np.linspace(0, 1, 30), 'env': curve+0.0001, 'band': band}), ignore_index=True)



from pymer4.models import Lm, Lmer
from pymer4.utils import get_resource_path

# Fit one mixed model per (band, channel, metric) combination and collect the
# post-hoc statistics for `k` within each `fb_type` into one table,
# `all_stats_df`, tagged with the combination it came from.
#
# The per-model frames are gathered in a list and concatenated once at the
# end: `DataFrame.append` was removed in pandas 2.0, and it re-copied the whole
# growing table on every iteration.
stats_frames = []
for band in ['alpha', 'beta', 'theta']:
    for ch in CHANNELS:
        for metric_type in ['magnitude', 'n_spindles', 'duration', 'amplitude']:
            # Rows of y_df belonging to this metric / channel / band only.
            data = y_df.query('metric_type=="{}" & channel=="{}" & band=="{}"'.format(metric_type, ch, band))
            # env modelled by a k-by-fb_type interaction with a random
            # intercept per subject.
            model = Lmer('env ~ k:fb_type + (1 |subj_id)', data=data, )
            model.fit(factors={'fb_type': ['FB0', 'FB250', 'FB500', 'FBMock']})
            # post_hoc returns a tuple; only its second element is kept.
            a = model.post_hoc('k', 'fb_type')[1]
            a['channel'] = ch
            a['metric_type'] = metric_type
            a['band'] = band
            # Upper-tail probability of the T-stat with 9 degrees of freedom
            # (assuming `stats` is scipy.stats).
            # NOTE(review): the 9 is hard-coded — confirm it matches the design.
            a['P-val-full'] = stats.t.sf(a['T-stat'], 9)
            stats_frames.append(a)
            print(ch, metric_type)

# A single concat yields the same 0..N-1 index the repeated
# append(ignore_index=True) calls produced; an empty CHANNELS gives an empty
# table instead of leaving all_stats_df undefined.
if stats_frames:
    all_stats_df = pd.concat(stats_frames, ignore_index=True)
else:
    all_stats_df = pd.DataFrame()

from mne.stats import fdr_correction


# Pre-allocate a zero-filled 4-D array of shape (3, 6, 4, 32).
# NOTE(review): the axis meanings are not visible here — the 3 and 4 may
# correspond to the three bands and four metric types looped over above;
# confirm against the code that fills this array.
data = np.zeros((3, 6, 4, 32))
コード例 #30
0
ファイル: test_models.py プロジェクト: evgeni-nikolaev/pymer4
def test_gaussian_lmm():
    """Check a Gaussian ``Lmer`` fit on the packaged ``sample_data.csv``.

    Three models are fitted in turn: one with two random-effects terms, one
    with a single random-effects term, and one with two random-effects terms
    but no fixed-effect predictors.  The first model is additionally run
    through ``predict``, ``simulate`` and a refit with ``old_optimizer=True``.
    """
    opt_opts = "optimizer='Nelder_Mead', optCtrl = list(FtolAbs=1e-8, XtolRel=1e-8)"
    df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))

    def fit_lmm(formula):
        # Build and fit with the shared optimizer settings, no summary output.
        fitted = Lmer(formula, data=df)
        fitted.fit(summarize=False, control=opt_opts)
        return fitted

    def check_fixef_tables(fitted, n_cols):
        # With two random-effects terms ``fixef`` is a list holding one table
        # per grouping factor: Group first, then IV3.
        assert isinstance(fitted.fixef, list)
        expected_index = (
            (int, df.Group.unique()),
            (float, df.IV3.unique()),
        )
        for pos, (caster, levels) in enumerate(expected_index):
            assert (fitted.fixef[pos].index.astype(caster) == levels).all()
        for pos, n_rows in enumerate((47, 3)):
            assert fitted.fixef[pos].shape == (n_rows, n_cols)

    model = fit_lmm("DV ~ IV3 + IV2 + (IV2|Group) + (1|IV3)")

    # Fixed-effect coefficient table
    expected_estimates = np.array([12.04334602, -1.52947016, 0.67768509])
    assert model.coefs.shape == (3, 8)
    assert np.allclose(model.coefs["Estimate"], expected_estimates, atol=0.001)

    check_fixef_tables(model, n_cols=3)

    # Random-effect tables, again one per grouping factor
    assert isinstance(model.ranef, list)
    for pos, expected_shape in enumerate([(47, 2), (3, 1)]):
        assert model.ranef[pos].shape == expected_shape
    assert (model.ranef[1].index == ["0.5", "1", "1.5"]).all()

    assert model.ranef_corr.shape == (1, 3)
    assert model.ranef_var.shape == (4, 3)

    # Averaging the per-Group coefficients should land near the overall ones
    group_means = model.fixef[0].mean()
    assert np.allclose(model.coefs.loc[:, "Estimate"], group_means, atol=0.01)

    # Predicting on the model's own data with random effects matches the fits
    predicted = model.predict(model.data, use_rfx=True)
    assert np.allclose(predicted, model.data.fits)

    # simulate: one column per requested draw, without and with random effects
    for sim_kwargs in ({}, {"use_rfx": True}):
        out = model.simulate(2, **sim_kwargs)
        assert isinstance(out, pd.DataFrame)
        assert out.shape == (model.data.shape[0], 2)

    # Smoke test only: refitting with the old optimizer must not raise
    model.fit(summarize=False, old_optimizer=True)

    # Single random-effects term: fixef is one table rather than a list
    model = fit_lmm("DV ~ IV3 + IV2 + (IV2|Group)")
    assert (model.fixef.index.astype(int) == df.Group.unique()).all()
    assert model.fixef.shape == (47, 3)
    assert np.allclose(
        model.coefs.loc[:, "Estimate"], model.fixef.mean(), atol=0.01
    )

    # No fixed-effect predictors: the per-cluster tables have two columns
    model = fit_lmm("DV ~ (IV2|Group) + (1|IV3)")
    check_fixef_tables(model, n_cols=2)