def run_dynamic_eqtl_one_test_lmm(expression, genotype, covariates, groups, environmental_variable):
    """Fit one dynamic-eQTL mixed model and return the interaction p-value.

    The fitted formula is ``y ~ g + e + gXe + cov0 + ... + (1 | group)``,
    where ``gXe`` is the element-wise product of ``environmental_variable``
    and ``genotype``.

    Args:
        expression: 1-D array used as the response ``y``.
        genotype: 1-D array used as the fixed effect ``g``.
        covariates: 2-D array with one column per covariate (zero columns
            is allowed).
        groups: 1-D array used as the random-intercept grouping factor.
        environmental_variable: 1-D array used as the fixed effect ``e``.

    Returns:
        The ``P-val`` entry at row position 3 of ``model.coefs``.
        Presumably this is the ``gXe`` row ((Intercept), g, e, gXe in
        formula order) -- confirm against the Lmer output.
    """
    num_cov = covariates.shape[1]
    cov_names = ['cov' + str(i) for i in range(num_cov)]
    col_names = ['y', 'group', 'g', 'e', 'gXe'] + cov_names

    # vstack stacks each model term as a row; transpose to one column per term.
    X = np.vstack((expression, groups, genotype, environmental_variable,
                   environmental_variable * genotype, covariates.T)).T
    df = pd.DataFrame(X, columns=col_names)

    # Joining the term list yields the same formula with or without covariates.
    fixed_terms = ['g', 'e', 'gXe'] + cov_names
    formula = 'y ~ ' + ' + '.join(fixed_terms) + ' + (1 | group)'

    model = Lmer(formula, data=df)
    model.fit()

    # Use .iloc for the positional lookup: plain [] with an integer key on a
    # label-indexed Series is deprecated in recent pandas releases.
    return model.coefs['P-val'].iloc[3]
def run_linear_mixed_model_for_initialization(Y, G, cov, z):
    """Fit ``y ~ g + x0 + ... + (1|z)`` for every test and collect residuals.

    Args:
        Y: 2-D array; column ``t`` is the response of test ``t``.
        G: 2-D array; column ``t`` is the ``g`` predictor of test ``t``.
        cov: 2-D array; column ``i`` becomes the fixed-effect covariate
            ``x<i>``.
        z: grouping variable for the random intercept.

    Returns:
        2-D array holding the model residuals, one column per test.
    """
    num_tests = Y.shape[1]
    cov_names = ['x' + str(cov_num) for cov_num in range(cov.shape[1])]
    # The formula is identical for every test, so build it once.
    model_eq = ' + '.join(['y ~ g'] + cov_names) + ' + (1|z)'

    residuals = []
    for test_number in range(num_tests):
        print(test_number)
        g_vec = G[:, test_number]
        dd = {'y': Y[:, test_number], 'z': z, 'g': g_vec}
        for cov_num, name in enumerate(cov_names):
            dd[name] = cov[:, cov_num]

        model = Lmer(model_eq, data=pd.DataFrame(dd))
        model.fit()
        # The leftover pdb.set_trace() breakpoint that used to sit here was
        # removed; it halted the loop on every test.
        residuals.append(model.residuals)

        # Diagnostic: mean/std ratio of the genotype-scaled residuals.
        scaled = model.residuals / g_vec
        print(np.mean(scaled) / np.std(scaled))
        print('\n')

    # Rows were appended per test; transpose to one column per test.
    return np.transpose(np.asarray(residuals))
def run_bootstrapped_eqtl_lmm_stability_one_test(expression, genotype, covariates, individuals, individual_to_cells, num_bootstraps, sampling_fraction):
    """Refit ``y ~ g + cov... + (1 | group)`` on bootstrap samples.

    Args:
        expression: 1-D array used as the response ``y``.
        genotype: 1-D array used as the fixed effect ``g``.
        covariates: 2-D array with one column per covariate.
        individuals: 1-D array used as the random-intercept grouping factor;
            also forwarded to ``get_bootstrapped_indices``.
        individual_to_cells: forwarded to ``get_bootstrapped_indices``.
        num_bootstraps: number of bootstrap refits.
        sampling_fraction: forwarded to ``get_bootstrapped_indices``.

    Returns:
        1-D array with one ``Estimate`` per bootstrap, taken from row
        position 1 of ``model.coefs`` (presumably the ``g`` row -- confirm
        against the Lmer output).
    """
    num_cov = covariates.shape[1]
    cov_names = ['cov' + str(i) for i in range(num_cov)]
    col_names = ['y', 'group', 'g'] + cov_names

    # vstack stacks each model term as a row; transpose to one column per term.
    X = np.vstack((expression, individuals, genotype, covariates.T)).T
    df = pd.DataFrame(X, columns=col_names)

    # Joining the term list yields the same formula with or without covariates.
    formula = 'y ~ ' + ' + '.join(['g'] + cov_names) + ' + (1 | group)'

    bootstrapped_betas = []
    for bootstrap_num in range(num_bootstraps):
        print(bootstrap_num)
        indices = get_bootstrapped_indices(individuals, individual_to_cells,
                                           sampling_fraction)
        model = Lmer(formula, data=df.iloc[indices, :])
        model.fit()
        # .iloc keeps the lookup positional; integer [] on a label-indexed
        # Series is deprecated in recent pandas releases.
        bootstrapped_betas.append(model.coefs['Estimate'].iloc[1])
    return np.asarray(bootstrapped_betas)
def test_gaussian_lmm():
    """Fit a Gaussian LMM with two random-effect terms and check its outputs."""
    csv_path = os.path.join(get_resource_path(), 'sample_data.csv')
    sample = pd.read_csv(csv_path)
    lmm = Lmer('DV ~ IV3 + IV2 + (IV2|Group) + (1|IV3)', data=sample)
    lmm.fit(summarize=False)

    # Population-level coefficient table
    assert lmm.coefs.shape == (3, 8)
    expected = np.array([12.04334602, -1.52947016, 0.67768509])
    assert np.allclose(lmm.coefs['Estimate'], expected, atol=.001)

    # Two random-effect terms -> per-term lists of frames
    assert isinstance(lmm.fixef, list)
    group_fixef, iv3_fixef = lmm.fixef[0], lmm.fixef[1]
    assert group_fixef.shape == (47, 3)
    assert iv3_fixef.shape == (3, 3)

    assert isinstance(lmm.ranef, list)
    group_ranef, iv3_ranef = lmm.ranef[0], lmm.ranef[1]
    assert group_ranef.shape == (47, 2)
    assert iv3_ranef.shape == (3, 1)

    assert lmm.ranef_corr.shape == (1, 3)
    assert lmm.ranef_var.shape == (4, 3)

    # Averaging the first per-group table should match the population estimates
    population = lmm.coefs.loc[:, 'Estimate']
    assert np.allclose(population, lmm.fixef[0].mean(), atol=.01)

    # Test prediction
    predicted = lmm.predict(lmm.data, use_rfx=True)
    assert np.allclose(predicted, lmm.data.fits)
def test_glmer_opt_passing():
    """Pass loose optimizer settings via ``control`` and expect warnings."""
    np.random.seed(1)
    csv_path = os.path.join(get_resource_path(), "sample_data.csv")
    sample = pd.read_csv(csv_path)
    n_rows = sample.shape[0]
    sample["DV_int"] = np.random.randint(1, 10, n_rows)

    glmm = Lmer("DV_int ~ IV3 + (1|Group)", data=sample, family="poisson")
    loose_control = "optCtrl = list(FtolAbs=1e-1, FtolRel=1e-1, maxfun=10)"
    glmm.fit(summarize=False, control=loose_control)

    assert len(glmm.warnings) >= 1
def test_anova():
    """Check the shape of the ANOVA table for a model with an interaction."""
    np.random.seed(1)
    csv_path = os.path.join(get_resource_path(), "sample_data.csv")
    sample = pd.read_csv(csv_path)
    # Extra random integer predictor (values 0-3) for the interaction term
    sample["DV_l2"] = np.random.randint(0, 4, sample.shape[0])

    lmm = Lmer("DV ~ IV3*DV_l2 + (IV3|Group)", data=sample)
    lmm.fit(summarize=False)

    anova_table = lmm.anova()
    assert anova_table.shape == (3, 7)
def test_inverse_gaussian_lmm():
    """Smoke-test an inverse-Gaussian model on a synthetic response."""
    np.random.seed(1)
    csv_path = os.path.join(get_resource_path(), "sample_data.csv")
    sample = pd.read_csv(csv_path)
    # Synthetic response drawn uniformly between 1 and 2
    n_rows = sample.shape[0]
    sample["DV_g"] = np.random.uniform(1, 2, size=n_rows)

    glmm = Lmer("DV_g ~ IV3 + (1|Group)", data=sample,
                family="inverse_gaussian")
    glmm.fit(summarize=False)

    assert glmm.family == "inverse_gaussian"
    assert glmm.coefs.shape == (2, 7)
def test_gamma_lmm():
    """Smoke-test a gamma-family model on a synthetic response."""
    np.random.seed(1)
    csv_path = os.path.join(get_resource_path(), 'sample_data.csv')
    sample = pd.read_csv(csv_path)
    # Synthetic response drawn uniformly between 1 and 2
    n_rows = sample.shape[0]
    sample['DV_g'] = np.random.uniform(1, 2, size=n_rows)

    glmm = Lmer('DV_g ~ IV3 + (1|Group)', data=sample, family='gamma')
    glmm.fit(summarize=False)

    assert glmm.family == 'gamma'
    assert glmm.coefs.shape == (2, 7)
def test_poisson_lmm():
    """Fit a Poisson model on random integer counts and check the coef table."""
    np.random.seed(1)
    csv_path = os.path.join(get_resource_path(), 'sample_data.csv')
    sample = pd.read_csv(csv_path)
    # Random integer response in 1..9
    sample['DV_int'] = np.random.randint(1, 10, sample.shape[0])

    glmm = Lmer('DV_int ~ IV3 + (1|Group)', data=sample, family='poisson')
    glmm.fit(summarize=False)

    assert glmm.family == 'poisson'
    coef_table = glmm.coefs
    assert coef_table.shape == (2, 7)
    assert 'Z-stat' in coef_table.columns
def mixeff_multinteraction2level_model(dataframe):
    """
    Fit and plot model_5_sci: fixed effects plus several interactions with
    ``female``, and a random intercept and ESCS slope per country.

    :param dataframe: a data frame with student ID, school ID, country ID,
        science, math, reading, and other five selected variables as
        predictors.
    :return: the fitted model
    """
    # Fixed effects, female-by-predictor interactions, random ESCS by country
    formula = (
        'log_science ~ IBTEACH + WEALTH + ESCS + female + Sch_science_resource '
        '+ female*ESCS + female*WEALTH + female*IBTEACH '
        '+ (ESCS | CountryID)'
    )
    model_5_sci = Lmer(formula, data=dataframe)

    # Estimates only exist after fitting; REML=False is passed to the fit
    model_5_sci.fit(REML=False)

    # Explicit summary print, then the summary plot
    print(model_5_sci.summary())
    model_5_sci.plot_summary()

    # Predictor plot for ESCS with confidence intervals
    model_5_sci.plot('ESCS', plot_ci=True, ylabel='Predicted log_science')

    # Residuals against ESCS
    sns.regplot(x='ESCS', y='residuals', data=model_5_sci.data,
                fit_reg=False)

    # Overall fit: fitted values against the observed outcome
    sns.regplot(x='fits', y='log_science', units='CountryID',
                data=model_5_sci.data, fit_reg=True)

    return model_5_sci
def random_effect_2level_model(dataframe):
    """
    Fit and plot model_1_sci: ``female`` as fixed effect and the
    ``female*ESCS`` terms as random effects on country level.

    :param dataframe: a data frame with student ID, school ID, country ID,
        science, math, reading, and other five selected variables as
        predictors.
    :return: the fitted model
    """
    # NOTE(review): the response here is 'Science' while the plots below use
    # 'log_science' -- confirm which outcome is intended.
    formula = 'Science ~ female + (female*ESCS | CountryID)'
    model_1_sci = Lmer(formula, data=dataframe)

    # Estimates only exist after fitting; REML=False is passed to the fit
    model_1_sci.fit(REML=False)

    # Explicit summary print, then the summary plot
    print(model_1_sci.summary())
    model_1_sci.plot_summary()

    # Predictor plot for female with confidence intervals
    model_1_sci.plot('female', plot_ci=True, ylabel='Predicted log_science')

    # Residuals against female
    sns.regplot(x='female', y='residuals', data=model_1_sci.data,
                fit_reg=False)

    # Overall fit: fitted values against the observed outcome
    sns.regplot(x='fits', y='log_science', units='CountryID',
                data=model_1_sci.data, fit_reg=True)

    return model_1_sci
def run_bootstrapped_eqtl_stability_with_residuals_one_test_v2(
        expression, genotype, covariates, individuals, individual_to_cells,
        num_bootstraps, sampling_fraction, seed):
    """Fit one eQTL mixed model and test its residuals for heteroscedasticity.

    The fitted formula is ``y ~ g + cov0 + ... + (1 | group)``. A
    Breusch-Pagan test is then run on the model residuals against the
    genotype.

    Args:
        expression: 1-D array used as the response ``y``.
        genotype: 1-D array used as the fixed effect ``g``.
        covariates: 2-D array with one column per covariate.
        individuals: 1-D array of group labels; cast to ``str`` for the
            random-intercept grouping factor.
        individual_to_cells: unused here; kept for interface compatibility.
        num_bootstraps: unused here; kept for interface compatibility.
        sampling_fraction: unused here; kept for interface compatibility.
        seed: value passed to ``np.random.seed``.

    Returns:
        Tuple ``(eqtl_pvalue, bp_value)``: the ``P-val`` at row position 1
        of ``model.coefs`` (presumably the ``g`` row -- confirm against the
        Lmer output), and element 3 of the ``het_breuschpagan`` result.
    """
    np.random.seed(seed)

    num_cov = covariates.shape[1]
    cov_names = ['cov' + str(i) for i in range(num_cov)]

    # Build the frame column by column. Stacking the str-cast group labels
    # with the numeric vectors in a single array (as np.vstack did) coerces
    # every column, including y and g, to strings.
    columns = {
        'y': np.asarray(expression),
        'group': np.asarray(individuals).astype(str),
        'g': np.asarray(genotype),
    }
    for i, name in enumerate(cov_names):
        columns[name] = covariates[:, i]
    df = pd.DataFrame(columns)

    # Joining the term list yields the same formula with or without covariates.
    formula = 'y ~ ' + ' + '.join(['g'] + cov_names) + ' + (1 | group)'

    model = Lmer(formula, data=df)
    model.fit()

    # .iloc keeps the lookup positional; integer [] on a label-indexed Series
    # is deprecated in recent pandas releases.
    eqtl_pvalue = model.coefs['P-val'].iloc[1]

    # np.vstack turns the 1-D genotype into a single-column 2-D array.
    bp_test = het_breuschpagan(model.residuals, np.vstack(genotype))

    # The leftover pdb.set_trace() breakpoint that used to sit here was
    # removed; it stopped every call before the return.
    return eqtl_pvalue, bp_test[3]
def test_install():
    """
    Check the installation by importing the Lmer class and fitting a small
    model on the bundled sample data. Any failure is reported by printing
    the error instead of raising.
    """
    try:
        from pymer4.models import Lmer
        from pymer4.utils import get_resource_path
        import os
        import pandas as pd
        import warnings

        warnings.filterwarnings("ignore")
        sample_path = os.path.join(get_resource_path(), 'sample_data.csv')
        sample = pd.read_csv(sample_path)
        lmm = Lmer('DV ~ IV3 + (1|Group)', data=sample)
        lmm.fit(summarize=False)
        print("Pymer4 installation working successfully!")
    except Exception as err:
        print("Error! {}".format(err))
def fixed_effect_3level_model(dataframe):
    """
    Multi-level model_2_sci includes intercept and variables as fixed
    effects, with random intercepts for countries and for schools nested
    within countries.

    :param dataframe: a data frame with student ID, school ID, country ID,
        science, math, reading, and other five selected variables as
        predictors.
    :return: the fitted model
    """
    # Fixed effects three-level model.
    # In lme4 syntax "a/b" nests b within a, i.e. (1|a) + (1|a:b). Schools
    # are nested within countries, so the country must come first; the
    # previous "SchoolID/CountryID" produced no country-level intercept.
    model_2_sci = Lmer(
        'log_science ~ IBTEACH + WEALTH '
        '+ ESCS + female + Sch_science_resource '
        '+ (1 | CountryID/SchoolID)',
        data=dataframe)

    # model must be fitted in order to get estimate results
    model_2_sci.fit(REML=False)

    # print summary since auto-generated result doesn't include fixed effects
    print(model_2_sci.summary())
    model_2_sci.plot_summary()

    # Residuals against the school science resource predictor
    sns.regplot(x='Sch_science_resource', y='residuals',
                data=model_2_sci.data, fit_reg=False)

    # Inspecting overall fit
    sns.regplot(x='fits', y='log_science', units='CountryID',
                data=model_2_sci.data, fit_reg=True)

    return model_2_sci
def random_intercept_3level_model(dataframe):
    """
    Multi-level model_0_sci includes only the grand-mean intercept, with
    random intercepts for countries and for schools nested within
    countries.

    :param dataframe: a data frame with student ID, school ID, country ID,
        science, math, reading, and other five selected variables as
        predictors.
    :return: the fitted model
    """
    # Random intercept-only three-level model.
    # In lme4 syntax "a/b" nests b within a, i.e. (1|a) + (1|a:b). Schools
    # are nested within countries, so the country must come first; the
    # previous "SchoolID/CountryID" produced no country-level intercept.
    model_0_sci = Lmer('log_science ~ 1 | CountryID/SchoolID',
                       data=dataframe)

    # model must be fitted in order to get estimate results
    model_0_sci.fit(REML=False)

    # print summary since auto-generated result doesn't include fixed effects
    print(model_0_sci.summary())

    # plot summary
    model_0_sci.plot_summary()

    # Inspecting overall fit
    sns.regplot(x='fits', y='log_science', units='CountryID',
                data=model_0_sci.data, fit_reg=True)

    return model_0_sci
def test_lmer_opt_passing():
    """Check that optimizer options given via ``control`` reach the fit.

    Tight tolerances must reproduce the reference estimates without
    warnings; loose tolerances must raise at least one warning.
    """
    csv_path = os.path.join(get_resource_path(), "sample_data.csv")
    formula = "DV ~ IV2 + (IV2|Group)"

    # Tight tolerances
    tight_fit = Lmer(formula, data=pd.read_csv(csv_path))
    tight_fit.fit(
        summarize=False,
        control="optCtrl = list(ftol_abs=1e-8, xtol_abs=1e-8)",
    )
    expected = np.array([10.301072, 0.682124])
    assert np.allclose(tight_fit.coefs["Estimate"], expected, atol=0.001)
    assert len(tight_fit.warnings) == 0

    # Loose tolerances, on a freshly loaded copy of the data
    loose_fit = Lmer(formula, data=pd.read_csv(csv_path))
    loose_fit.fit(
        summarize=False,
        control="optCtrl = list(ftol_abs=1e-4, xtol_abs=1e-4)",
    )
    assert len(loose_fit.warnings) >= 1
def test_logistic_lmm():
    """Fit a binomial model and check coefficients, rfx tables, predictions."""
    csv_path = os.path.join(get_resource_path(), 'sample_data.csv')
    sample = pd.read_csv(csv_path)
    glmm = Lmer('DV_l ~ IV1+ (IV1|Group)', data=sample, family='binomial')
    glmm.fit(summarize=False)

    # Population-level coefficient table
    assert glmm.coefs.shape == (2, 13)
    expected = np.array([-0.16098421, 0.00296261])
    assert np.allclose(glmm.coefs['Estimate'], expected, atol=.001)

    # Single random-effect term -> plain data frames
    assert isinstance(glmm.fixef, pd.core.frame.DataFrame)
    assert glmm.fixef.shape == (47, 2)
    assert isinstance(glmm.ranef, pd.core.frame.DataFrame)
    assert glmm.ranef.shape == (47, 2)

    population = glmm.coefs.loc[:, 'Estimate']
    assert np.allclose(population, glmm.fixef.mean(), atol=.01)

    # Test prediction
    response_pred = glmm.predict(glmm.data, use_rfx=True)
    assert np.allclose(response_pred, glmm.data.fits)

    link_pred = glmm.predict(glmm.data, use_rfx=True, pred_type='link')
    assert np.allclose(link_pred, logit(glmm.data.fits))
def test_gaussian_lmm():
    """Fit a Gaussian LMM with custom optimizer options and check outputs.

    Covers the coefficient table, per-term fixef/ranef lists, variance and
    correlation tables, prediction, simulation, and the old optimizer.
    """
    csv_path = os.path.join(get_resource_path(), "sample_data.csv")
    sample = pd.read_csv(csv_path)
    lmm = Lmer("DV ~ IV3 + IV2 + (IV2|Group) + (1|IV3)", data=sample)
    control = "optimizer='Nelder_Mead', optCtrl = list(FtolAbs=1e-8, XtolRel=1e-8)"
    lmm.fit(summarize=False, control=control)

    # Population-level coefficient table
    assert lmm.coefs.shape == (3, 8)
    expected = np.array([12.04334602, -1.52947016, 0.67768509])
    assert np.allclose(lmm.coefs["Estimate"], expected, atol=0.001)

    # Two random-effect terms -> per-term lists of frames
    assert isinstance(lmm.fixef, list)
    assert lmm.fixef[0].shape == (47, 3)
    assert lmm.fixef[1].shape == (3, 3)

    assert isinstance(lmm.ranef, list)
    assert lmm.ranef[0].shape == (47, 2)
    assert lmm.ranef[1].shape == (3, 1)
    iv3_levels = lmm.ranef[1].index
    assert (iv3_levels == ["0.5", "1", "1.5"]).all()

    assert lmm.ranef_corr.shape == (1, 3)
    assert lmm.ranef_var.shape == (4, 3)

    population = lmm.coefs.loc[:, "Estimate"]
    assert np.allclose(population, lmm.fixef[0].mean(), atol=0.01)

    # Test prediction
    predicted = lmm.predict(lmm.data, use_rfx=True)
    assert np.allclose(predicted, lmm.data.fits)

    # Test simulate, first with default arguments and then with use_rfx=True
    n_rows = lmm.data.shape[0]
    for extra_kwargs in ({}, {"use_rfx": True}):
        simulated = lmm.simulate(2, **extra_kwargs)
        assert isinstance(simulated, pd.DataFrame)
        assert simulated.shape == (n_rows, 2)

    # Smoketest for old_optimizer
    lmm.fit(summarize=False, old_optimizer=True)
def get_tvals(measure, features, reverse=False):
    """Fit one mixed model per (measure, net stat) pair and tabulate t-values.

    For every combination of an entry of ``measure`` and an entry of the
    module-level ``stats`` sequence, fits
    ``measure_stat ~ net_stat + (1 | topic)`` and reads the ``T-stat`` of
    ``net_stat``. A ``(1 | method)`` random intercept is added when
    ``features['method']`` holds more than one distinct value.

    Args:
        measure: sequence of column names in ``features`` used as responses.
        features: data frame containing ``userID``, ``topic``, ``method``
            and every column named in ``measure`` and ``stats``.
        reverse: unused; kept for interface compatibility.

    Returns:
        Data frame of t-values, indexed by ``stats`` with one column per
        entry of ``measure``. NaN t-values are replaced by 0.
    """
    t_matrix = np.zeros((len(measure), len(stats)))

    # The formula depends only on the number of methods, so choose it once.
    if len(set(features['method'])) > 1:
        # several methods to compare
        formula = 'measure_stat ~ net_stat + (1 | topic ) + (1 | method)'
    else:
        # no method comparison
        formula = 'measure_stat ~ net_stat + (1 | topic )'

    for measure_index, net_index in it.product(range(len(measure)),
                                               range(len(stats))):
        measure_stat = measure[measure_index]
        net_stat = stats[net_index]

        # Smaller frame with fixed column names so one formula fits all pairs
        df = features[['userID', 'topic', 'method', measure_stat, net_stat]]
        df = df.rename(columns={
            measure_stat: 'measure_stat',
            net_stat: 'net_stat',
        })

        model = Lmer(formula, data=df)
        model.fit(no_warnings=True, summarize=False)

        t_val = model.coefs['T-stat']['net_stat']
        if np.isnan(t_val):
            t_val = 0
            # The placeholders were previously printed verbatim because the
            # string was never formatted.
            print('Warning: no t_val found for measure %s, feature %s. '
                  'Correlation estimated at 0.' % (measure_stat, net_stat))
        t_matrix[measure_index, net_index] = t_val

    return pd.DataFrame(t_matrix.T, index=stats, columns=measure)
def test_contrasts():
    """Check treatment, polynomial and custom contrasts against group means."""
    gammas = sns.load_dataset("gammas").rename(
        columns={"BOLD signal": "bold"})
    roi_means = gammas.groupby("ROI")["bold"].mean()
    lmm = Lmer("bold ~ ROI + (1|subject)", data=gammas)

    # Reference values derived from the per-ROI means
    ag_mean = roi_means["AG"]
    ips_mean = roi_means["IPS"]
    v1_mean = roi_means["V1"]
    custom_contrast = ag_mean - np.mean([ips_mean, v1_mean])
    grand_mean = roi_means.mean()
    v1_vs_ips = v1_mean - ips_mean
    ag_vs_ips = ag_mean - ips_mean

    roi_order = ["IPS", "V1", "AG"]

    # Treatment contrasts with non-alphabetic order
    lmm.fit(factors={"ROI": roi_order}, summarize=False)
    assert np.allclose(lmm.coefs.loc["(Intercept)", "Estimate"], ips_mean)
    assert np.allclose(lmm.coefs.iloc[1, 0], v1_vs_ips)
    assert np.allclose(lmm.coefs.iloc[2, 0], ag_vs_ips)

    # Polynomial contrasts
    lmm.fit(factors={"ROI": roi_order}, ordered=True, summarize=False)
    assert np.allclose(lmm.coefs.loc["(Intercept)", "Estimate"], grand_mean)
    assert np.allclose(lmm.coefs.iloc[1, 0], 0.870744)  # From R
    assert np.allclose(lmm.coefs.iloc[2, 0], 0.609262)  # From R

    # Custom contrasts
    weights = {"AG": 1, "IPS": -0.5, "V1": -0.5}
    lmm.fit(factors={"ROI": weights}, summarize=False)
    assert np.allclose(lmm.coefs.loc["(Intercept)", "Estimate"], grand_mean)
    assert np.allclose(lmm.coefs.iloc[1, 0], custom_contrast)
def test_post_hoc():
    """Check row counts of marginal estimates and contrasts from post_hoc."""
    np.random.seed(1)
    csv_path = os.path.join(get_resource_path(), "sample_data.csv")
    sample = pd.read_csv(csv_path)
    lmm = Lmer("DV ~ IV1*IV3*DV_l + (IV1|Group)", data=sample,
               family="gaussian")
    factor_levels = {"IV3": ["0.5", "1.0", "1.5"], "DV_l": ["0", "1"]}
    lmm.fit(factors=factor_levels, summarize=False)

    # One marginal variable with three levels
    means, pairwise = lmm.post_hoc(marginal_vars="IV3", p_adjust="dunnet")
    assert means.shape[0] == 3
    assert pairwise.shape[0] == 3

    # Two marginal variables: 3 x 2 cells
    means, pairwise = lmm.post_hoc(marginal_vars=["IV3", "DV_l"])
    assert means.shape[0] == 6
    assert pairwise.shape[0] == 15
def test_post_hoc():
    """Check row counts of marginal estimates and contrasts from post_hoc."""
    np.random.seed(1)
    csv_path = os.path.join(get_resource_path(), 'sample_data.csv')
    sample = pd.read_csv(csv_path)
    lmm = Lmer('DV ~ IV1*IV3*DV_l + (IV1|Group)', data=sample,
               family='gaussian')
    factor_levels = {'IV3': ['0.5', '1.0', '1.5'], 'DV_l': ['0', '1']}
    lmm.fit(factors=factor_levels, summarize=False)

    # One marginal variable with three levels
    means, pairwise = lmm.post_hoc(marginal_vars='IV3', p_adjust='dunnet')
    assert means.shape[0] == 3
    assert pairwise.shape[0] == 3

    # Two marginal variables: 3 x 2 cells
    means, pairwise = lmm.post_hoc(marginal_vars=['IV3', 'DV_l'])
    assert means.shape[0] == 6
    assert pairwise.shape[0] == 15
df_two_groups = df.query("IV3 in [0.5, 1.0]").reset_index(drop=True) # Fit new a model using a categorical predictor with unequal variances (WLS) model = Lm("DV ~ IV3", data=df_two_groups) print(model.fit(weights="IV3")) ############################################################################### # Multi-level models # ---------------------------- # Fitting a multi-level model works similarly and actually just calls :code:`lmer` or :code:`glmer` in R behind the scenes. The corresponding output is also formatted to be very similar to output of :code:`summary()` in R. # Import the lmm model class from pymer4.models import Lmer # Initialize model instance using 1 predictor with random intercepts and slopes model = Lmer("DV ~ IV2 + (IV2|Group)", data=df) # Fit it print(model.fit()) ############################################################################### # Similar to :code:`Lm` models, :code:`Lmer` models save details in model attributes and have additional methods that can be called using the same syntax as described above. # Get population level coefficients print(model.coefs) ############################################################################### # Get group level coefficients (just the first 5) # Each row here is a unique intercept and slope # which vary because we parameterized our rfx that way above
# Because ANOVA is just regression, :code:`pymer4` can estimate ANOVA tables with F-results using the :code:`.anova()` method on a fitted model. This will compute a Type-III SS table given the coding scheme provided when the model was initially fit. Based on the distribution of data across factor levels and the specific coding-scheme used, this may produce invalid Type-III SS computations. For this reason the :code:`.anova()` method has a :code:`force-orthogonal=True` argument that will reparameterize and refit the model using orthogonal polynomial contrasts prior to computing an ANOVA table. # # Here we first estimate a mode with dummy-coded categories and suppress the summary output of :code:`.fit()`. Then we use :code:`.anova()` to examine the F-test results. # import basic libraries and sample data import os import pandas as pd from pymer4.utils import get_resource_path from pymer4.models import Lmer # IV3 is a categorical predictors with 3 levels in the sample data df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv")) # # We're going to fit a multi-level regression using the # categorical predictor (IV3) which has 3 levels model = Lmer("DV ~ IV3 + (1|Group)", data=df) # Using dummy-coding; suppress summary output model.fit(factors={"IV3": ["1.0", "0.5", "1.5"]}, summarize=False) # Get ANOVA table print(model.anova()) ################################################################################ # Type III SS inferences will only be valid if data are fully balanced across levels or if contrasts between levels are orthogonally coded and sum to 0. Below we tell :code:`pymer4` to respecify our contrasts to ensure this before estimating the ANOVA. :code:`pymer4` also saves the last set of contrasts used priory to forcing orthogonality. # # Because the sample data is balanced across factor levels and there are not interaction terms, in this case orthogonal contrast coding doesn't change the results. 
# Get ANOVA table, but this time force orthogonality # for valid SS III inferences # In this case the data are balanced so nothing changes
import numpy as np
from statsmodels.stats.api import anova_lm
import scipy
from dm_test import dm_test
import pdb
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr

# NOTE(review): `pd` and `Lmer` are used below but are not imported in the
# lines visible here -- confirm they are imported elsewhere in the file.
if __name__ == "__main__":
    # Load the two cleaned data sets and copy the RIG predictor into the UID
    # frame (pandas aligns the assignment on the row index).
    df_uid = pd.read_csv("./cross-linguistic-data-cleaned-uid.csv")
    df_rig = pd.read_csv("./cross-linguistic-data-cleaned-rig.csv")
    df_uid["rig_b_a_logit"] = df_rig["rig_b_a_logit"]

    # Three binomial mixed models of base_atom_order with random intercepts
    # for language_family and Subfamily: UID only, RIG only, and both.
    model_uid = Lmer(
        "base_atom_order ~ 1.0 + uid_b_a_logit + (1.0|language_family) + (1.0|Subfamily)",
        data=df_uid, family="binomial")
    model_rig = Lmer(
        "base_atom_order ~ 1.0 + rig_b_a_logit + (1.0|language_family) + (1.0|Subfamily)",
        data=df_uid, family="binomial")
    model_total = Lmer(
        "base_atom_order ~ 1.0 + uid_b_a_logit + rig_b_a_logit + (1.0|language_family) + (1.0|Subfamily)",
        data=df_uid, family="binomial")
    #model = Lmer("base_atom_order ~ rig_b_a_prob + (rig_b_a_prob|language_family) + (rig_b_a_prob|Subfamily)", data=df)

    # Fit all three models; only the combined model's fit result is printed.
    model_uid_fit = model_uid.fit()
    model_rig_fit = model_rig.fit()
    model_total_fit = model_total.fit()
    print(model_total_fit)
#curve = sg.filtfilt(np.ones(3)/3, [1, 0], curve) x0 = linregress(np.linspace(0, 1, 30), curve).intercept #x0 = curve[:15].mean() curve = curve/x0 - 1 y_df = y_df.append(pd.DataFrame({'metric_type':metric_type, 'fb_type': fb_type, 'subj_id': 's'+str(subj_id), 'channel': ch, 'k': np.linspace(0, 1, 30), 'env': curve+0.0001, 'band': band}), ignore_index=True) from pymer4.models import Lm, Lmer from pymer4.utils import get_resource_path for b, band in enumerate(['alpha', 'beta', 'theta']): for c, ch in enumerate(CHANNELS): for m, metric_type in enumerate(['magnitude', 'n_spindles', 'duration', 'amplitude']): data = y_df.query('metric_type=="{}" & channel=="{}" & band=="{}"'.format(metric_type, ch, band)) model = Lmer('env ~ k:fb_type + (1 |subj_id)', data=data, ) model.fit(factors={'fb_type': ['FB0', 'FB250', 'FB500', 'FBMock']}) a = model.post_hoc('k', 'fb_type')[1] a['channel'] = ch a['metric_type'] = metric_type a['band'] = band a['P-val-full'] = stats.t.sf(a['T-stat'], 9) if c==0 and m==0 and b==0: all_stats_df = a.copy() else: all_stats_df = all_stats_df.append(a, ignore_index=True) print(ch, metric_type) from mne.stats import fdr_correction
num_grps, coef_vals=coef_vals, mus=mus, corrs=corrs) print(f"True coefficients:\n{b}\n") print(f"BLUPs:\n{blups.head()}\n") print(f"Data:\n{data.head()}\n") ############################################################################### # Again here are some checks you might do to make sure the data were correctly generated (by default lmm data will generally be a bit noisier due to within and across group/cluster variance; see the API for how to customize this): # Group the data before running checks group_data = data.groupby("Group") ############################################################################### # Check mean of predictors within each group print(group_data.apply(lambda grp: grp.iloc[:, 1:-1].mean(axis=0))) ############################################################################### # Check correlations between predictors within each group print(group_data.apply(lambda grp: grp.iloc[:, 1:-1].corr())) ############################################################################### # Check coefficient recovery from pymer4.models import Lmer model = Lmer("DV ~ IV1+IV2+IV3 + (1|Group)", data=data) model.fit(summarize=False) print(model.coefs.loc[:, "Estimate"])
# Map every block number to its position in the list of unique block numbers
unique_blocks = list(stats_df['block_number'].unique())
stats_df['k'] = stats_df['block_number'].apply(lambda x: unique_blocks.index(x))
# String subject label ('s' + id)
stats_df['subj_id_str'] = 's' + stats_df['subj_id'].astype('str')
# Keep only the first 15 block positions
stats_df = stats_df.query('k < 15')

import seaborn as sns
# Box plots of the mean metric per subject, feedback type and metric type at threshold_factor 2.75
sns.catplot('fb_type', 'metric', kind='box', col='metric_type', sharey='col', data=stats_df.query('threshold_factor==2.75').groupby(['subj_id', 'fb_type', 'metric_type']).mean().reset_index())

# Reload the data and keep only block numbers 4 and 36
stats_df = pd.read_pickle('release/data/{}.pkl'.format('channels1_bands1_splitedTrue_thresholds17'))
stats_df = stats_df.query('block_number==4 | block_number==36')
# Binary block indicator: 0 for block 4, 1 otherwise (here block 36)
stats_df['block_name'] = stats_df['block_number'].apply(lambda x: 0 if x ==4 else 1)
stats_df['subj_id_str'] = 's' + stats_df['subj_id'].astype('str')
# Point plot of the metric by block indicator and feedback type at threshold_factor 2.75
sns.catplot('block_name', 'metric', 'fb_type', kind='point', col='metric_type', sharey='col', data=stats_df.query('threshold_factor==2.75'), dodge=True)

from pymer4.models import Lm, Lmer
# Select one metric type at one threshold for the mixed model
metric_type = 'duration'
threshold_factor = 2.75
data = stats_df.query('metric_type=="{}" & threshold_factor=={}'.format(metric_type, threshold_factor)).copy()
print(len(data))
# Turn infinities into NaN, then fill missing metric values with the column minimum
data = data.replace([np.inf, -np.inf], np.nan)
data.loc[:, 'metric'] = data['metric'].fillna(data['metric'].min()).values
# Mixed model: block-by-feedback-type interaction with a random intercept per subject
model = Lmer('metric ~ block_name:fb_type + (1 |subj_id_str)', data=data)
model.fit(factors={'fb_type': ['FB0', 'FB250', 'FB500', 'FBMock']})
model.post_hoc('block_name', 'fb_type')
def test_poisson_lmm():
    """Fit Poisson models, including ones without fixed-effect predictors."""
    np.random.seed(1)
    csv_path = os.path.join(get_resource_path(), "sample_data.csv")
    sample = pd.read_csv(csv_path)
    # Random integer response in 1..9
    sample["DV_int"] = np.random.randint(1, 10, sample.shape[0])

    glmm = Lmer("DV_int ~ IV3 + (1|Group)", data=sample, family="poisson")
    glmm.fit(summarize=False)
    assert glmm.family == "poisson"
    assert glmm.coefs.shape == (2, 7)
    assert "Z-stat" in glmm.coefs.columns

    # Test RFX only: a single random-effect term
    one_term = Lmer("DV_int ~ 0 + (IV1|Group)", data=sample, family="poisson")
    one_term.fit(summarize=False)
    assert one_term.fixef.shape == (47, 2)

    # Test RFX only: two random-effect terms give a list of tables
    two_terms = Lmer("DV_int ~ 0 + (IV1|Group) + (1|IV3)", data=sample,
                     family="poisson")
    two_terms.fit(summarize=False)
    assert isinstance(two_terms.fixef, list)
    group_table, iv3_table = two_terms.fixef[0], two_terms.fixef[1]
    assert group_table.shape == (47, 2)
    assert iv3_table.shape == (3, 2)
def test_logistic_lmm():
    """Fit binomial models and check coefficients, rfx tables, predictions.

    Also covers models without fixed-effect predictors.
    """
    csv_path = os.path.join(get_resource_path(), "sample_data.csv")
    sample = pd.read_csv(csv_path)
    glmm = Lmer("DV_l ~ IV1+ (IV1|Group)", data=sample, family="binomial")
    glmm.fit(summarize=False)

    # Population-level coefficient table
    assert glmm.coefs.shape == (2, 13)
    expected = np.array([-0.16098421, 0.00296261])
    assert np.allclose(glmm.coefs["Estimate"], expected, atol=0.001)

    # Single random-effect term -> plain data frames
    assert isinstance(glmm.fixef, pd.core.frame.DataFrame)
    assert glmm.fixef.shape == (47, 2)
    assert isinstance(glmm.ranef, pd.core.frame.DataFrame)
    assert glmm.ranef.shape == (47, 2)

    population = glmm.coefs.loc[:, "Estimate"]
    assert np.allclose(population, glmm.fixef.mean(), atol=0.01)

    # Test prediction
    response_pred = glmm.predict(glmm.data, use_rfx=True)
    assert np.allclose(response_pred, glmm.data.fits)

    link_pred = glmm.predict(glmm.data, use_rfx=True, pred_type="link")
    assert np.allclose(link_pred, logit(glmm.data.fits))

    # Test RFX only: a single random-effect term
    one_term = Lmer("DV_l ~ 0 + (IV1|Group)", data=sample, family="binomial")
    one_term.fit(summarize=False)
    assert one_term.fixef.shape == (47, 2)

    # Test RFX only: two random-effect terms give a list of tables
    two_terms = Lmer("DV_l ~ 0 + (IV1|Group) + (1|IV3)", data=sample,
                     family="binomial")
    two_terms.fit(summarize=False)
    assert isinstance(two_terms.fixef, list)
    group_table, iv3_table = two_terms.fixef[0], two_terms.fixef[1]
    assert group_table.shape == (47, 2)
    assert iv3_table.shape == (3, 2)