def _mixed_effects_comp(self, ser1, ser2):
    """Fit a mixed linear model of ``ser2`` on ``ser1`` and report the slope.

    The grouping variable is derived from the index labels of ``ser1``: the
    text before the first ``'_'`` of each label is looked up in
    ``self.user_indices``.

    Args:
        ser1: series-like object supplying the exogenous values
            (``ser1.values``) and the index labels used to build the groups.
            Labels must be strings, since they are split on ``'_'``.
        ser2: series-like object supplying the endogenous values
            (``ser2.values``).

    Returns:
        A ``(coeff, pv)`` tuple. ``coeff`` is the ``'Coef.'`` entry of the
        ``'x1'`` row of the second summary table and ``pv`` is the first
        entry of ``results.pvalues``. When the summary has no ``'x1'`` row
        the tuple is ``(None, 1.0)``.

    Raises:
        Exception: anything raised while building or fitting the model is
            logged with its traceback and re-raised unchanged.
    """
    groups = [
        self.user_indices[label.split('_')[0]]
        for label in ser1.index.tolist()
    ]
    exo = ser1.values
    endo = ser2.values
    try:
        model = mlm.MixedLM(np.array(endo), np.array(exo), np.array(groups))
        results = model.fit()
        summary = results.summary()
        # Lazy %-args: the repr is only built when DEBUG is enabled.
        l_logger.debug("Summary %r", summary)
        if 'x1' in summary.tables[1].index:
            coeff = float(summary.tables[1].loc['x1', 'Coef.'])
            pv = results.pvalues[0]
        else:
            # This happens when a vector has 0 entropy.
            l_logger.warning("no result")
            coeff = None
            pv = 1.0
    except Exception:
        # The error is re-raised, so no fallback values are assigned here.
        l_logger.exception("Error computing mixed model")
        raise
    return (coeff, pv)
def fitmlm(self):
    """Fit a mixed linear model on the instance data and return the result.

    ``self.response``, ``self.fixed`` and ``self.random`` are each wrapped in
    a ``pandas.DataFrame`` and handed to ``MixedLM`` as ``endog``, ``exog``
    and ``groups`` respectively.

    Returns:
        Whatever ``MixedLM.fit()`` returns for that model.
    """
    import pandas as pd
    from statsmodels.regression import mixed_linear_model

    response_frame = pd.DataFrame(self.response)
    fixed_frame = pd.DataFrame(self.fixed)
    group_frame = pd.DataFrame(self.random)

    # NOTE(review): the formula string is forwarded as a plain keyword to the
    # constructor; confirm against the installed statsmodels what it does.
    model = mixed_linear_model.MixedLM(
        endog=response_frame,
        exog=fixed_frame,
        groups=group_frame,
        formula='response ~ fixed',
    )
    return model.fit()
def summarymlm(self):
    """Fit the mixed linear model and return its summary.

    The model is built from ``self.response`` (endog), ``self.fixed`` (exog)
    and ``self.random`` (groups), each wrapped in a ``pandas.DataFrame``.
    After fitting, a blank-ish line and a heading are printed to stdout.

    Returns:
        The object returned by ``summary()`` on the fit result.
    """
    import pandas as pd
    from statsmodels.regression import mixed_linear_model

    response_frame = pd.DataFrame(self.response)
    fixed_frame = pd.DataFrame(self.fixed)
    group_frame = pd.DataFrame(self.random)

    # NOTE(review): the formula string is forwarded as a plain keyword to the
    # constructor; confirm against the installed statsmodels what it does.
    model = mixed_linear_model.MixedLM(
        endog=response_frame,
        exog=fixed_frame,
        groups=group_frame,
        formula='response ~ fixed',
    )
    fitted = model.fit()

    for line in (" ",
                 "The summary of the linear mixed effects model is given below:"):
        print(line)
    return fitted.summary()
def plotmlm(self):
    """Fit the mixed linear model and plot residuals against predictions.

    The model is built from ``self.response`` (endog), ``self.fixed`` (exog)
    and ``self.random`` (groups), each wrapped in a ``pandas.DataFrame``.
    A plotting frame with the columns ``residuals``, ``fixed`` and
    ``predicted`` is assembled from the fit and passed to
    ``seaborn.lmplot`` with ``predicted`` on x and ``residuals`` on y.

    Returns:
        None. The plot is produced as a side effect.
    """
    import seaborn as sns
    sns.set()
    import pandas as pd
    from statsmodels.regression import mixed_linear_model

    # NOTE(review): the formula string is forwarded as a plain keyword to the
    # constructor; confirm against the installed statsmodels what it does.
    mlm = mixed_linear_model.MixedLM(
        endog=pd.DataFrame(self.response),
        exog=pd.DataFrame(self.fixed),
        groups=pd.DataFrame(self.random),
        formula='response ~ fixed',
    )
    mlmf = mlm.fit()

    db_plot = pd.DataFrame()
    db_plot["residuals"] = mlmf.resid.values
    # The fixed-effect data lives on the instance; the bare name `fixed`
    # was never defined in this scope.
    db_plot["fixed"] = self.fixed
    db_plot["predicted"] = mlmf.fittedvalues
    # Pass the frame built above (previously misspelled as `dbplot`).
    sns.lmplot(x="predicted", y="residuals", data=db_plot)
result_pca.plot_scree() #Question1: identify variables that correlate with GFP expression #####Multivariate Linear Regression Model, selection of predictor variables see p.385 R.Johnson #GFP intens mean can be seen as count variable, that can be modeled using possion/negbino link in GLM. ##[Linear mixed effects model] can be performed for the effects of High/Midium/Low LNP dose ######Here starts the question2: Does internuclear distance correlate with GFP expression?(nuc intens mean/compactness v.s. GFP intens) # import statsmodels.formula.api as smf # mixed_model = smf.mixedlm("'GFP intens Mean' ~ 'Nuc intens Mean'", df_mix, groups=df_mix["LNP dose"]) import statsmodels.regression.mixed_linear_model as smm import statsmodels.regression.mixed_linear_model as smm Mixed_model = smm.MixedLM(endog=df_mix['GFP intens Mean'].to_numpy(), exog=df_mix['Nuc intens Mean'].to_numpy(), groups=df_mix['LNP dose'], missing='drop') #the model result_lmm = Mixed_model.fit() result_lmm.summary() #result_lmm.summary().as_latex() ####With only distance variables df_mix_distance = df_mix.rename( columns={ "Cells no border - Distance from GFP bright Mean": "Distance from GFP bright Mean", "Cells no border - Distance intens Mean": "Distance intens Mean" }) df_mix_distance = df_mix_distance.loc[:, [ 'LNP dose', 'GFP intens Mean', 'Nuc intens Mean', 'Distance from GFP bright Mean', 'Distance intens Mean'
def model_playground(
        data_path=r'C:\Users\K1774755\Downloads\phd\mmse_rebecca\mmse_synthetic_data_20190919.xlsx'):
    """Scratch pad comparing R-style (``Lmer``) and statsmodels mixed models.

    Reads the spreadsheet at ``data_path``, selects one diagnosis subset and
    then builds a series of alternative model specifications. Most
    assignments to ``model`` / ``model_py`` deliberately overwrite the
    previous one: they are variants kept side by side for experimentation,
    and only the ones followed by ``fit()`` are actually estimated.

    Args:
        data_path: location of the Excel file to load. Defaults to the
            path that was previously hard-coded in the function body.

    Returns:
        None. Fit results are printed.
    """
    df = pd.read_excel(data_path, index_col=None)

    # Diagnosis subsets; switch `df_to_use` to experiment with another one.
    df_smi = df[df.patient_diagnosis_super_class == 'smi only']
    df_orga = df[df.patient_diagnosis_super_class == 'organic only']
    df_smi_orga = df[df.patient_diagnosis_super_class == 'smi+organic']
    df_to_use = df_orga

    # MODEL 1: basic model (random intercept and fixed slope)
    model = Lmer('score_combined ~ score_date_centered + (1|brcid)',
                 data=df_to_use)  # MMSE score by year
    model = Lmer('score_combined ~ score_date_centered + gender + (1|brcid)',
                 data=df_to_use)  # adding age at baseline as covariate (is this correct??)
    to_print = print_r_model_output(model.fit())

    # MODEL 2: random intercept and random slope
    model = Lmer('score_combined ~ (score_date_centered | brcid)',
                 data=df_to_use)  # this removes the intercept?
    model = Lmer('score_combined ~ (score_date_centered + gender| brcid)',
                 data=df_to_use)
    model = Lmer(
        "score_combined ~ score_date_centered + (1 + score_date_centered | brcid)",
        data=df_to_use)  # correct one?
    model = Lmer(
        'score_combined ~ score_date_centered + gender + (1 + score_date_centered | brcid)',
        data=df_to_use)
    model = Lmer(
        'score_combined ~ 1 + score_date_centered + (1|brcid) + (0 + score_date_centered | brcid)',
        data=df)  # 2 random effects constrained to be uncorrelated

    # MODEL 3: basic model but quadratic
    model = Lmer(
        'score_combined ~ score_date_centered + I(score_date_centered ^2) + (1|brcid)',
        data=df_to_use)
    print(model.fit())

    #######################################################################################
    # PYTHON STUFF
    # R formula:
    r_formula = 'score_combined ~ score_date_centered + age_at_score_baseline + patient_diagnosis_super_class + score_date_centered * age_at_score_baseline + score_date_centered * patient_diagnosis_super_class'

    # MODEL 1: python equivalent
    model_py = smf.mixedlm("score_combined ~ score_date_centered ", df_to_use,
                           groups=df_to_use['brcid'])
    result = model_py.fit()
    # Reuse the fit above instead of estimating the same model a second time.
    print(result.summary())

    # random slope and intercept
    model_py = smf.mixedlm(r_formula, df_to_use, groups=df_to_use['brcid'],
                           re_formula="~score_date_centered")
    model_py = sm.MixedLM.from_formula(r_formula, df_to_use,
                                       re_formula="score_date_centered ",
                                       groups=df_to_use['brcid'])
    # random slope only
    model_py = sm.MixedLM.from_formula("score_combined ~ score_date_centered ",
                                       df_to_use,
                                       re_formula="0 + score_date_centered ",
                                       groups=df_to_use['brcid'])

    # MODEL 2: python equivalent (??)
    vcf = {
        "score_date_centered ": "0 + C(score_date_centered )",
        "brcid": "0 + C(brcid)"
    }
    model_py = sm.MixedLM.from_formula("score_combined ~ score_date_centered ",
                                       groups=df_to_use['brcid'],
                                       vc_formula=vcf,
                                       re_formula="0",
                                       data=df_to_use)
    print(model_py.fit().summary())

    # Direct (non-formula) construction. Column labels are looked up
    # literally here, so the label must not carry the trailing space that
    # the formula strings above tolerate.
    # NOTE(review): assumes the sheet provides an 'intercept' column - confirm.
    model3 = mlm.MixedLM(
        endog=df_to_use['score_combined'],  # dependent variable (1D)
        exog=df_to_use[['score_date_centered', 'intercept']],  # fixed effect covariates (2D)
        exog_re=df_to_use['intercept'],  # random effect covariates
        groups=df_to_use['brcid'])  # data from different groups are independent
    result = model3.fit()
    print(result.summary())