Example #1
# Assumes module-level imports: numpy as np,
# statsmodels.regression.mixed_linear_model as mlm, and a logger l_logger.
def _mixed_effects_comp(self, ser1, ser2):
    groups = [
        self.user_indices[i.split('_')[0]] for i in ser1.index.tolist()
    ]
    exo = ser1.values
    endo = ser2.values
    try:
        model = mlm.MixedLM(np.array(endo), np.array(exo),
                            np.array(groups))
        results = model.fit()
        summary = results.summary()
        l_logger.debug("Summary %r", summary)
        if 'x1' in summary.tables[1].index:
            coeff = float(summary.tables[1].loc['x1', 'Coef.'])
            pv = results.pvalues[0]
        else:
            l_logger.warning("no result")
            # this happens when a vector has zero entropy (constant input)
            coeff = None
            pv = 1.0
    except Exception:
        # log the failure and re-raise so the caller can handle it
        l_logger.exception("Error computing mixed model")
        raise
    return (coeff, pv)
Example #2
def fitmlm(self):
    import pandas as pd
    from statsmodels.regression import mixed_linear_model

    # Design matrices are passed directly (array interface); a formula string
    # would go through MixedLM.from_formula instead.
    mlm = mixed_linear_model.MixedLM(endog=pd.DataFrame(self.response),
                                     exog=pd.DataFrame(self.fixed),
                                     groups=pd.Series(self.random))  # groups must be a 1-D vector of labels
    mlmf = mlm.fit()
    return mlmf
Example #3
def summarymlm(self):
    import pandas as pd
    from statsmodels.regression import mixed_linear_model

    # Same array-interface fit as fitmlm(); no formula argument is needed here.
    mlm = mixed_linear_model.MixedLM(endog=pd.DataFrame(self.response),
                                     exog=pd.DataFrame(self.fixed),
                                     groups=pd.Series(self.random))
    mlmf = mlm.fit()
    print()
    print("The summary of the linear mixed effects model is given below:")
    return mlmf.summary()
Example #4
def plotmlm(self):
    import seaborn as sns
    sns.set()
    import pandas as pd
    from statsmodels.regression import mixed_linear_model

    mlm = mixed_linear_model.MixedLM(endog=pd.DataFrame(self.response),
                                     exog=pd.DataFrame(self.fixed),
                                     groups=pd.Series(self.random))
    mlmf = mlm.fit()
    db_plot = pd.DataFrame()
    db_plot["residuals"] = mlmf.resid
    db_plot["fixed"] = self.fixed
    db_plot["predicted"] = mlmf.fittedvalues

    # residuals against fitted values
    sns.lmplot(x="predicted", y="residuals", data=db_plot)
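The three methods above assume an object exposing response, fixed and random attributes; the class definition itself is not part of the examples. A minimal, hypothetical usage sketch with synthetic data (the class name and values below are illustrative only):

import numpy as np

class MixedModelDemo:
    # Hypothetical container for the attributes the methods above expect;
    # fitmlm, summarymlm and plotmlm would be attached to it as methods.
    def __init__(self, response, fixed, random):
        self.response = response  # outcome values
        self.fixed = fixed        # fixed-effect covariate
        self.random = random      # grouping labels for the random intercept

rng = np.random.default_rng(0)
groups = np.repeat([0, 1, 2, 3], 25)          # four groups of 25 observations
x = rng.normal(size=100)
y = 2.0 * x + 0.5 * groups + rng.normal(size=100)
demo = MixedModelDemo(response=y, fixed=x, random=groups)
# demo.fitmlm(); demo.summarymlm(); demo.plotmlm()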
Example #5
result_pca.plot_scree()

# Question 1: identify variables that correlate with GFP expression
##### Multivariate linear regression model; for selection of predictor variables see p. 385, R. Johnson
# GFP intens Mean can be seen as a count-like variable that can be modelled with a
# Poisson/negative-binomial link in a GLM (a minimal sketch follows below).

## A linear mixed effects model can be fitted for the effects of High/Medium/Low LNP dose
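# A minimal sketch of the GLM idea above (hypothetical; assumes the df_mix
# frame used below and that statsmodels is installed):
import statsmodels.api as sm
glm_pois = sm.GLM(df_mix['GFP intens Mean'],
                  sm.add_constant(df_mix['Nuc intens Mean']),
                  family=sm.families.Poisson(),  # or sm.families.NegativeBinomial()
                  missing='drop')
print(glm_pois.fit().summary())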

###### Question 2: does internuclear distance correlate with GFP expression? (Nuc intens Mean / compactness vs. GFP intens)
# import statsmodels.formula.api as smf
# mixed_model = smf.mixedlm('Q("GFP intens Mean") ~ Q("Nuc intens Mean")',
#                           df_mix, groups=df_mix["LNP dose"])  # column names with spaces need patsy's Q()
import statsmodels.regression.mixed_linear_model as smm

Mixed_model = smm.MixedLM(endog=df_mix['GFP intens Mean'].to_numpy(),
                          exog=df_mix['Nuc intens Mean'].to_numpy(),  # single fixed-effect column, no intercept term
                          groups=df_mix['LNP dose'],
                          missing='drop')  # the model
result_lmm = Mixed_model.fit()
result_lmm.summary()
#result_lmm.summary().as_latex()

#### With only distance variables
df_mix_distance = df_mix.rename(
    columns={
        "Cells no border - Distance from GFP bright Mean":
        "Distance from GFP bright Mean",
        "Cells no border - Distance intens Mean": "Distance intens Mean"
    })
df_mix_distance = df_mix_distance.loc[:, [
    'LNP dose', 'GFP intens Mean', 'Nuc intens Mean',
    'Distance from GFP bright Mean', 'Distance intens Mean'
]]
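# Hypothetical continuation (the original snippet is truncated at this point):
# refit the mixed model with only the distance variables as fixed effects.
result_lmm_distance = smm.MixedLM(
    endog=df_mix_distance['GFP intens Mean'].to_numpy(),
    exog=df_mix_distance[['Distance from GFP bright Mean',
                          'Distance intens Mean']].to_numpy(),
    groups=df_mix_distance['LNP dose'],
    missing='drop').fit()
result_lmm_distance.summary()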
Example #6
# Assumes module-level imports: pandas as pd, statsmodels.api as sm,
# statsmodels.formula.api as smf, statsmodels.regression.mixed_linear_model as mlm,
# and Lmer (e.g. from pymer4.models import Lmer).
def model_playground():
    df = pd.read_excel(
        r'C:\Users\K1774755\Downloads\phd\mmse_rebecca\mmse_synthetic_data_20190919.xlsx',
        index_col=None)
    df_smi = df[df.patient_diagnosis_super_class == 'smi only']
    df_orga = df[df.patient_diagnosis_super_class == 'organic only']
    df_smi_orga = df[df.patient_diagnosis_super_class == 'smi+organic']
    df_to_use = df_orga

    # MODEL 1: basic model (random intercept and fixed slope)
    model = Lmer('score_combined ~ score_date_centered + (1|brcid)',
                 data=df_to_use)  # MMSE score by year
    model = Lmer('score_combined ~ score_date_centered + gender + (1|brcid)',
                 data=df_to_use)  # adding gender as a covariate (is this correct??)
    to_print = print_r_model_output(model.fit())
    # MODEL 2: random intercept and random slope
    model = Lmer('score_combined ~  (score_date_centered  | brcid)',
                 data=df_to_use)  # this removes the intercept?
    model = Lmer('score_combined ~  (score_date_centered  + gender| brcid)',
                 data=df_to_use)

    model = Lmer(
        "score_combined ~ score_date_centered + (1 + score_date_centered | brcid)",
        data=df_to_use)  # correct one?
    model = Lmer(
        'score_combined ~  score_date_centered + gender + (1 + score_date_centered | brcid)',
        data=df_to_use)

    model = Lmer(
        'score_combined ~  1 + score_date_centered  + (1|brcid) + (0 + score_date_centered  | brcid)',
        data=df)  # 2 random effects constrained to be uncorrelated

    # MODEL 3: basic model but quadratic
    model = Lmer(
        'score_combined ~ score_date_centered + I(score_date_centered^2) + (1|brcid)',
        data=df_to_use)

    print(model.fit())

    #######################################################################################
    #  PYTHON STUFF
    # R formula:
    r_formula = 'score_combined ~  score_date_centered + age_at_score_baseline + patient_diagnosis_super_class + score_date_centered * age_at_score_baseline + score_date_centered * patient_diagnosis_super_class'

    # MODEL 1: python equivalent
    model_py = smf.mixedlm("score_combined ~ score_date_centered",
                           df_to_use,
                           groups=df_to_use['brcid'])
    result = model_py.fit()
    print(result.summary())

    # random slope and intercept
    model_py = smf.mixedlm(r_formula,
                           df_to_use,
                           groups=df_to_use['brcid'],
                           re_formula="~score_date_centered")
    model_py = sm.MixedLM.from_formula(r_formula,
                                       df_to_use,
                                       re_formula="score_date_centered ",
                                       groups=df_to_use['brcid'])
    # random slope only
    model_py = sm.MixedLM.from_formula("score_combined ~ score_date_centered ",
                                       df_to_use,
                                       re_formula="0 + score_date_centered ",
                                       groups=df_to_use['brcid'])

    # MODEL 2: python equivalent (??)
    vcf = {
        "score_date_centered": "0 + C(score_date_centered)",
        "brcid": "0 + C(brcid)"
    }
    model_py = sm.MixedLM.from_formula("score_combined ~ score_date_centered ",
                                       groups=df_to_use['brcid'],
                                       vc_formula=vcf,
                                       re_formula="0",
                                       data=df_to_use)
    print(model_py.fit().summary())

    model3 = mlm.MixedLM(
        endog=df_to_use['score_combined'],  # dependent variable (1D)
        exog=df_to_use[['score_date_centered',
                        'intercept']],  # fixed effect covariates (2D); assumes an 'intercept' column of ones
        exog_re=df_to_use['intercept'],  # random effect covariates (random intercept)
        groups=df_to_use['brcid']
    )  # data from different groups are independent
    result = model3.fit()
    print(result.summary())
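    # Hypothetical follow-up (not part of the original playground): to choose
    # between the random-intercept and random-slope specifications, refit both
    # by maximum likelihood (reml=False) and compare AIC (lower is better).
    m_ri = smf.mixedlm("score_combined ~ score_date_centered",
                       df_to_use, groups=df_to_use['brcid'])
    m_rs = smf.mixedlm("score_combined ~ score_date_centered",
                       df_to_use, groups=df_to_use['brcid'],
                       re_formula="~score_date_centered")
    print(m_ri.fit(reml=False).aic, m_rs.fit(reml=False).aic)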