def anova(self, title=None, empty=True, ems=None):
    """Return an ANOVA table for the linear model.

    Parameters
    ----------
    title : str | None
        Title for the table. When falsy, ``self.title`` is used if it is set.
    empty : bool
        If True, effects without an error term (and hence without F-test)
        still get a row in the table. Results for such effects are stored in
        ``self.results`` either way.
    ems : bool | None
        Add an "E(MS)" column naming the error term used for each F-test.
        None reads the setting from ``defaults["show_ems"]``.

    Returns
    -------
    table : textab.Table
        One row per effect (SS, df, MS, [E(MS)], F, p), followed by a
        "Residuals" row when ``self.df_res > 0``.

    Raises
    ------
    NotImplementedError
        If ``isbalanced(self.X)`` is false.

    Notes
    -----
    Side effects: ``self.MS`` is set to the list of mean squares of all
    effects and ``self.results`` to a dict mapping each effect name to
    ``{"SS", "df", "MS", "E(MS)", "F", "p"}``. For effects that could not be
    tested, "F" and "p" are None.
    """
    if ems is None:
        ems = defaults["show_ems"]
    X = self.X
    values = self.values
    # For each effect, the index of the effect serving as its error term
    # (None where there is none).
    e_ms = _hopkins_ems(X)

    # table head
    table = textab.Table("l" + "r" * (5 + ems))
    if title:
        table.title(title)
    elif self.title:
        table.title(self.title)
    table.cell()
    headers = ["SS", "df", "MS"]
    if ems:
        headers.append("E(MS)")
    headers += ["F", "p"]
    for hd in headers:
        table.cell(hd, r"\textbf", just="c")
    table.midrule()

    # Only balanced models are supported by this table.
    if not isbalanced(X):
        raise NotImplementedError("ANOVA table for unbalanced models")

    # MS of every effect is needed before the table body is built, because
    # any effect may serve as error term for another one (random effects).
    self.MS = []
    for _, _, index, df in X.iter_effects():
        SS = np.sum(values[:, index].sum(1) ** 2)
        self.MS.append(SS / df)

    # table body
    self.results = {}
    for i, name, index, df in X.iter_effects():
        SS = np.sum(values[:, index].sum(1) ** 2)
        MS = SS / df

        # Select the denominator (error term) for the F-test.
        e_ms_i = e_ms[i]
        if e_ms_i is not None:
            MS_d = self.MS[e_ms_i]
            df_d = X.effects[e_ms_i].df
            e_ms_name = X.effects[e_ms_i].name
        elif self.df_res > 0:
            MS_d = self.MS_res
            df_d = self.df_res
            e_ms_name = "Res"
        else:
            MS_d = None
            df_d = None
            e_ms_name = None

        # F-test; skipped without an error term or when its MS is zero.
        if MS_d is not None and MS_d != 0:
            F = MS / MS_d
            p = 1 - sp.stats.distributions.f.cdf(F, df, df_d)
            F_tex = [F, textab.Stars(test.star(p))]
        else:
            # Reset F explicitly so that no value from a previous effect
            # (or an unbound name) ends up in the stored results.
            F = None
            F_tex = None
            p = None

        # add to table
        if e_ms_name or empty:
            table.cell(name)
            table.cell(SS)
            table.cell(df, fmt="%i")
            table.cell(MS)
            if ems:
                table.cell(e_ms_name)
            table.cell(F_tex, mat=True)
            table.cell(p, fmt=defaults["p_fmt"], drop0=True)

        # store results
        self.results[name] = {
            "SS": SS,
            "df": df,
            "MS": MS,
            "E(MS)": e_ms_name,
            "F": F,
            "p": p,
        }

    # table end
    if self.df_res > 0:
        table.cell("Residuals")
        table.cell(self.SS_res)
        table.cell(self.df_res, fmt="%i")
        table.cell(self.MS_res)
    return table
def __init__(self, Y, X, sub=None, title=None, empty=True, ems=None, lsq=0, showall=False):
    """
    Fit the model comparisons for an ANOVA of Y on the model X.

    Mixed effects models require full model specification so that E(MS) can
    be estimated according to Hopkins (1976).

    Random effects: If the model is fully specified (``X.df_error == 0``), a
    Hopkins E(MS) table is used to determine error terms in the mixed
    effects model. Otherwise, random factors are treated as fixed factors.

    The results are stored on the instance:

    - ``self._results_table``: list of ``(name, SS, df, MS, F, p)`` tuples
    - ``self._test_table``: list of ``(effect, lm1, lm0, MS_e, df_e)`` tuples
      for the effects tested through ``incremental_F_test``
    - ``self._log``: list of messages describing the fitting procedure

    kwargs
    ------
    sub: index applied to both Y and X before fitting (None = use all)
    title: stored as ``self.title``
    empty: include rows without F-Tests (True/False); not used by the
        constructor itself
    ems: display source of E(MS) for F-Tests (True/False; None = use
        default); stored as ``self.show_ems``
    lsq: least square fitter (forwarded to ``lm``)
        = 0 -> numpy.linalg.lstsq
        = 1 -> after Fox
    showall: show SS, df and MS for effects without F test

    Raises
    ------
    ValueError
        If Y and X describe a different number of cases, or if
        ``X.df_error`` is negative.

    TODO
    ----
    - sort model
    - reuse lms which are used repeatedly
    - provide threshold for including interaction effects when testing lower
      level effects

    Problem with unbalanced models
    ------------------------------
    - The SS of Effects which do not include the between-subject factor are
      higher than in SPSS
    - The SS of effects which include the between-subject factor agree with
      SPSS
    """
    # prepare kwargs
    Y = asvar(Y)
    X = asmodel(X)
    if sub is not None:
        Y = Y[sub]
        X = X[sub]
    # Raise explicitly: an ``assert`` would be stripped under ``python -O``.
    if Y.N != X.N:
        raise ValueError("Y and X must describe same number of cases")

    # save args
    self.Y = Y
    self.X = X
    self.title = title
    self.show_ems = ems
    self._log = []

    # decide which E(MS) model to use
    if X.df_error == 0:
        rfx = 1
        fx_desc = "Mixed"
    elif X.df_error > 0:
        rfx = 0
        fx_desc = "Fixed"
    else:
        raise ValueError("Model Overdetermined")
    self._log.append("%s effects model" % fx_desc)
    if lsq == 1:
        self._log.append("(my lsq)")
    elif lsq == 0:
        self._log.append("\n (np lsq)")

    # testing table: list of (effect, lm1, lm0, MS_e, df_e)
    test_table = []
    # results table: list of (name, SS, df, MS, F, p)
    results_table = []

    if len(X.effects) == 1:
        self._log.append("single factor model")
        lm0 = lm(Y, X, lsq=lsq)
        SS = lm0.SS_model
        df = lm0.df_model
        MS = lm0.MS_model
        F, p = lm0.F_test()
        results_table.append((X.name, SS, df, MS, F, p))
        results_table.append(("Residuals", lm0.SS_res, lm0.df_res, lm0.MS_res, None, None))
    else:
        if not rfx:
            # Fixed effects: the residuals of the full model are the error
            # term for every effect.
            full_lm = lm(Y, X, lsq=lsq)
            SS_e = full_lm.SS_res
            MS_e = full_lm.MS_res
            df_e = full_lm.df_res

        for e_test in X.effects:
            skip = False
            name = e_test.name

            # model 0: all other effects except those of higher order than
            # the tested effect
            effects = [
                e for e in X.effects
                if e is not e_test and not _is_higher_order(e, e_test)
            ]
            model0 = model(*effects)
            if e_test.df > model0.df_error:
                skip = "overspecified"
            else:
                lm0 = lm(Y, model0, lsq=lsq)

                # model 1: model 0 plus the tested effect
                effects.append(e_test)
                model1 = model(*effects)
                if model1.df_error > 0:
                    lm1 = lm(Y, model1, lsq=lsq)
                else:
                    lm1 = None

                if rfx:
                    # find the effects that make up E(MS) for e_test
                    EMS_effects = []
                    for e in X.effects:
                        if e is e_test:
                            pass
                        elif all(f in e_test or f.random for f in e.factors):
                            if all(f in e or e.nestedin(f) for f in e_test.factors):
                                EMS_effects.append(e)

                    if EMS_effects:
                        lm_EMS = lm(Y, model(*EMS_effects), lsq=lsq)
                        MS_e = lm_EMS.MS_model
                        df_e = lm_EMS.df_model
                    else:
                        if showall:
                            # report SS, df and MS without an F-test
                            if lm1 is None:
                                SS = lm0.SS_res
                                df = lm0.df_res
                            else:
                                SS = lm0.SS_res - lm1.SS_res
                                df = lm0.df_res - lm1.df_res
                            MS = SS / df
                            results_table.append((name, SS, df, MS, None, None))
                        skip = "no Hopkins E(MS)"

            if skip:
                self._log.append("SKIPPING: %s (%s)" % (e_test.name, skip))
            else:
                test_table.append((e_test, lm1, lm0, MS_e, df_e))
                SS, df, MS, F, p = incremental_F_test(lm1, lm0, MS_e=MS_e, df_e=df_e)
                results_table.append((name, SS, df, MS, F, p))

        if not rfx:
            results_table.append(("Residuals", SS_e, df_e, MS_e, None, None))

    self._test_table = test_table
    self._results_table = results_table
def __init__(self, Y, X, sub=None, title=None, empty=True, ems=None, showall=False, ds=None):
    """
    Fits a univariate ANOVA model.

    Mixed effects models require full model specification so that E(MS) can
    be estimated according to Hopkins (1976)

    Parameters
    ----------
    Y : var
        dependent variable
    X : model
        Model to fit to Y
    sub : None | index
        Forwarded to ``asvar`` and ``asmodel`` (presumably selects a subset
        of cases -- confirm against those helpers).
    title : None | str
        Stored as ``self.title``.
    empty : bool
        include rows without F-Tests (True/False). Not used by the
        constructor itself.
    ems : bool | None
        display source of E(MS) for F-Tests (True/False; None = use default).
        Stored as ``self.show_ems``.
    showall : bool
        show SS, df and MS for effects without F test. Not used by the
        constructor itself; for effects without Hopkins E(MS) these values
        are written to the log message instead.
    ds : None | dataset
        Forwarded to ``asvar`` and ``asmodel`` (presumably the dataset in
        which Y and X are looked up -- confirm against those helpers).

    Raises
    ------
    ValueError
        If Y and X differ in length, or if ``X.df_error`` is negative.
    NotImplementedError
        If the model contains random effects but is not fully specified
        (``X.df_error > 0``).

    Notes
    -----
    The constructor stores:

    - ``self.F_tests``: one entry per tested effect (the ``lm`` itself for a
      single-effect model, otherwise the ``incremental_F_test`` result)
    - ``self.names``: names matching ``self.F_tests``
    - ``self.residuals``: ``(SS, df, MS)`` of the residuals; only set for
      single-effect and fixed effects models
    - ``self._log``: list of messages describing the fitting procedure
    """
    # TODO:
    # - sort model
    # - reuse lms which are used repeatedly
    # - provide threshold for including interaction effects when testing lower
    #   level effects
    #
    # Problem with unbalanced models
    # ------------------------------
    # - The SS of Effects which do not include the between-subject factor are
    #   higher than in SPSS
    # - The SS of effects which include the between-subject factor agree with
    #   SPSS

    # prepare kwargs
    Y = asvar(Y, sub=sub, ds=ds)
    X = asmodel(X, sub=sub, ds=ds)
    if len(Y) != len(X):
        raise ValueError("Y and X must describe same number of cases")

    # save args
    self.Y = Y
    self.X = X
    self.title = title
    self.show_ems = ems
    self._log = []

    # decide which E(MS) model to use
    if X.df_error == 0:
        rfx = 1
        fx_desc = "Mixed"
    elif X.df_error > 0:
        if hasrandom(X):
            err = "Models containing random effects need to be fully specified."
            raise NotImplementedError(err)
        rfx = 0
        fx_desc = "Fixed"
    else:
        raise ValueError("Model Overdetermined")
    self._log.append("Using %s effects model" % fx_desc)

    # one entry per tested effect, with the matching effect names
    self.F_tests = []
    self.names = []

    if len(X.effects) == 1:
        self._log.append("single factor model")
        lm1 = lm(Y, X)
        self.F_tests.append(lm1)
        self.names.append(X.name)
        self.residuals = lm1.SS_res, lm1.df_res, lm1.MS_res
    else:
        if rfx:
            pass  # <- Hopkins: the error term is determined per effect below
        else:
            # Fixed effects: the residuals of the full model are the error
            # term for every effect.
            full_lm = lm(Y, X)
            SS_e = full_lm.SS_res
            MS_e = full_lm.MS_res
            df_e = full_lm.df_res

        for e_test in X.effects:
            skip = False
            name = e_test.name

            # model 0: all other effects except those of higher order than
            # the tested effect
            effects = [
                e for e in X.effects
                if e is not e_test and not is_higher_order(e, e_test)
            ]
            model0 = model(*effects)
            if e_test.df > model0.df_error:
                skip = "overspecified"
            else:
                lm0 = lm(Y, model0)

                # model 1: model 0 plus the tested effect
                effects.append(e_test)
                model1 = model(*effects)
                if model1.df_error > 0:
                    lm1 = lm(Y, model1)
                else:
                    lm1 = None

                if rfx:
                    # find E(MS)
                    EMS_effects = _find_hopkins_ems(e_test, X)

                    if len(EMS_effects) > 0:
                        lm_EMS = lm(Y, model(*EMS_effects))
                        MS_e = lm_EMS.MS_model
                        df_e = lm_EMS.df_model
                    else:
                        # no error term: log SS, df and MS instead of testing
                        if lm1 is None:
                            SS = lm0.SS_res
                            df = lm0.df_res
                        else:
                            SS = lm0.SS_res - lm1.SS_res
                            df = lm0.df_res - lm1.df_res
                        MS = SS / df
                        skip = "no Hopkins E(MS); SS=%.2f, df=%i, MS=%.2f" % (SS, df, MS)

            if skip:
                self._log.append("SKIPPING: %s (%s)" % (e_test.name, skip))
            else:
                res = incremental_F_test(lm1, lm0, MS_e=MS_e, df_e=df_e, name=name)
                self.F_tests.append(res)
                self.names.append(name)

        if not rfx:
            self.residuals = SS_e, df_e, MS_e