def __init__(self, X):
    """
    Prepare the fitter for a given model.

    Parameters
    ----------
    X : model
        Model which will be fitted to the data (converted with
        ``asmodel``).

    Raises
    ------
    NotImplementedError
        If the model is unbalanced, or if it contains random effects but
        has error degrees of freedom left (i.e. is not fully specified).
    ValueError
        If the module-level ``_lmf_lsq`` setting is neither 0 nor 1.
    """
    # normalize the input and remember its size
    X = asmodel(X)
    self.X = X
    self.n_cases = len(X)

    # only balanced designs are supported
    if not isbalanced(X):
        raise NotImplementedError("Unbalanced models")

    self.X_ = X.full

    # a model without error df is treated as fully specified; only then
    # are the Hopkins E(MS) terms looked up
    is_full = X.df_error == 0
    self.full_model = is_full
    if is_full:
        self.E_MS = hopkins_ems(X)
    elif hasrandom(X):
        err = ("Models containing random effects need to be fully "
               "specified.")
        raise NotImplementedError(err)

    # size limit derived from the module-level _max_array_size setting
    self.max_len = int(2 ** _max_array_size // X.df ** 2)

    # fitter selection: 0 needs no extra state, 1 uses X.Xsinv
    if _lmf_lsq == 1:
        self.Xsinv = X.Xsinv
    elif _lmf_lsq != 0:
        raise ValueError("version")
def __init__(self, Y, X, sub=None, title=None, empty=True, ems=None,
             showall=False, ds=None):
    """
    Fit a univariate ANOVA model.

    Mixed effects models require full model specification so that E(MS)
    can be estimated according to Hopkins (1976).

    Parameters
    ----------
    Y : var
        Dependent variable.
    X : model
        Model to fit to Y.
    sub : optional
        Passed on to ``asvar`` and ``asmodel`` when converting Y and X.
    title : optional
        Stored as ``self.title``.
    empty : bool
        Include rows without F-Tests (True/False). Not used by the
        constructor itself.
    ems : bool | None
        Display source of E(MS) for F-Tests (True/False; None = use
        default). Stored as ``self.show_ems``.
    showall : bool
        Show SS, df and MS for effects without F test. Not used by the
        constructor itself.
    ds : optional
        Passed on to ``asvar`` and ``asmodel`` when converting Y and X.

    Raises
    ------
    ValueError
        If Y and X differ in length, or if the model has negative error
        degrees of freedom.
    NotImplementedError
        If the model contains random effects but is not fully specified.
    """
    # TODO:
    # - sort model
    # - reuse lms which are used repeatedly
    # - provide threshold for including interaction effects when testing
    #   lower level effects
    #
    # Problem with unbalanced models
    # ------------------------------
    # - The SS of Effects which do not include the between-subject factor
    #   are higher than in SPSS
    # - The SS of effects which include the between-subject factor agree
    #   with SPSS

    # convert the arguments and make sure they line up
    Y = asvar(Y, sub=sub, ds=ds)
    X = asmodel(X, sub=sub, ds=ds)
    if len(Y) != len(X):
        raise ValueError("Y and X must describe same number of cases")

    # keep the arguments around
    self.Y = Y
    self.X = X
    self.title = title
    self.show_ems = ems
    self._log = []

    # pick the E(MS) strategy: no error df -> Hopkins (mixed effects),
    # positive error df -> residuals of the full model (fixed effects)
    if X.df_error == 0:
        mixed = True
        fx_desc = "Mixed"
    elif X.df_error > 0:
        if hasrandom(X):
            err = ("Models containing random effects need to be fully "
                   "specified.")
            raise NotImplementedError(err)
        mixed = False
        fx_desc = "Fixed"
    else:
        raise ValueError("Model Overdetermined")
    self._log.append("Using %s effects model" % fx_desc)

    # one entry per tested effect, with the matching effect names
    self.F_tests = []
    self.names = []

    # a single effect is tested by its own linear model
    if len(X.effects) == 1:
        self._log.append("single factor model")
        single_lm = lm(Y, X)
        self.F_tests.append(single_lm)
        self.names.append(X.name)
        self.residuals = (single_lm.SS_res, single_lm.df_res,
                          single_lm.MS_res)
        return

    # fixed effects: every F-test shares the residuals of the full model;
    # mixed effects: the denominator is found per effect (Hopkins) below
    if not mixed:
        full_lm = lm(Y, X)
        SS_e = full_lm.SS_res
        MS_e = full_lm.MS_res
        df_e = full_lm.df_res

    for e_test in X.effects:
        name = e_test.name

        # model 0: all other effects which are not of higher order than
        # the tested effect
        reduced = [e for e in X.effects
                   if e is not e_test and not is_higher_order(e, e_test)]
        model0 = model(*reduced)

        skip_reason = None
        if e_test.df > model0.df_error:
            skip_reason = "overspecified"
        else:
            lm0 = lm(Y, model0)

            # model 1: model 0 plus the tested effect; it can only be
            # fitted when error df remain
            model1 = model(*(reduced + [e_test]))
            lm1 = lm(Y, model1) if model1.df_error > 0 else None

            if mixed:
                # find E(MS)
                EMS_effects = _find_hopkins_ems(e_test, X)
                if len(EMS_effects) > 0:
                    lm_EMS = lm(Y, model(*EMS_effects))
                    MS_e = lm_EMS.MS_model
                    df_e = lm_EMS.df_model
                else:
                    # no denominator available: report the effect's
                    # SS/df/MS in the log instead of testing it
                    if lm1 is None:
                        SS = lm0.SS_res
                        df = lm0.df_res
                    else:
                        SS = lm0.SS_res - lm1.SS_res
                        df = lm0.df_res - lm1.df_res
                    MS = SS / df
                    skip_reason = ("no Hopkins E(MS); SS=%.2f, df=%i, "
                                   "MS=%.2f" % (SS, df, MS))

        if skip_reason:
            self._log.append("SKIPPING: %s (%s)" % (name, skip_reason))
        else:
            res = incremental_F_test(lm1, lm0, MS_e=MS_e, df_e=df_e,
                                     name=name)
            self.F_tests.append(res)
            self.names.append(name)

    if not mixed:
        self.residuals = SS_e, df_e, MS_e
def anova(self, title="ANOVA", empty=True, ems=False):
    """
    Return an ANOVA table for the linear model.

    Parameters
    ----------
    title : str | None
        Title of the table; a false value leaves the table untitled.
    empty : bool
        Also include rows for effects whose list of E(MS) terms is empty
        (such effects have no F-test).
    ems : bool
        Add a column naming the source of E(MS) for each F-test.

    Returns
    -------
    table : fmtxt.Table
        Table with SS, df, MS, (E(MS),) F and p for each effect, followed
        by a residuals row if the model has residual degrees of freedom.

    Raises
    ------
    NotImplementedError
        If the model contains random effects but is not fully specified.
    """
    X = self.X
    values = self.beta * X.full
    show_ems = bool(ems)

    # denominators: Hopkins E(MS) for models without error df, otherwise
    # the residuals of this fit
    if X.df_error == 0:
        e_ms = hopkins_ems(X)
    elif hasrandom(X):
        err = ("Models containing random effects need to be fully "
               "specified.")
        raise NotImplementedError(err)
    else:
        e_ms = False

    # table head
    table = fmtxt.Table("l" + "r" * (5 + show_ems))
    if title:
        table.title(title)

    if not isbalanced(X):
        table.caption("Warning: model is unbalanced, use anova class")

    table.cell()
    headers = ["SS", "df", "MS"]
    if show_ems:
        headers += ["E(MS)"]
    headers += ["F", "p"]
    for hd in headers:
        table.cell(hd, r"\textbf", just="c")
    table.midrule()

    # SS and MS for all effects up front: with random effects the
    # denominator of one effect is built from the MS of other effects
    SSs = {}
    MSs = {}
    for e in X.effects:
        idx = X.full_index[e]
        SSs[e] = np.sum(values[:, idx].sum(1) ** 2)
        MSs[e] = SSs[e] / e.df

    # table body
    for e in X.effects:
        MS = MSs[e]
        if e_ms:
            e_EMS = e_ms[e]
            df_d = sum(c.df for c in e_EMS)
            MS_d = sum(MSs[c] for c in e_EMS)
            e_ms_name = " + ".join(repr(c) for c in e_EMS)
        else:
            df_d = self.df_res
            MS_d = self.MS_res
            e_ms_name = "Res"

        # F-test; without a denominator (MS_d == 0, e.g. an empty list of
        # E(MS) terms) no test is possible
        if MS_d != 0:
            F = MS / MS_d
            # survival function instead of 1 - cdf: keeps precision for
            # very small p-values
            p = scipy.stats.distributions.f.sf(F, e.df, df_d)
            stars = test.star(p)
            F_tex = [F, fmtxt.Stars(stars)]
        else:
            F_tex = None
            p = None

        # add to table
        if e_ms_name or empty:
            table.cell(e.name)
            table.cell(SSs[e])
            table.cell(e.df, fmt="%i")
            table.cell(MS)
            if show_ems:
                table.cell(e_ms_name)
            table.cell(F_tex, mat=True)
            table.cell(fmtxt.p(p))

    # Residuals
    if self.df_res > 0:
        table.cell("Residuals")
        table.cell(self.SS_res)
        table.cell(self.df_res, fmt="%i")
        table.cell(self.MS_res)

    return table