Exemple #1
0
    def __init__(self, X):
        """
        Prepare a balanced model for fitting.

        Parameters
        ----------
        X : model
            Model which will be fitted to the data (converted with
            ``asmodel``).

        Raises
        ------
        NotImplementedError
            If the model is unbalanced, or if it has error degrees of freedom
            left and contains random effects.
        ValueError
            If the module-level ``_lmf_lsq`` is neither 0 nor 1.
        """
        # normalize the input and store the basic properties
        fit_model = asmodel(X)
        self.X = fit_model
        self.n_cases = len(fit_model)
        if not isbalanced(fit_model):
            raise NotImplementedError("Unbalanced models")
        self.X_ = fit_model.full

        # a model without error df is treated as fully specified
        is_full = fit_model.df_error == 0
        self.full_model = is_full
        if is_full:
            self.E_MS = hopkins_ems(fit_model)
        elif hasrandom(fit_model):
            raise NotImplementedError("Models containing random effects need to be fully specified.")

        # upper bound derived from the module-level _max_array_size and the
        # model's df
        self.max_len = int(2 ** _max_array_size // fit_model.df ** 2)

        # only fitter version 1 needs the model's Xsinv attribute
        if _lmf_lsq == 1:
            self.Xsinv = fit_model.Xsinv
        elif _lmf_lsq != 0:
            raise ValueError("version")
Exemple #2
0
    def __init__(self, Y, X, sub=None, title=None, empty=True, ems=None, showall=False, ds=None):
        """
        Fits a univariate ANOVA model.

        Mixed effects models require full model specification so that E(MS)
        can be estimated according to Hopkins (1976)


        Parameters
        ----------
        Y : var
            dependent variable
        X : model
            Model to fit to Y
        sub : None | index
            Forwarded to ``asvar`` and ``asmodel`` (presumably selects a
            subset of cases -- confirm against those helpers).
        title : None | str
            Stored as ``self.title``.
        empty : bool
            include rows without F-Tests (True/False). Not used by the
            constructor itself.
        ems : bool | None
            display source of E(MS) for F-Tests (True/False; None = use
            default). Stored as ``self.show_ems``.
        showall : bool
            show SS, df and MS for effects without F test. Not used by the
            constructor itself.
        ds : None | dataset
            Forwarded to ``asvar`` and ``asmodel`` (presumably the dataset in
            which ``Y`` and ``X`` are looked up -- confirm).

        Raises
        ------
        ValueError
            If ``Y`` and ``X`` differ in length, or if ``X.df_error`` is
            negative.
        NotImplementedError
            If ``X`` has error degrees of freedom left but contains random
            effects.

        Notes
        -----
        ``self.F_tests`` and ``self.names`` are filled in parallel, one entry
        per tested effect; skipped effects are only recorded in ``self._log``.
        ``self.residuals`` is set to ``(SS, df, MS)`` for single-effect models
        and for fixed effects models; it is not set for mixed models with more
        than one effect.
        """
        #  TODO:
        #         - sort model
        #          - reuse lms which are used repeatedly
        #          - provide threshold for including interaction effects when testing lower
        #            level effects
        #
        #        Problem with unbalanced models
        #        ------------------------------
        #          - The SS of Effects which do not include the between-subject factor are
        #            higher than in SPSS
        #          - The SS of effects which include the between-subject factor agree with
        #            SPSS

        # prepare kwargs
        Y = asvar(Y, sub=sub, ds=ds)
        X = asmodel(X, sub=sub, ds=ds)

        if len(Y) != len(X):
            raise ValueError("Y and X must describe same number of cases")

        # save args
        self.Y = Y
        self.X = X
        self.title = title
        self.show_ems = ems
        self._log = []

        # decide which E(MS) model to use
        if X.df_error == 0:
            rfx = True
            fx_desc = "Mixed"
        elif X.df_error > 0:
            if hasrandom(X):
                err = "Models containing random effects need to be fully specified."
                raise NotImplementedError(err)
            rfx = False
            fx_desc = "Fixed"
        else:
            raise ValueError("Model Overdetermined")
        self._log.append("Using %s effects model" % fx_desc)

        # parallel lists: test result and name for each tested effect
        self.F_tests = []
        self.names = []

        if len(X.effects) == 1:
            self._log.append("single factor model")
            lm1 = lm(Y, X)
            self.F_tests.append(lm1)
            self.names.append(X.name)
            self.residuals = lm1.SS_res, lm1.df_res, lm1.MS_res
            return

        if not rfx:
            # fixed effects: every effect is tested against the residuals of
            # the full model (mixed models get their error term per effect)
            full_lm = lm(Y, X)
            SS_e = full_lm.SS_res
            MS_e = full_lm.MS_res
            df_e = full_lm.df_res

        for e_test in X.effects:
            skip_reason = None
            name = e_test.name

            # model 0: all other effects except those of higher order than
            # the tested effect
            effects = [e for e in X.effects
                       if e is not e_test and not is_higher_order(e, e_test)]
            model0 = model(*effects)

            if e_test.df > model0.df_error:
                skip_reason = "overspecified"
            else:
                lm0 = lm(Y, model0)

                # model 1: model 0 plus the tested effect; it can only be
                # fitted if it leaves error df
                model1 = model(*(effects + [e_test]))
                if model1.df_error > 0:
                    lm1 = lm(Y, model1)
                else:
                    lm1 = None

                if rfx:
                    # find E(MS)
                    EMS_effects = _find_hopkins_ems(e_test, X)

                    if len(EMS_effects) > 0:
                        lm_EMS = lm(Y, model(*EMS_effects))
                        MS_e = lm_EMS.MS_model
                        df_e = lm_EMS.df_model
                    else:
                        # no error term available: log SS/df/MS instead of
                        # running an F-test
                        if lm1 is None:
                            SS = lm0.SS_res
                            df = lm0.df_res
                        else:
                            SS = lm0.SS_res - lm1.SS_res
                            df = lm0.df_res - lm1.df_res
                        MS = SS / df
                        skip_reason = "no Hopkins E(MS); SS=%.2f, df=%i, MS=%.2f" % (SS, df, MS)

            if skip_reason:
                self._log.append("SKIPPING: %s (%s)" % (e_test.name, skip_reason))
            else:
                # NOTE(review): lm1 may be None here for mixed models;
                # presumably incremental_F_test handles that -- confirm.
                res = incremental_F_test(lm1, lm0, MS_e=MS_e, df_e=df_e, name=name)
                self.F_tests.append(res)
                self.names.append(name)

        if not rfx:
            self.residuals = SS_e, df_e, MS_e
Exemple #3
0
    def anova(self, title="ANOVA", empty=True, ems=False):
        """
        Returns an ANOVA table for the linear model.

        Parameters
        ----------
        title : str | None
            Title for the table; no title is set if this is empty or None.
        empty : bool
            Include rows for effects whose E(MS) description is empty (i.e.
            effects for which no error term was found).
        ems : bool
            Add an "E(MS)" column naming the error term used for each F-test.

        Returns
        -------
        table : fmtxt.Table
            One row per effect with SS, df, MS, (E(MS),) F and p, followed by
            a "Residuals" row if the model has residual degrees of freedom.

        Raises
        ------
        NotImplementedError
            If the model has error degrees of freedom left but contains
            random effects.
        """
        X = self.X
        values = self.beta * X.full

        # error terms: taken from hopkins_ems for models without error df,
        # otherwise the residuals are used (e_ms = False)
        if X.df_error == 0:
            e_ms = hopkins_ems(X)
        elif hasrandom(X):
            err = "Models containing random effects need to be fully specified."
            raise NotImplementedError(err)
        else:
            e_ms = False

        # table head
        table = fmtxt.Table("l" + "r" * (5 + ems))
        if title:
            table.title(title)

        if not isbalanced(X):
            table.caption("Warning: model is unbalanced, use anova class")

        table.cell()
        headers = ["SS", "df", "MS"]
        if ems:
            headers.append("E(MS)")
        headers += ["F", "p"]
        for hd in headers:
            table.cell(hd, r"\textbf", just="c")
        table.midrule()

        # SS and MS for all effects are computed up front, because the error
        # term of one effect can be the MS of other effects (needed for
        # models involving random effects)
        MSs = {}
        SSs = {}
        for e in X.effects:
            idx = X.full_index[e]
            SSs[e] = np.sum(values[:, idx].sum(1) ** 2)
            MSs[e] = SSs[e] / e.df

        # table body
        for e in X.effects:
            MS = MSs[e]
            if e_ms:
                e_EMS = e_ms[e]
                df_d = sum(c.df for c in e_EMS)
                MS_d = sum(MSs[c] for c in e_EMS)
                e_ms_name = " + ".join(repr(c) for c in e_EMS)
            else:
                df_d = self.df_res
                MS_d = self.MS_res
                e_ms_name = "Res"

            # F-test; a denominator of 0 (e.g. an empty list of error terms)
            # means no test can be computed for this effect
            if MS_d != 0:
                F = MS / MS_d
                # survival function instead of 1 - cdf: same quantity, but
                # without cancellation for very small p
                p = scipy.stats.distributions.f.sf(F, e.df, df_d)
                stars = test.star(p)
                F_tex = [F, fmtxt.Stars(stars)]
            else:
                F_tex = None
                p = None

            # add to table
            if e_ms_name or empty:
                table.cell(e.name)
                table.cell(SSs[e])
                table.cell(e.df, fmt="%i")
                table.cell(MS)
                if ems:
                    table.cell(e_ms_name)
                table.cell(F_tex, mat=True)
                table.cell(fmtxt.p(p))

        # Residuals
        if self.df_res > 0:
            table.cell("Residuals")
            table.cell(self.SS_res)
            table.cell(self.df_res, fmt="%i")
            table.cell(self.MS_res)

        return table