Esempio n. 1
0
    def anova(self, title=None, empty=True, ems=None):
        """
        Build an ANOVA table for the linear model.

        Parameters
        ----------
        title : str | None
            Title of the table. If empty, ``self.title`` is used when set.
        empty : bool
            Also add rows for effects that have no error term (and hence no
            F-test).
        ems : bool | None
            Add an "E(MS)" column naming the error term of each F-test.
            ``None`` uses ``defaults["show_ems"]``.

        Returns
        -------
        table : textab.Table
            One row per effect (SS, df, MS, optionally E(MS), F and p),
            followed by a "Residuals" row when ``self.df_res > 0``.

        Raises
        ------
        NotImplementedError
            If ``isbalanced(self.X)`` is false.

        Notes
        -----
        As a side effect, ``self.MS`` (list with the MS of every effect) and
        ``self.results`` (``{effect name: dict}`` with the keys "SS", "df",
        "MS", "E(MS)", "F" and "p") are replaced.  "F" and "p" are None for
        effects without an F-test.
        """
        if ems is None:
            ems = defaults["show_ems"]
        X = self.X
        values = self.values
        # e_ms[i] is used below as the index (into self.MS and X.effects) of
        # the effect that supplies the error term for effect i, or None.
        e_ms = _hopkins_ems(X)

        # table head: one label column plus 5 numeric columns (6 with E(MS))
        table = textab.Table("l" + "r" * (5 + ems))
        if title:
            table.title(title)
        elif self.title:
            table.title(self.title)
        table.cell()
        headers = ["SS", "df", "MS"]
        if ems:
            headers.append("E(MS)")
        headers += ["F", "p"]
        for hd in headers:
            table.cell(hd, r"\textbf", just="c")
        table.midrule()

        if not isbalanced(X):
            raise NotImplementedError("anova() is only implemented for balanced models")

        # MS of every effect; needed up front because, in models involving
        # random effects, an effect's error term can be another effect's MS.
        # NOTE(review): presumably `values` holds one column per model
        # parameter and `index` selects the columns of the effect -- confirm.
        self.MS = [
            np.sum(values[:, index].sum(1) ** 2) / df
            for _, _, index, df in X.iter_effects()
        ]

        # table body
        self.results = {}
        for i, name, index, df in X.iter_effects():
            SS = np.sum(values[:, index].sum(1) ** 2)
            MS = SS / df

            # select the error term (denominator of the F ratio)
            e_ms_i = e_ms[i]
            if e_ms_i is not None:
                error_effect = X.effects[e_ms_i]
                MS_d = self.MS[e_ms_i]
                df_d = error_effect.df
                e_ms_name = error_effect.name
            elif self.df_res > 0:
                MS_d = self.MS_res
                df_d = self.df_res
                e_ms_name = "Res"
            else:
                MS_d = None
                e_ms_name = None

            # F-test; an error term of exactly 0 is treated like a missing
            # one, which avoids dividing by zero.
            if MS_d is not None and MS_d != 0:
                F = MS / MS_d
                # survival function == 1 - cdf, but precise for tiny p
                p = sp.stats.distributions.f.sf(F, df, df_d)
                F_tex = [F, textab.Stars(test.star(p))]
            else:
                F = None
                F_tex = None
                p = None

            # add to table
            if e_ms_name or empty:
                table.cell(name)
                table.cell(SS)
                table.cell(df, fmt="%i")
                table.cell(MS)
                if ems:
                    table.cell(e_ms_name)
                table.cell(F_tex, mat=True)
                table.cell(p, fmt=defaults["p_fmt"], drop0=True)

            # store results
            self.results[name] = {"SS": SS, "df": df, "MS": MS, "E(MS)": e_ms_name, "F": F, "p": p}

        # table end
        if self.df_res > 0:
            table.cell("Residuals")
            table.cell(self.SS_res)
            table.cell(self.df_res, fmt="%i")
            table.cell(self.MS_res)
        return table
Esempio n. 2
0
    def __init__(self, Y, X, sub=None, title=None, empty=True, ems=None, lsq=0, showall=False):
        """
        Fit an ANOVA for the linear model and store the test results.

        Mixed effects models require full model specification so that E(MS)
        can be estimated according to Hopkins (1976).

        Random effects: If the model is fully specified (``X.df_error == 0``),
        a Hopkins E(MS) table is used to determine error terms in the mixed
        effects model. Otherwise, random factors are treated as fixed factors.

        Parameters
        ----------
        Y :
            Dependent variable (converted with ``asvar``).
        X :
            Model to fit to Y (converted with ``asmodel``).
        sub : index | None
            If not None, both Y and X are indexed with it before fitting.
        title : str | None
            Stored as ``self.title``.
        empty : bool
            Include rows without F-tests. Not used by the constructor itself.
        ems : bool | None
            Display source of E(MS) for F-tests (None = use default); stored
            as ``self.show_ems``.
        lsq : int
            Least square fitter, passed on to ``lm``:
            0 -> numpy.linalg.lstsq;
            1 -> after Fox.
        showall : bool
            For effects without a Hopkins E(MS), still store SS, df and MS
            (with F and p set to None) in the results table.

        Raises
        ------
        ValueError
            If Y and X differ in their number of cases, or if the model is
            overdetermined (``X.df_error < 0``).

        Notes
        -----
        Results are stored in ``self._results_table`` as a list of
        ``(name, SS, df, MS, F, p)`` tuples and in ``self._test_table`` as a
        list of ``(effect, lm1, lm0, MS_e, df_e)`` tuples. Decisions taken
        while fitting are recorded in ``self._log``.

        TODO:

          - sort model
          - reuse lms which are used repeatedly
          - provide threshold for including interaction effects when testing
            lower level effects

        Problem with unbalanced models:

          - The SS of effects which do not include the between-subject factor
            are higher than in SPSS
          - The SS of effects which include the between-subject factor agree
            with SPSS

        """
        # prepare kwargs
        Y = asvar(Y)
        X = asmodel(X)

        if sub is not None:
            Y = Y[sub]
            X = X[sub]

        # explicit check instead of ``assert``, which is removed under -O
        if Y.N != X.N:
            raise ValueError("Y and X must describe same number of cases")

        # save args
        self.Y = Y
        self.X = X
        self.title = title
        self.show_ems = ems

        self._log = []

        # decide which E(MS) model to use
        if X.df_error == 0:
            rfx = 1
            fx_desc = "Mixed"
        elif X.df_error > 0:
            rfx = 0
            fx_desc = "Fixed"
        else:
            raise ValueError("Model Overdetermined")
        self._log.append("%s effects model" % fx_desc)

        if lsq == 1:
            self._log.append("(my lsq)")
        elif lsq == 0:
            self._log.append("\n (np lsq)")

        # list of (effect, lm1, lm0, MS_e, df_e)
        test_table = []
        # list of (name, SS, df, MS, F, p)
        results_table = []

        if len(X.effects) == 1:
            self._log.append("single factor model")
            lm0 = lm(Y, X, lsq=lsq)
            F, p = lm0.F_test()
            results_table.append((X.name, lm0.SS_model, lm0.df_model, lm0.MS_model, F, p))
            results_table.append(("Residuals", lm0.SS_res, lm0.df_res, lm0.MS_res, None, None))
        else:
            if not rfx:
                # fixed effects: every effect is tested against the residuals
                # of the full model
                full_lm = lm(Y, X, lsq=lsq)
                SS_e = full_lm.SS_res
                MS_e = full_lm.MS_res
                df_e = full_lm.df_res

            for e_test in X.effects:
                skip = False
                name = e_test.name

                # model 0: all other effects, except those for which
                # _is_higher_order(e, e_test) holds
                effects = [
                    e for e in X.effects
                    if e is not e_test and not _is_higher_order(e, e_test)
                ]

                model0 = model(*effects)
                if e_test.df > model0.df_error:
                    skip = "overspecified"
                else:
                    lm0 = lm(Y, model0, lsq=lsq)

                    # model 1: model 0 plus the effect under test; it is only
                    # fitted when it leaves error degrees of freedom
                    effects.append(e_test)
                    model1 = model(*effects)
                    if model1.df_error > 0:
                        lm1 = lm(Y, model1, lsq=lsq)
                    else:
                        lm1 = None

                    if rfx:
                        # find E(MS): effects whose model MS serves as the
                        # error term for e_test
                        EMS_effects = []
                        for e in X.effects:
                            if e is e_test:
                                continue
                            if not all((f in e_test or f.random) for f in e.factors):
                                continue
                            if all((f in e or e.nestedin(f)) for f in e_test.factors):
                                EMS_effects.append(e)

                        if EMS_effects:
                            lm_EMS = lm(Y, model(*EMS_effects), lsq=lsq)
                            MS_e = lm_EMS.MS_model
                            df_e = lm_EMS.df_model
                        else:
                            if showall:
                                # no F-test possible; report SS, df and MS only
                                if lm1 is None:
                                    SS = lm0.SS_res
                                    df = lm0.df_res
                                else:
                                    SS = lm0.SS_res - lm1.SS_res
                                    df = lm0.df_res - lm1.df_res
                                MS = SS / df
                                results_table.append((name, SS, df, MS, None, None))
                            skip = "no Hopkins E(MS)"

                if skip:
                    self._log.append("SKIPPING: %s (%s)" % (e_test.name, skip))
                else:
                    test_table.append((e_test, lm1, lm0, MS_e, df_e))
                    SS, df, MS, F, p = incremental_F_test(lm1, lm0, MS_e=MS_e, df_e=df_e)
                    results_table.append((name, SS, df, MS, F, p))
            if not rfx:
                results_table.append(("Residuals", SS_e, df_e, MS_e, None, None))
        self._test_table = test_table
        self._results_table = results_table
Esempio n. 3
0
    def __init__(self, Y, X, sub=None, title=None, empty=True, ems=None, showall=False, ds=None):
        """
        Fits a univariate ANOVA model.

        Mixed effects models require full model specification so that E(MS)
        can be estimated according to Hopkins (1976)


        Parameters
        ----------
        Y : var
            dependent variable
        X : model
            Model to fit to Y
        sub : index | None
            Passed on to ``asvar`` and ``asmodel`` together with ``ds``
            (presumably selects a subset of cases -- confirm against those
            helpers).
        title : str | None
            Stored as ``self.title``.
        empty : bool
            include rows without F-Tests (True/False); not used by the
            constructor itself
        ems : bool | None
            display source of E(MS) for F-Tests (True/False; None = use
            default); stored as ``self.show_ems``
        showall : bool
            show SS, df and MS for effects without F test; not used by the
            constructor itself (those values are written to the log)
        ds : dataset | None
            Passed on to ``asvar`` and ``asmodel`` (presumably the dataset in
            which Y and X are looked up -- confirm against those helpers).

        Raises
        ------
        ValueError
            If Y and X differ in length, or if the model is overdetermined
            (``X.df_error < 0``).
        NotImplementedError
            If the model contains random effects but is not fully specified
            (``X.df_error > 0``).

        Notes
        -----
        The fitted tests are stored in ``self.F_tests`` with the matching
        effect names in ``self.names``. ``self.residuals`` (SS, df, MS) is
        only set for single-effect and fixed effects models. Decisions taken
        while fitting are recorded in ``self._log``.
        """
        #  TODO:
        #         - sort model
        #          - reuse lms which are used repeatedly
        #          - provide threshold for including interaction effects when testing lower
        #            level effects
        #
        #        Problem with unbalanced models
        #        ------------------------------
        #          - The SS of Effects which do not include the between-subject factor are
        #            higher than in SPSS
        #          - The SS of effects which include the between-subject factor agree with
        #            SPSS

        # prepare kwargs
        Y = asvar(Y, sub=sub, ds=ds)
        X = asmodel(X, sub=sub, ds=ds)

        if len(Y) != len(X):
            raise ValueError("Y and X must describe same number of cases")

        # save args
        self.Y = Y
        self.X = X
        self.title = title
        self.show_ems = ems
        self._log = []

        # decide which E(MS) model to use
        if X.df_error == 0:
            rfx = 1
            fx_desc = "Mixed"
        elif X.df_error > 0:
            if hasrandom(X):
                err = "Models containing random effects need to be fully " "specified."
                raise NotImplementedError(err)
            rfx = 0
            fx_desc = "Fixed"
        else:
            raise ValueError("Model Overdetermined")
        self._log.append("Using %s effects model" % fx_desc)

        # fitted tests and the names of the effects they belong to
        self.F_tests = []
        self.names = []

        if len(X.effects) == 1:
            self._log.append("single factor model")
            lm1 = lm(Y, X)
            self.F_tests.append(lm1)
            self.names.append(X.name)
            self.residuals = lm1.SS_res, lm1.df_res, lm1.MS_res
        else:
            if not rfx:
                # fixed effects: every effect is tested against the residuals
                # of the full model (mixed models use Hopkins E(MS) below)
                full_lm = lm(Y, X)
                SS_e = full_lm.SS_res
                MS_e = full_lm.MS_res
                df_e = full_lm.df_res

            for e_test in X.effects:
                skip = False
                name = e_test.name

                # model 0: all other effects, except those for which
                # is_higher_order(e, e_test) holds
                effects = [
                    e for e in X.effects
                    if e is not e_test and not is_higher_order(e, e_test)
                ]

                model0 = model(*effects)
                if e_test.df > model0.df_error:
                    skip = "overspecified"
                else:
                    lm0 = lm(Y, model0)

                    # model 1: model 0 plus the effect under test; it is only
                    # fitted when it leaves error degrees of freedom
                    effects.append(e_test)
                    model1 = model(*effects)
                    if model1.df_error > 0:
                        lm1 = lm(Y, model1)
                    else:
                        lm1 = None

                    if rfx:
                        # find E(MS)
                        EMS_effects = _find_hopkins_ems(e_test, X)

                        if len(EMS_effects) > 0:
                            lm_EMS = lm(Y, model(*EMS_effects))
                            MS_e = lm_EMS.MS_model
                            df_e = lm_EMS.df_model
                        else:
                            # no error term: no F-test, but SS, df and MS are
                            # still reported through the log
                            if lm1 is None:
                                SS = lm0.SS_res
                                df = lm0.df_res
                            else:
                                SS = lm0.SS_res - lm1.SS_res
                                df = lm0.df_res - lm1.df_res
                            MS = SS / df
                            skip = "no Hopkins E(MS); SS=%.2f, df=%i, " "MS=%.2f" % (SS, df, MS)

                if skip:
                    self._log.append("SKIPPING: %s (%s)" % (e_test.name, skip))
                else:
                    res = incremental_F_test(lm1, lm0, MS_e=MS_e, df_e=df_e, name=name)
                    self.F_tests.append(res)
                    self.names.append(name)
            if not rfx:
                self.residuals = SS_e, df_e, MS_e