예제 #1
0
    def setup_class(cls):
        """Fit a constrained Poisson model through two entry points.

        Stores on the class: ``res2`` (taken from ``results``), ``idx``,
        ``res1`` (return value of the ``fit_constrained`` function) and
        ``res1m`` (return value of the model's ``fit_constrained`` method).
        """
        cls.res2 = results.results_noexposure_constraint2
        # index 2 is left out: dropped baseline for the categorical
        cls.idx = [7, 3, 4, 5, 6, 0, 1]

        # logpyears enters as a regressor, no offset is used
        model = Poisson.from_formula(
            'deaths ~ logpyears + smokes + C(agecat)', data=data)

        # Explicit starting values, because the example fails to converge on
        # one py TravisCI: zeros everywhere except the first entry, which is
        # the log of the mean of endog.
        init_params = np.zeros(len(model.exog_names))
        init_params[0] = np.log(model.endog.mean())

        # desired params, kept for reference only; not used below
        desired_params = np.array([
            -9.43762015, 1.52762442, 2.74155711, 3.58730007,
            4.08730007, 1.15987869, 0.12111539,
        ])

        restriction = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        design_info = patsy.DesignInfo(model.exog_names)
        lin_constr = design_info.linear_constraint(restriction)

        # standalone function, constraint given as arrays
        # TODO: Newton fails
        cls.res1 = fit_constrained(
            model, lin_constr.coefs, lin_constr.constants,
            start_params=init_params,
            fit_kwds=dict(method='bfgs', disp=0))

        # test method of Poisson, not monkey patched
        cls.res1m = model.fit_constrained(
            restriction, method='bfgs', disp=0, start_params=init_params)
예제 #2
0
    def fit_constrained(self, constraints, start_params=None, **fit_kwds):
        """Estimate the model under linear equality constraints.

        Constraints have the form ``R params = q`` with constraint matrix
        ``R`` and constraint values ``q``. Estimation is delegated to
        ``statsmodels.base._constraints.fit_constrained``; its parameter
        estimates and covariance are then written onto a results instance
        of this model.

        Parameters
        ----------
        constraints : formula expression or tuple
            Passed to ``DesignInfo(self.exog_names).linear_constraint``.
            If it is a tuple, it needs to hold the two arrays
            (constraint_matrix, constraint_value), i.e. (R, q). Otherwise
            a string or a list of strings; see t_test for details.
        start_params : None or array_like
            Starting values for the optimization, given in the original
            parameter space; they are transformed internally.
        **fit_kwds : keyword arguments
            Used in the optimization of the transformed model.

        Returns
        -------
        results : Results instance
            Wrapper returned by ``self.fit`` whose underlying results had
            ``params``, ``normalized_cov_params``, ``df_resid``,
            ``df_model`` and ``mu`` overwritten, and ``constraints``,
            ``k_constr`` and ``results_constrained`` attached.
        """
        from patsy import DesignInfo
        from statsmodels.base._constraints import (
            fit_constrained as _fit_constrained)

        # same pattern as in base.LikelihoodModel.t_test
        lin_constr = DesignInfo(self.exog_names).linear_constraint(
            constraints)
        constr_matrix = lin_constr.coefs
        constr_values = lin_constr.constants

        # TODO: add start_params option, need access to transformation
        #       fit_constrained needs to do the transformation
        estimate = _fit_constrained(self, constr_matrix, constr_values,
                                    start_params=start_params,
                                    fit_kwds=fit_kwds)
        params, cov, res_constr = estimate

        # Dummy results instance obtained from a zero-iteration fit; a
        # wrapper comes back. TODO: wire up properly
        wrapper = self.fit(start_params=params, maxiter=0)
        inner = wrapper._results
        inner.params = params
        inner.normalized_cov_params = cov

        # each constraint moves one degree of freedom from model to residual
        n_constr = len(constr_values)
        inner.df_resid += n_constr
        inner.df_model -= n_constr

        inner.constraints = lin_constr
        inner.k_constr = n_constr
        inner.results_constrained = res_constr
        # TODO: the next is not the best. history should be in results
        inner.model.history = res_constr.model.history
        inner.mu = res_constr.mu
        return wrapper
예제 #3
0
    def setup_class(cls):
        """Fit a Poisson model with two age-category effects set equal.

        Stores on the class: ``res2`` (taken from ``results``), ``idx``,
        ``res1`` (return value of the ``fit_constrained`` function) and
        ``res1m`` (return value of the model's ``fit_constrained`` method).
        """
        cls.res2 = results.results_noexposure_constraint
        # index 2 is left out: dropped baseline for the categorical
        cls.idx = [7, 3, 4, 5, 6, 0, 1]

        # logpyears enters as a regressor, no offset is used
        model = Poisson.from_formula(
            'deaths ~ logpyears + smokes + C(agecat)', data=data)

        # Explicit starting values, because the example fails to converge on
        # one py TravisCI: zeros everywhere except the first entry, which is
        # the log of the mean of endog.
        n_params = len(model.exog_names)
        init_params = np.zeros(n_params)
        init_params[0] = np.log(model.endog.mean())

        # desired params, kept for reference only; not used below
        desired_params = np.array([
            -3.93478643, 1.37276214, 2.33077032, 2.71338891,
            2.71338891, 0.57966535, 0.97254074,
        ])

        restriction = 'C(agecat)[T.4] = C(agecat)[T.5]'
        lin_constr = patsy.DesignInfo(model.exog_names).linear_constraint(
            restriction)

        # standalone function, constraint given as arrays
        # TODO: Newton fails
        cls.res1 = fit_constrained(
            model, lin_constr.coefs, lin_constr.constants,
            start_params=init_params,
            fit_kwds=dict(method='bfgs', disp=0))

        # test method of Poisson, not monkey patched
        cls.res1m = model.fit_constrained(
            restriction, method='bfgs', disp=0, start_params=init_params)
예제 #4
0
    def setup_class(cls):
        """Set up constrained Poisson fits with a fixed coefficient difference.

        Class attributes written: ``res2``, ``idx``, ``res1`` (from the
        ``fit_constrained`` function) and ``res1m`` (from the model method
        of the same name).
        """
        cls.res2 = results.results_noexposure_constraint2
        # position 2 does not appear: dropped baseline for the categorical
        cls.idx = [7, 3, 4, 5, 6, 0, 1]

        # example without offset
        spec = 'deaths ~ logpyears + smokes + C(agecat)'
        poisson_model = Poisson.from_formula(spec, data=data)

        # The example fails to converge on one py TravisCI, so starting
        # values are supplied: zeros, with the first entry replaced by
        # log(mean(endog)).
        start = np.zeros(len(poisson_model.exog_names))
        start[0] = np.log(poisson_model.endog.mean())

        # if needed, these are the desired params (unused in this method)
        target_params = np.array([
            -9.43762015, 1.52762442, 2.74155711, 3.58730007,
            4.08730007, 1.15987869, 0.12111539,
        ])

        constraint_str = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        parsed = patsy.DesignInfo(
            poisson_model.exog_names).linear_constraint(constraint_str)

        # TODO: Newton fails
        optimizer_opts = {'method': 'bfgs', 'disp': 0}
        cls.res1 = fit_constrained(poisson_model,
                                   parsed.coefs,
                                   parsed.constants,
                                   start_params=start,
                                   fit_kwds=optimizer_opts)

        # test method of Poisson, not monkey patched
        cls.res1m = poisson_model.fit_constrained(constraint_str,
                                                  start_params=start,
                                                  method='bfgs',
                                                  disp=0)
    def setup_class(cls):
        """Fit a Poisson model with offset under a coefficient-difference constraint.

        Class attributes written: ``res2``, ``idx``, ``res1`` (from the
        ``fit_constrained`` function, newton), ``constraints`` and
        ``res1m`` (from the model method, bfgs).
        """
        cls.res2 = results.results_exposure_constraint2
        # NOTE(review): the original comment called index 2 the dropped
        # baseline for the categorical, but 2 is listed here and 1 is
        # not -- confirm which position is actually omitted.
        cls.idx = [6, 2, 3, 4, 5, 0]

        # log(pyears) is passed as offset
        log_pyears = np.log(data['pyears'].values)
        model = Poisson.from_formula('deaths ~ smokes + C(agecat)',
                                     data=data,
                                     offset=log_pyears)

        restriction = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        lin_constr = patsy.DesignInfo(model.exog_names).linear_constraint(
            restriction)

        # TODO: bfgs fails
        cls.res1 = fit_constrained(
            model, lin_constr.coefs, lin_constr.constants,
            fit_kwds=dict(method='newton', disp=0))
        cls.constraints = lin_constr

        # test method of Poisson, not monkey patched; bfgs is started from
        # the first element of the tuple returned by the function above
        newton_params = cls.res1[0]
        cls.res1m = model.fit_constrained(
            restriction, method='bfgs', disp=0, start_params=newton_params)
예제 #6
0
    def setup_class(cls):
        """Set up constrained Poisson fits with two age effects set equal.

        Class attributes written: ``res2``, ``idx``, ``res1`` (from the
        ``fit_constrained`` function) and ``res1m`` (from the model method
        of the same name).
        """
        cls.res2 = results.results_noexposure_constraint
        # position 2 does not appear: dropped baseline for the categorical
        cls.idx = [7, 3, 4, 5, 6, 0, 1]

        # example without offset
        spec = 'deaths ~ logpyears + smokes + C(agecat)'
        poisson_model = Poisson.from_formula(spec, data=data)

        # The example fails to converge on one py TravisCI, so starting
        # values are supplied: zeros, with the first entry replaced by
        # log(mean(endog)).
        start = np.zeros(len(poisson_model.exog_names))
        start[0] = np.log(poisson_model.endog.mean())

        # if needed, these are the desired params (unused in this method)
        target_params = np.array([
            -3.93478643, 1.37276214, 2.33077032, 2.71338891,
            2.71338891, 0.57966535, 0.97254074,
        ])

        constraint_str = 'C(agecat)[T.4] = C(agecat)[T.5]'
        parsed = patsy.DesignInfo(
            poisson_model.exog_names).linear_constraint(constraint_str)

        # TODO: Newton fails
        optimizer_opts = {'method': 'bfgs', 'disp': 0}
        cls.res1 = fit_constrained(poisson_model,
                                   parsed.coefs,
                                   parsed.constants,
                                   start_params=start,
                                   fit_kwds=optimizer_opts)

        # test method of Poisson, not monkey patched
        cls.res1m = poisson_model.fit_constrained(constraint_str,
                                                  start_params=start,
                                                  method='bfgs',
                                                  disp=0)
    def setup_class(cls):
        """Fit a constrained Binomial GLM with HC0 covariance in two ways.

        Class attributes written: ``idx``, ``res2``, ``res1m`` (model
        method), ``res1`` (``fit_constrained`` function) and
        ``constraints_rq`` (the ``(R, q)`` pair shared by both fits).
        """
        # params sequence same as Stata
        cls.idx = slice(None)
        cls.res2 = reslogit.results_constraint2_robust

        binomial_glm = GLM(spector_data.endog, spector_data.exog,
                           family=families.Binomial())

        # Original author's note: nobs and k_params are not used to match
        # Stata for HC; the factor looks like nobs / (nobs - 1) and not
        # (nobs - 1.) / (nobs - k_params).
        robust_type = 'HC0'
        robust_kwds = {'scaling_factor': 32 / 31}

        restriction = 'x1 - x3 = 0'
        cls.res1m = binomial_glm.fit_constrained(
            restriction, atol=1e-10,
            cov_type=robust_type, cov_kwds=robust_kwds)

        # reuse the constraint parsed by the method for the function call
        parsed = cls.res1m.constraints
        R = parsed.coefs
        q = parsed.constants
        function_fit_kwds = dict(atol=1e-10,
                                 cov_type=robust_type,
                                 cov_kwds=robust_kwds)
        cls.res1 = fit_constrained(binomial_glm, R, q,
                                   fit_kwds=function_fit_kwds)
        cls.constraints_rq = (R, q)
    def fit_constrained(self, constraints, start_params=None, **fit_kwds):
        """Fit this model subject to linear equality constraints.

        The constraints are ``R params = q``, where ``R`` is the constraint
        matrix and ``q`` the vector of constraint values. The estimation
        itself is done by ``statsmodels.base._constraints.fit_constrained``;
        this method then copies the estimates onto a results instance
        created from this model.

        Parameters
        ----------
        constraints : formula expression or tuple
            Handed to ``DesignInfo(self.exog_names).linear_constraint``.
            A tuple needs to contain the two arrays
            (constraint_matrix, constraint_value), i.e. (R, q). Otherwise
            the constraints can be strings or a list of strings; see
            t_test for details.
        start_params : None or array_like
            Starting values for the optimization in the original parameter
            space; they are transformed internally.
        **fit_kwds : keyword arguments
            Used in the optimization of the transformed model.

        Returns
        -------
        results : Results instance
            The wrapper obtained from ``self.fit``, after its underlying
            results were updated with the constrained estimates.
        """
        from patsy import DesignInfo
        from statsmodels.base._constraints import (
            fit_constrained as constrained_fit)

        # same pattern as in base.LikelihoodModel.t_test
        info = DesignInfo(self.exog_names)
        linear_constr = info.linear_constraint(constraints)
        R = linear_constr.coefs
        q = linear_constr.constants

        # TODO: add start_params option, need access to transformation
        #       fit_constrained needs to do the transformation
        params, cov, res_constr = constrained_fit(
            self, R, q, start_params=start_params, fit_kwds=fit_kwds)

        # create dummy results instance with a zero-iteration fit;
        # we get a wrapper back. TODO: wire up properly
        res = self.fit(start_params=params, maxiter=0)
        underlying = res._results

        underlying.params = params
        underlying.normalized_cov_params = cov

        # one degree of freedom per constraint goes from model to residual
        k_constr = len(q)
        underlying.df_resid += k_constr
        underlying.df_model -= k_constr

        underlying.constraints = linear_constr
        underlying.k_constr = k_constr
        underlying.results_constrained = res_constr

        # TODO: the next is not the best. history should be in results
        underlying.model.history = res_constr.model.history
        underlying.mu = res_constr.mu
        return res
예제 #9
0
    def setup_class(cls):
        """Fit a Binomial GLM under ``x1 - x3 = 0`` via method and function.

        Class attributes written: ``idx``, ``res2``, ``res1m`` (model
        method), ``res1`` (``fit_constrained`` function) and
        ``constraints_rq``.
        """
        # params sequence same as Stata
        cls.idx = slice(None)
        cls.res2 = reslogit.results_constraint2

        binomial_glm = GLM(spector_data.endog, spector_data.exog,
                           family=families.Binomial())

        restriction = 'x1 - x3 = 0'
        cls.res1m = binomial_glm.fit_constrained(restriction, atol=1e-10)

        # patsy compatible constraints, taken from the method's result
        parsed = cls.res1m.constraints
        R = parsed.coefs
        q = parsed.constants
        cls.res1 = fit_constrained(binomial_glm, R, q,
                                   fit_kwds=dict(atol=1e-10))
        cls.constraints_rq = (R, q)
예제 #10
0
    def setup_class(cls):
        """Set up two constrained fits of a Binomial GLM (``x1 - x3 = 0``).

        Writes ``idx``, ``res2``, ``res1m``, ``res1`` and
        ``constraints_rq`` on the class.
        """
        # params sequence same as Stata
        cls.idx = slice(None)
        cls.res2 = reslogit.results_constraint2

        glm_model = GLM(spector_data.endog,
                        spector_data.exog,
                        family=families.Binomial())

        constraint_str = 'x1 - x3 = 0'
        # fit through the model method first ...
        cls.res1m = glm_model.fit_constrained(constraint_str, atol=1e-10)

        # ... then through the function, using the arrays the method stored
        stored = cls.res1m.constraints
        rq_pair = (stored.coefs, stored.constants)
        cls.res1 = fit_constrained(glm_model, rq_pair[0], rq_pair[1],
                                   fit_kwds={'atol': 1e-10})
        cls.constraints_rq = rq_pair
예제 #11
0
    def setup_class(cls):
        """Prepare method-based and function-based constrained GLM fits.

        A Binomial GLM on ``spector_data`` is fitted under ``x1 - x3 = 0``.
        Writes ``idx``, ``res2``, ``res1m``, ``res1`` and
        ``constraints_rq`` on the class.
        """
        cls.idx = slice(None)  # params sequence same as Stata
        cls.res2 = reslogit.results_constraint2

        family = families.Binomial()
        model = GLM(spector_data.endog, spector_data.exog, family=family)

        cls.res1m = model.fit_constrained('x1 - x3 = 0', atol=1e-10)

        # constraint matrix and values as stored by the method
        lin_constr = cls.res1m.constraints
        constr_matrix = lin_constr.coefs
        constr_values = lin_constr.constants

        cls.res1 = fit_constrained(model, constr_matrix, constr_values,
                                   fit_kwds={'atol': 1e-10})
        cls.constraints_rq = (constr_matrix, constr_values)
예제 #12
0
    def setup_class(cls):
        """Fit a Binomial GLM under the constraint ``x1 = 2.8`` in two ways.

        Class attributes written: ``idx``, ``res2``, ``res1m`` (result of
        the model's ``fit_constrained`` method) and ``res1`` (return value
        of the ``fit_constrained`` function called with the same constraint
        arrays).
        """
        cls.idx = slice(None)
        # params sequence same as Stata, but Stata reports param = nan
        # and we have param = value = 0

        cls.res2 = reslogit.results_constraint1

        mod1 = GLM(spector_data.endog, spector_data.exog,
                   family=families.Binomial())

        constr = 'x1 = 2.8'
        cls.res1m = mod1.fit_constrained(constr)

        # Read R and q through the ``coefs`` / ``constants`` attributes
        # instead of unpacking the constraints object: a patsy
        # LinearConstraint is not iterable, and attribute access matches
        # the other setup methods that use ``res1m.constraints``.
        constraints = cls.res1m.constraints
        R, q = constraints.coefs, constraints.constants
        cls.res1 = fit_constrained(mod1, R, q)
    def setup_class(cls):
        """Fit a Poisson-family GLM with two age effects constrained equal.

        Class attributes written: ``res2``, ``idx``, ``res1`` (from the
        ``fit_constrained`` function), ``constraints`` and ``res1m`` (from
        the model method).
        """
        from statsmodels.base._constraints import fit_constrained

        cls.res2 = results.results_noexposure_constraint
        # index 2 is left out: dropped baseline for the categorical
        cls.idx = [7, 3, 4, 5, 6, 0, 1]

        # example without offset
        poisson_family = families.Poisson()
        model = GLM.from_formula('deaths ~ logpyears + smokes + C(agecat)',
                                 data=data, family=poisson_family)

        restriction = 'C(agecat)[T.4] = C(agecat)[T.5]'
        design_info = patsy.DesignInfo(model.exog_names)
        lin_constr = design_info.linear_constraint(restriction)

        cls.res1 = fit_constrained(model,
                                   lin_constr.coefs,
                                   lin_constr.constants)
        cls.constraints = lin_constr
        cls.res1m = model.fit_constrained(restriction)
예제 #14
0
    def setup_class(cls):
        """Fit a Logit model under ``x1 = 2.8`` via method and function.

        Class attributes written: ``idx``, ``res2``, ``res1m`` (model
        method) and ``res1`` (``fit_constrained`` function). Both fits use
        bfgs.
        """
        # params sequence same as Stata, but Stata reports param = nan
        # and we have param = value = 0
        cls.idx = slice(None)
        cls.res2 = reslogit.results_constraint1

        logit_model = Logit(spector_data.endog, spector_data.exog)

        # newton doesn't work, raises hessian singular
        restriction = 'x1 = 2.8'
        cls.res1m = logit_model.fit_constrained(restriction, method='bfgs')

        # constraint arrays as stored by the method
        parsed = cls.res1m.constraints
        R = parsed.coefs
        q = parsed.constants
        cls.res1 = fit_constrained(logit_model, R, q,
                                   fit_kwds=dict(method='bfgs'))
예제 #15
0
    def setup_class(cls):
        """Fit a Binomial GLM under ``x1 = 2.8`` via method and function.

        Class attributes written: ``idx``, ``res2``, ``res1m`` (model
        method) and ``res1`` (``fit_constrained`` function).
        """
        # params sequence same as Stata, but Stata reports param = nan
        # and we have param = value = 0
        cls.idx = slice(None)
        cls.res2 = reslogit.results_constraint1

        binomial_glm = GLM(spector_data.endog,
                           spector_data.exog,
                           family=families.Binomial())

        cls.res1m = binomial_glm.fit_constrained('x1 = 2.8')

        # constraint matrix and values as stored by the method
        stored = cls.res1m.constraints
        constr_matrix, constr_values = stored.coefs, stored.constants
        cls.res1 = fit_constrained(binomial_glm, constr_matrix, constr_values)
예제 #16
0
    def setup_class(cls):
        """Set up constrained fits of a Poisson-family GLM built from a formula.

        Writes ``res2``, ``idx``, ``res1``, ``constraints`` and ``res1m``
        on the class.
        """
        from statsmodels.base._constraints import fit_constrained

        cls.res2 = results.results_noexposure_constraint
        # position 2 does not appear: dropped baseline for the categorical
        cls.idx = [7, 3, 4, 5, 6, 0, 1]

        # example without offset
        spec = 'deaths ~ logpyears + smokes + C(agecat)'
        glm_model = GLM.from_formula(spec, data=data,
                                     family=families.Poisson())

        constraint_str = 'C(agecat)[T.4] = C(agecat)[T.5]'
        parsed = patsy.DesignInfo(
            glm_model.exog_names).linear_constraint(constraint_str)

        # function call with constraint arrays, then the model method
        # with the constraint string
        cls.res1 = fit_constrained(glm_model, parsed.coefs, parsed.constants)
        cls.constraints = parsed
        cls.res1m = glm_model.fit_constrained(constraint_str)
예제 #17
0
    def setup_class(cls):
        """Fit a Poisson-family GLM with offset and two age effects equal.

        Class attributes written: ``res2``, ``idx``, ``res1`` (from the
        ``fit_constrained`` function), ``constraints`` and ``res1m`` (the
        ``_results`` attribute of what the model method returns).
        """
        from statsmodels.genmod.generalized_linear_model import GLM
        from statsmodels.genmod import families
        from statsmodels.base._constraints import fit_constrained

        cls.res2 = results.results_exposure_constraint
        # NOTE(review): the original comment called index 2 the dropped
        # baseline for the categorical, but 2 is listed here and 1 is
        # not -- confirm which position is actually omitted.
        cls.idx = [6, 2, 3, 4, 5, 0]

        # example with offset: log(pyears)
        poisson_family = families.Poisson()
        log_pyears = np.log(data['pyears'].values)
        model = GLM.from_formula('deaths ~ smokes + C(agecat)',
                                 data=data,
                                 family=poisson_family,
                                 offset=log_pyears)

        restriction = 'C(agecat)[T.4] = C(agecat)[T.5]'
        lin_constr = patsy.DesignInfo(model.exog_names).linear_constraint(
            restriction)

        cls.res1 = fit_constrained(model,
                                   lin_constr.coefs,
                                   lin_constr.constants)
        cls.constraints = lin_constr

        # keep the underlying results instead of the wrapper
        wrapped = model.fit_constrained(restriction)
        cls.res1m = wrapped._results
예제 #18
0
    def setup_class(cls):
        """Fit a Poisson model with offset and two age effects constrained equal.

        Class attributes written: ``res2``, ``idx``, ``res1`` (from the
        ``fit_constrained`` function), ``constraints`` and ``res1m`` (from
        the model method). Both fits use newton.
        """
        cls.res2 = results.results_exposure_constraint
        # NOTE(review): the original comment called index 2 the dropped
        # baseline for the categorical, but 2 is listed here and 1 is
        # not -- confirm which position is actually omitted.
        cls.idx = [6, 2, 3, 4, 5, 0]

        # log(pyears) is passed as offset
        log_pyears = np.log(data['pyears'].values)
        model = Poisson.from_formula('deaths ~ smokes + C(agecat)',
                                     data=data, offset=log_pyears)

        restriction = 'C(agecat)[T.4] = C(agecat)[T.5]'
        design_info = patsy.DesignInfo(model.exog_names)
        lin_constr = design_info.linear_constraint(restriction)

        # TODO: bfgs fails
        cls.res1 = fit_constrained(model,
                                   lin_constr.coefs,
                                   lin_constr.constants,
                                   fit_kwds=dict(method='newton'))
        cls.constraints = lin_constr

        # test method of Poisson, not monkey patched
        cls.res1m = model.fit_constrained(restriction, method='newton')
예제 #19
0
    def setup_class(cls):
        """Set up constrained fits of a Poisson-family GLM that uses an offset.

        Writes ``res2``, ``idx``, ``res1``, ``constraints`` and ``res1m``
        on the class; ``res1m`` is the ``_results`` attribute of the value
        returned by the model method.
        """
        from statsmodels.genmod.generalized_linear_model import GLM
        from statsmodels.genmod import families
        from statsmodels.base._constraints import fit_constrained

        cls.res2 = results.results_exposure_constraint
        # NOTE(review): original comment said "2 is dropped baseline for
        # categorical", yet 2 occurs in this list while 1 does not --
        # verify against the reference results.
        cls.idx = [6, 2, 3, 4, 5, 0]

        # example with offset
        spec = 'deaths ~ smokes + C(agecat)'
        offset_values = np.log(data['pyears'].values)
        glm_model = GLM.from_formula(spec, data=data,
                                     family=families.Poisson(),
                                     offset=offset_values)

        constraint_str = 'C(agecat)[T.4] = C(agecat)[T.5]'
        parsed = patsy.DesignInfo(
            glm_model.exog_names).linear_constraint(constraint_str)

        cls.res1 = fit_constrained(glm_model, parsed.coefs, parsed.constants)
        cls.constraints = parsed

        # unwrap the results returned by the model method
        cls.res1m = glm_model.fit_constrained(constraint_str)._results
예제 #20
0
    def setup_class(cls):
        """Set up robust-covariance constrained fits of a Binomial GLM.

        The constraint is ``x1 - x3 = 0`` and both fits request ``HC0``
        covariance with a scaling factor. Writes ``idx``, ``res2``,
        ``res1m``, ``res1`` and ``constraints_rq`` on the class.
        """
        cls.idx = slice(None)  # params sequence same as Stata
        cls.res2 = reslogit.results_constraint2_robust

        glm_model = GLM(spector_data.endog,
                        spector_data.exog,
                        family=families.Binomial())

        # Original author's note: nobs and k_params are not used to match
        # Stata for HC; the factor looks like nobs / (nobs - 1) and not
        # (nobs - 1.) / (nobs - k_params).
        cov_type = 'HC0'
        cov_kwds = dict(scaling_factor=32 / 31)

        constraint_str = 'x1 - x3 = 0'
        cls.res1m = glm_model.fit_constrained(constraint_str,
                                              atol=1e-10,
                                              cov_type=cov_type,
                                              cov_kwds=cov_kwds)

        # same constraint arrays and fit options for the function call
        stored = cls.res1m.constraints
        R, q = stored.coefs, stored.constants
        options = {
            'atol': 1e-10,
            'cov_type': cov_type,
            'cov_kwds': cov_kwds,
        }
        cls.res1 = fit_constrained(glm_model, R, q, fit_kwds=options)
        cls.constraints_rq = (R, q)
예제 #21
0
    def setup_class(cls):
        """Fit a Poisson model with exposure under a coefficient-difference constraint.

        Class attributes written: ``res2``, ``idx``, ``res1`` (from the
        ``fit_constrained`` function, newton), ``constraints`` and
        ``res1m`` (from the model method, bfgs).
        """
        cls.res2 = results.results_exposure_constraint2
        # NOTE(review): the original comment called index 2 the dropped
        # baseline for the categorical, but 2 is listed here and 1 is
        # not -- confirm which position is actually omitted.
        cls.idx = [6, 2, 3, 4, 5, 0]

        # pyears is passed as exposure here; the original kept an
        # alternative offset=np.log(pyears) call commented out
        pyears = data['pyears'].values
        model = Poisson.from_formula('deaths ~ smokes + C(agecat)',
                                     data=data, exposure=pyears)

        restriction = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        design_info = patsy.DesignInfo(model.exog_names)
        lin_constr = design_info.linear_constraint(restriction)

        # TODO: bfgs fails to converge. overflow somewhere?
        cls.res1 = fit_constrained(model,
                                   lin_constr.coefs,
                                   lin_constr.constants,
                                   fit_kwds=dict(method='newton', disp=0))
        cls.constraints = lin_constr

        # test method of Poisson, not monkey patched; bfgs is started from
        # the first element of the tuple returned by the function above
        newton_params = cls.res1[0]
        cls.res1m = model.fit_constrained(restriction, method='bfgs',
                                          disp=0, start_params=newton_params)
예제 #22
0
#    cov_params3 = transf3.transf_mat.dot(res3.cov_params()).dot(transf3.transf_mat.T)
#    bse3 = np.sqrt(np.diag(cov_params3))


    # Poisson model on the full rand_exog design, fitted with newton and
    # without any constraint.
    poisson_mod0 = sm.Poisson(rand_data.endog, rand_exog)
    poisson_res0 = poisson_mod0.fit(method="newton")
    # NOTE(review): `tp` is not read anywhere in the visible code and is
    # assigned again further down; what transform_params_constraint returns
    # is not visible from here.
    tp = transform_params_constraint(poisson_res0.params, poisson_res0.cov_params(), transfp.R, transfp.q)
    # transf_mat * cov_params * transf_mat' for res3, then the square roots
    # of the diagonal. transf3 and res3 are defined outside this view.
    cov_params3 = transf3.transf_mat.dot(res3.cov_params()).dot(transf3.transf_mat.T)
    bse3 = np.sqrt(np.diag(cov_params3))

    # try again same example as it was intended

    # Poisson model that drops the last two columns of rand_exog from the
    # regressors and passes the last column as offset.
    # NOTE(review): `poisson_res` is not read in the visible code.
    poisson_mod = sm.Poisson(rand_data.endog, rand_exog[:, :-2], offset=rand_exog[:, -1])
    poisson_res = poisson_mod.fit(method="newton")

    # Model on the exog returned by transfp.reduce, with an offset computed
    # as rand_exog times transfp.constant; the fitted params are passed
    # through transfp.expand (presumably back to the full parameter vector
    # -- confirm against the transform class).
    exogp_st = transfp.reduce(rand_exog)
    poisson_modr = sm.Poisson(rand_data.endog, exogp_st, offset=rand_exog.dot(transfp.constant.squeeze()))
    poisson_resr = poisson_modr.fit(method="newton")
    paramsp = transfp.expand(poisson_resr.params).squeeze()
    print('\nPoisson')
    print(paramsp)
    print(poisson_resr.params)
    tp = transform_params_constraint(poisson_res0.params, poisson_res0.cov_params(), transfp.R, transfp.q)
    # covariance of the reduced fit mapped through transfp.transf_mat, and
    # the square roots of its diagonal
    cov_paramsp = transfp.transf_mat.dot(poisson_resr.cov_params()).dot(transfp.transf_mat.T)
    bsep = np.sqrt(np.diag(cov_paramsp))
    print(bsep)
    # Same constraint via fit_constrained on the unconstrained model; it
    # returns a 3-tuple, of which the first two elements are printed
    # (the second as square roots of its diagonal).
    p, cov, res_r = fit_constrained(poisson_mod0, transfp.R, transfp.q)
    se = np.sqrt(np.diag(cov))
    print(p)
    print(se)
#    cov_params3 = transf3.transf_mat.dot(res3.cov_params()).dot(transf3.transf_mat.T)
#    bse3 = np.sqrt(np.diag(cov_params3))


    # Poisson model on the full rand_exog design, fitted with newton and
    # without any constraint.
    poisson_mod0 = sm.Poisson(rand_data.endog, rand_exog)
    poisson_res0 = poisson_mod0.fit(method="newton")
    # NOTE(review): `tp` is not read anywhere in the visible code and is
    # assigned again further down; what transform_params_constraint returns
    # is not visible from here.
    tp = transform_params_constraint(poisson_res0.params, poisson_res0.cov_params(), transfp.R, transfp.q)
    # transf_mat * cov_params * transf_mat' for res3, then the square roots
    # of the diagonal. transf3 and res3 are defined outside this view.
    cov_params3 = transf3.transf_mat.dot(res3.cov_params()).dot(transf3.transf_mat.T)
    bse3 = np.sqrt(np.diag(cov_params3))

    # try again same example as it was intended

    # Poisson model that drops the last two columns of rand_exog from the
    # regressors and passes the last column as offset.
    # NOTE(review): `poisson_res` is not read in the visible code.
    poisson_mod = sm.Poisson(rand_data.endog, rand_exog[:, :-2], offset=rand_exog[:, -1])
    poisson_res = poisson_mod.fit(method="newton")

    # Model on the exog returned by transfp.reduce, with an offset computed
    # as rand_exog times transfp.constant; the fitted params are passed
    # through transfp.expand (presumably back to the full parameter vector
    # -- confirm against the transform class).
    exogp_st = transfp.reduce(rand_exog)
    poisson_modr = sm.Poisson(rand_data.endog, exogp_st, offset=rand_exog.dot(transfp.constant.squeeze()))
    poisson_resr = poisson_modr.fit(method="newton")
    paramsp = transfp.expand(poisson_resr.params).squeeze()
    print('\nPoisson')
    print(paramsp)
    print(poisson_resr.params)
    tp = transform_params_constraint(poisson_res0.params, poisson_res0.cov_params(), transfp.R, transfp.q)
    # covariance of the reduced fit mapped through transfp.transf_mat, and
    # the square roots of its diagonal
    cov_paramsp = transfp.transf_mat.dot(poisson_resr.cov_params()).dot(transfp.transf_mat.T)
    bsep = np.sqrt(np.diag(cov_paramsp))
    print(bsep)
    # Same constraint via fit_constrained on the unconstrained model; it
    # returns a 3-tuple, of which the first two elements are printed
    # (the second as square roots of its diagonal).
    p, cov, res_r = fit_constrained(poisson_mod0, transfp.R, transfp.q)
    se = np.sqrt(np.diag(cov))
    print(p)
    print(se)