Example #1
0
def junk():  # FIXME: make this into a test, or move/remove
    """Scratch example of constrained Poisson fits; all results are discarded.

    Builds two Poisson models from the module-level ``data``, fits each one
    unconstrained, and then refits it under a linear equality constraint that
    is parsed by patsy from a string of column names.
    """
    # Singular Matrix in mod1a.fit()

    # same as Stata default
    formula2 = 'deaths ~ C(agecat) + C(smokes) : C(agecat)'

    mod = Poisson.from_formula(formula2, data=data,
                               exposure=data['pyears'].values)

    mod.fit()

    # Both sides of the constraint must spell a column name exactly; a stray
    # character inside a name makes the constraint unparseable.
    constraints = 'C(smokes)[T.1]:C(agecat)[3] = C(smokes)[T.1]:C(agecat)[4]'

    import patsy
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constraints)
    # R holds the constraint coefficients, q the right-hand-side constants.
    R, q = lc.coefs, lc.constants

    mod.fit_constrained(R, q, fit_kwds={'method': 'bfgs'})

    # example without offset
    formula1a = 'deaths ~ logpyears + smokes + C(agecat)'
    mod1a = Poisson.from_formula(formula1a, data=data)

    mod1a.fit()
    lc_1a = patsy.DesignInfo(mod1a.exog_names).linear_constraint(
        'C(agecat)[T.4] = C(agecat)[T.5]')
    mod1a.fit_constrained(lc_1a.coefs, lc_1a.constants,
                          fit_kwds={'method': 'newton'})
def junk():
    """Scratch run of two constrained Poisson fits with printed diagnostics.

    Prints the shape of the second model's design matrix and the first two
    items of its constrained fit result. Nothing is returned.
    """
    # Singular Matrix in mod1a.fit()
    import patsy

    formula1 = 'deaths ~ smokes + C(agecat)'
    # same as Stata default
    formula2 = 'deaths ~ C(agecat) + C(smokes) : C(agecat)'

    exposure_model = Poisson.from_formula(
        formula2, data=data, exposure=data['pyears'].values)
    res0 = exposure_model.fit()

    constraints = 'C(smokes)[T.1]:C(agecat)[3] = C(smokes)[T.1]:C(agecat)[4]'
    design_info = patsy.DesignInfo(exposure_model.exog_names)
    lin_constr = design_info.linear_constraint(constraints)
    resc = exposure_model.fit_constrained(
        lin_constr.coefs, lin_constr.constants, fit_kwds={'method': 'bfgs'})

    # example without offset
    formula1a = 'deaths ~ logpyears + smokes + C(agecat)'
    plain_model = Poisson.from_formula(formula1a, data=data)
    print(plain_model.exog.shape)

    res1a = plain_model.fit()
    plain_info = patsy.DesignInfo(plain_model.exog_names)
    lc_1a = plain_info.linear_constraint('C(agecat)[T.4] = C(agecat)[T.5]')
    newton_opts = {'method': 'newton'}
    resc1a = plain_model.fit_constrained(
        lc_1a.coefs, lc_1a.constants, fit_kwds=newton_opts)
    for position in (0, 1):
        print(resc1a[position])
    def setup_class(cls):
        """Fit the offset Poisson model under ``T.5 - T.4 = 0.5`` in two ways.

        Stores the reference results, the column index mapping, the parsed
        constraint, the result of the standalone ``fit_constrained`` function
        (``res1``) and the result of the model's own method (``res1m``).
        """
        cls.res2 = results.results_exposure_constraint2
        # 2 is dropped baseline for categorical
        cls.idx = [6, 2, 3, 4, 5, 0]

        # example with offset: log person-years is passed as the offset
        log_pyears = np.log(data['pyears'].values)
        model = Poisson.from_formula('deaths ~ smokes + C(agecat)',
                                     data=data, offset=log_pyears)

        constraint = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        design_info = patsy.DesignInfo(model.exog_names)
        lin_constr = design_info.linear_constraint(constraint)

        newton_opts = {'method': 'newton', 'disp': 0}
        cls.res1 = fit_constrained(model, lin_constr.coefs,
                                   lin_constr.constants,
                                   fit_kwds=newton_opts)
        cls.constraints = lin_constr
        # TODO: bfgs fails

        # test method of Poisson, not monkey patched; started from the
        # first item of the function-based result
        method_opts = {'method': 'bfgs', 'disp': 0,
                       'start_params': cls.res1[0]}
        cls.res1m = model.fit_constrained(constraint, **method_opts)
    def setup_class(cls):
        """Fit the no-offset Poisson model under ``T.5 - T.4 = 0.5`` with bfgs.

        Both the standalone ``fit_constrained`` function (``res1``) and the
        model's own method (``res1m``) start from the same parameter vector.
        """
        cls.res2 = results.results_noexposure_constraint2
        # 2 is dropped baseline for categorical
        cls.idx = [7, 3, 4, 5, 6, 0, 1]

        # example without offset
        model = Poisson.from_formula(
            'deaths ~ logpyears + smokes + C(agecat)', data=data)

        # get start_params, example fails to converge on one py TravisCI:
        # all zeros except the first entry, set to log of the mean response
        n_params = len(model.exog_names)
        initial = np.zeros(n_params)
        initial[0] = np.log(model.endog.mean())
        # if we need it, this is desired params
        desired = np.array([-9.43762015, 1.52762442, 2.74155711, 3.58730007,
                            4.08730007, 1.15987869, 0.12111539])

        constraint = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        design_info = patsy.DesignInfo(model.exog_names)
        lin_constr = design_info.linear_constraint(constraint)
        bfgs_opts = {'method': 'bfgs', 'disp': 0}
        cls.res1 = fit_constrained(model, lin_constr.coefs,
                                   lin_constr.constants,
                                   start_params=initial,
                                   fit_kwds=bfgs_opts)
        # TODO: Newton fails

        # test method of Poisson, not monkey patched
        cls.res1m = model.fit_constrained(
            constraint, start_params=initial, method='bfgs', disp=0)
    def setup_class(cls):
        """Fit the no-offset Poisson model under ``T.4 = T.5`` with bfgs.

        Both the standalone ``fit_constrained`` function (``res1``) and the
        model's own method (``res1m``) start from the same parameter vector.
        """
        cls.res2 = results.results_noexposure_constraint
        # 2 is dropped baseline for categorical
        cls.idx = [7, 3, 4, 5, 6, 0, 1]

        # example without offset
        model = Poisson.from_formula(
            'deaths ~ logpyears + smokes + C(agecat)', data=data)

        # get start_params, example fails to converge on one py TravisCI:
        # all zeros except the first entry, set to log of the mean response
        n_params = len(model.exog_names)
        initial = np.zeros(n_params)
        initial[0] = np.log(model.endog.mean())
        # if we need it, this is desired params
        desired = np.array([-3.93478643, 1.37276214, 2.33077032, 2.71338891,
                            2.71338891, 0.57966535, 0.97254074])

        constraint = 'C(agecat)[T.4] = C(agecat)[T.5]'
        design_info = patsy.DesignInfo(model.exog_names)
        lin_constr = design_info.linear_constraint(constraint)
        bfgs_opts = {'method': 'bfgs', 'disp': 0}
        cls.res1 = fit_constrained(model, lin_constr.coefs,
                                   lin_constr.constants,
                                   start_params=initial,
                                   fit_kwds=bfgs_opts)
        # TODO: Newton fails

        # test method of Poisson, not monkey patched
        cls.res1m = model.fit_constrained(
            constraint, start_params=initial, method='bfgs', disp=0)
Example #6
0
def dmatrix(pipe, require, output, config=None):
    """ Create a design matrix from Patsy/R style design strings

    Args:
        pipe (yatsm.pipeline.Pipe): Piped data to operate on
        require (dict[str, list[str]]): Labels for the requirements of this
            calculation
        output (dict[str, list[str]]): Label for the result of this
            calculation
        config (dict): Task configuration; its ``design`` entry holds the
            design string. Required despite the ``None`` default.

    Returns:
        yatsm.pipeline.Pipe: Piped output

    Raises:
        TypeError: If ``config`` is not given
    """
    if config is None:
        # Subscripting None would raise a TypeError anyway; say why instead.
        raise TypeError("dmatrix requires a `config` with a 'design' entry")
    design = config['design']
    ds = pipe.data

    if '~' in design:
        # A two-sided formula yields an (outcome, predictor) pair of design
        # matrices; only the predictor side is stored.
        _, X = patsy.dmatrices(design, ds)
    elif design.strip() == '1':
        # Intercept-only design: a single column of ones, one row per time
        X = np.ones((ds['time'].size, 1))
        X = patsy.DesignMatrix(X, design_info=patsy.DesignInfo(['Intercept']))
    else:
        X = patsy.dmatrix(design, ds)

    # Label rows by time and columns by the design's term names
    coords = (ds['time'], X.design_info.column_names)
    dims = ('time', 'terms')
    pipe.data[output['data'][0]] = xr.DataArray(X, coords, dims)

    return pipe
Example #7
0
def build_equality_constraints_string(
        dmat: xr.DataArray,
        constraints: List[str],
        dims: list
):
    r"""
    Parser for string encoded equality constraints.

    For each constraint, the first parameter with coefficient 1 is treated as
    constrained and the remaining parameters with coefficient 1 as the ones it
    depends on. A warning is logged if the design matrix restricted to the
    unconstrained parameters does not have full column rank.

    :param dmat: Design matrix container. The matrix is read from
        ``dmat.data_vars['design']`` and the parameter names from
        ``dmat.coords['design_params']``.
    :param constraints: List of constraints as strings.

        E.g. ["batch1 + batch2 + batch3 = 0"]
    :param dims: ["design_loc_params", "loc_params"] or ["design_scale_params", "scale_params"]
        Define dimension names of xarray.
    :return: a constraint matrix, as returned by ``parse_constraints``
    """
    design = dmat.data_vars['design']
    n_par_all = design.values.shape[1]
    n_par_free = n_par_all - len(constraints)

    di = patsy.DesignInfo(dmat.coords["design_params"].values)
    constraint_ls = [di.linear_constraint(x).coefs[0] for x in constraints]
    idx_constr = np.asarray([np.where(x == 1)[0][0] for x in constraint_ls])
    idx_depending = [np.where(x == 1)[0][1:] for x in constraint_ls]
    # Sorted, integer-typed complement of the constrained indices; does not
    # rely on set iteration order.
    idx_unconstr = np.setdiff1d(np.arange(n_par_all), idx_constr)

    obs_dim = design.dims[0]
    dmat_var = xr.DataArray(
        dims=[obs_dim, "params"],
        data=design[:, idx_unconstr],
        coords={obs_dim: dmat.coords["observations"].values,
                "params": dmat.coords["design_params"].values[idx_unconstr]}
    )

    # Rows index all parameters, columns index the free parameters: a free
    # parameter maps onto itself, a constrained one onto the negated
    # parameters it depends on.
    constraint_mat = np.zeros([n_par_all, n_par_free])
    for i in range(n_par_all):
        if i in idx_constr:
            idx_dep_i = idx_depending[np.where(idx_constr == i)[0][0]]
            idx_dep_i = np.asarray([np.where(idx_unconstr == x)[0] for x in idx_dep_i])
            constraint_mat[i, idx_dep_i] = -1
        else:
            idx_unconstr_i = np.where(idx_unconstr == i)
            constraint_mat[i, idx_unconstr_i] = 1

    constraints_ar = parse_constraints(
        dmat=dmat,
        constraints=constraint_mat,
        dims=dims
    )

    # Test reduced design matrix for full rank before returning constraints.
    # The rank must be compared with the number of columns: a matrix and its
    # transpose always share the same rank.
    if np.linalg.matrix_rank(dmat_var) < dmat_var.shape[1]:
        logger.warning("constrained design matrix is not full rank")

    return constraints_ar
Example #8
0
def constraint_matrix_from_string(
        dmat: np.ndarray,
        coef_names: list,
        constraints: Union[Tuple[str, str], List[str]]
):
    r"""
    Create constraint matrix from string encoded equality constraints.

    For each constraint, the first parameter with coefficient 1 is treated as
    constrained and the remaining parameters with coefficient 1 as the ones it
    depends on.

    :param dmat: Design matrix.
    :param coef_names: Names of the columns of dmat, used to resolve the
        parameter names that appear in the constraint strings.
    :param constraints: List of constraints as strings.

        E.g. ["batch1 + batch2 + batch3 = 0"]
    :return: a constraint matrix with one row per parameter and one column
        per free parameter
    :raises ValueError: if a constraint involves only one parameter, or if the
        design matrix restricted to the unconstrained parameters does not
        have full column rank
    """
    assert len(constraints) > 0, "supply constraints"

    n_par_all = dmat.shape[1]
    n_par_free = n_par_all - len(constraints)

    di = patsy.DesignInfo(coef_names)
    constraint_ls = [di.linear_constraint(x).coefs[0] for x in constraints]
    # Check that constraints are sensible:
    for constraint_i in constraint_ls:
        if np.sum(constraint_i != 0) == 1:
            raise ValueError("a zero-equality constraint only involved one parameter: remove this parameter")
    idx_constr = np.asarray([np.where(x == 1)[0][0] for x in constraint_ls])
    idx_depending = [np.where(x == 1)[0][1:] for x in constraint_ls]
    # Sorted, integer-typed complement of the constrained indices; does not
    # rely on set iteration order.
    idx_unconstr = np.setdiff1d(np.arange(n_par_all), idx_constr)

    # Rows index all parameters, columns index the free parameters: a free
    # parameter maps onto itself, a constrained one onto the negated
    # parameters it depends on.
    constraint_mat = np.zeros([n_par_all, n_par_free])
    for i in range(n_par_all):
        if i in idx_constr:
            idx_dep_i = idx_depending[np.where(idx_constr == i)[0][0]]
            idx_dep_i = np.asarray([np.where(idx_unconstr == x)[0] for x in idx_dep_i])
            constraint_mat[i, idx_dep_i] = -1
        else:
            idx_unconstr_i = np.where(idx_unconstr == i)
            constraint_mat[i, idx_unconstr_i] = 1

    # Test unconstrained subset design matrix for being full rank before
    # returning constraints. The rank must be compared with the number of
    # columns: a matrix and its transpose always share the same rank.
    n_unconstr = len(idx_unconstr)
    rank_unconstr = np.linalg.matrix_rank(dmat[:, idx_unconstr])
    if rank_unconstr < n_unconstr:
        raise ValueError(
            "unconstrained sub-design matrix is not full rank: rank %i for %i columns" %
            (rank_unconstr, n_unconstr)
        )

    return constraint_mat
Example #9
0
    def setup_class(cls):
        """Fit a Poisson-family model under ``T.4 = T.5`` with a tight ``atol``.

        The model class is taken from ``cls.model_cls``. ``res1`` comes from
        the standalone ``fit_constrained`` function, ``res1m`` from the
        model's own method.
        """
        # example without offset
        model = cls.model_cls.from_formula(
            'deaths ~ logpyears + smokes + C(agecat)',
            data=data,
            family=families.Poisson(),
        )

        constraint = 'C(agecat)[T.4] = C(agecat)[T.5]'
        design_info = patsy.DesignInfo(model.exog_names)
        lin_constr = design_info.linear_constraint(constraint)

        tolerance = {'atol': 1e-10}
        cls.res1 = fit_constrained(model, lin_constr.coefs,
                                   lin_constr.constants, fit_kwds=tolerance)
        cls.constraints = lin_constr
        cls.res1m = model.fit_constrained(constraint, atol=1e-10)
    def setup_class(cls):
        """Fit a Poisson-family GLM without offset under ``T.4 = T.5``.

        ``res1`` comes from the standalone ``fit_constrained`` function,
        ``res1m`` from the model's own method.
        """
        from statsmodels.base._constraints import fit_constrained

        cls.res2 = results.results_noexposure_constraint
        # 2 is dropped baseline for categorical
        cls.idx = [7, 3, 4, 5, 6, 0, 1]

        # example without offset
        model = GLM.from_formula('deaths ~ logpyears + smokes + C(agecat)',
                                 data=data,
                                 family=families.Poisson())

        constraint = 'C(agecat)[T.4] = C(agecat)[T.5]'
        design_info = patsy.DesignInfo(model.exog_names)
        lin_constr = design_info.linear_constraint(constraint)

        cls.res1 = fit_constrained(model, lin_constr.coefs,
                                   lin_constr.constants)
        cls.constraints = lin_constr
        cls.res1m = model.fit_constrained(constraint)
Example #11
0
    def setup_class(cls):
        """Fit a Poisson-family GLM with an offset under ``T.4 = T.5``.

        ``res1`` comes from the standalone ``fit_constrained`` function;
        ``res1m`` is the ``_results`` attribute of what the model's own
        method returns.
        """
        from statsmodels.genmod.generalized_linear_model import GLM
        from statsmodels.genmod import families
        from statsmodels.base._constraints import fit_constrained

        cls.res2 = results.results_exposure_constraint
        # 2 is dropped baseline for categorical
        cls.idx = [6, 2, 3, 4, 5, 0]

        # example with offset: log person-years is passed as the offset
        log_pyears = np.log(data['pyears'].values)
        model = GLM.from_formula('deaths ~ smokes + C(agecat)',
                                 data=data,
                                 family=families.Poisson(),
                                 offset=log_pyears)

        constraint = 'C(agecat)[T.4] = C(agecat)[T.5]'
        design_info = patsy.DesignInfo(model.exog_names)
        lin_constr = design_info.linear_constraint(constraint)

        cls.res1 = fit_constrained(model, lin_constr.coefs,
                                   lin_constr.constants)
        cls.constraints = lin_constr
        wrapped = model.fit_constrained(constraint)
        cls.res1m = wrapped._results