def junk():
    """Scratch example of constrained Poisson fits; not run as a test.

    FIXME: make this into a test, or move/remove.

    Note carried over from the original author: ``mod1a.fit()`` runs into
    a singular matrix.
    """
    import patsy

    # same as Stata default
    formula2 = 'deaths ~ C(agecat) + C(smokes) : C(agecat)'
    mod = Poisson.from_formula(formula2, data=data,
                               exposure=data['pyears'].values)
    mod.fit()

    # Both sides of the constraint must spell the interaction column name
    # exactly as it appears in ``mod.exog_names``.
    constraints = 'C(smokes)[T.1]:C(agecat)[3] = C(smokes)[T.1]:C(agecat)[4]'
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constraints)
    R, q = lc.coefs, lc.constants
    mod.fit_constrained(R, q, fit_kwds={'method': 'bfgs'})

    # example without offset
    formula1a = 'deaths ~ logpyears + smokes + C(agecat)'
    mod1a = Poisson.from_formula(formula1a, data=data)
    mod1a.fit()
    lc_1a = patsy.DesignInfo(mod1a.exog_names).linear_constraint(
        'C(agecat)[T.4] = C(agecat)[T.5]')
    mod1a.fit_constrained(lc_1a.coefs, lc_1a.constants,
                          fit_kwds={'method': 'newton'})
def junk():
    """Scratch example: constrained Poisson fits with and without exposure.

    Prints the design-matrix shape of the no-offset model and the first two
    items returned by its constrained fit.

    Note carried over from the original author: ``mod1a.fit()`` runs into
    a singular matrix.
    """
    import patsy

    # same as Stata default
    formula2 = 'deaths ~ C(agecat) + C(smokes) : C(agecat)'
    mod = Poisson.from_formula(formula2, data=data,
                               exposure=data['pyears'].values)
    # The unconstrained fit is still run, but its result is not needed.
    mod.fit()

    constraints = 'C(smokes)[T.1]:C(agecat)[3] = C(smokes)[T.1]:C(agecat)[4]'
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constraints)
    R, q = lc.coefs, lc.constants
    mod.fit_constrained(R, q, fit_kwds={'method': 'bfgs'})

    # example without offset
    formula1a = 'deaths ~ logpyears + smokes + C(agecat)'
    mod1a = Poisson.from_formula(formula1a, data=data)
    print(mod1a.exog.shape)
    mod1a.fit()
    lc_1a = patsy.DesignInfo(mod1a.exog_names).linear_constraint(
        'C(agecat)[T.4] = C(agecat)[T.5]')
    resc1a = mod1a.fit_constrained(lc_1a.coefs, lc_1a.constants,
                                   fit_kwds={'method': 'newton'})
    print(resc1a[0])
    print(resc1a[1])
def setup_class(cls):
    """Fit a Poisson model with a log(pyears) offset under the constraint
    ``C(agecat)[T.5] - C(agecat)[T.4] = 0.5``.

    Stores the reference results (``res2``), the parameter index mapping
    (``idx``), the constraint object, and two constrained fits: one through
    the stand-alone ``fit_constrained`` function and one through the model
    method.
    """
    cls.res2 = results.results_exposure_constraint2
    # 2 is dropped baseline for categorical
    cls.idx = [6, 2, 3, 4, 5, 0]

    # log person-years enters as an offset, not as a regressor
    log_exposure = np.log(data['pyears'].values)
    model = Poisson.from_formula('deaths ~ smokes + C(agecat)',
                                 data=data, offset=log_exposure)

    constraint_str = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
    design_info = patsy.DesignInfo(model.exog_names)
    lin_constr = design_info.linear_constraint(constraint_str)

    newton_opts = {'method': 'newton', 'disp': 0}
    cls.res1 = fit_constrained(model, lin_constr.coefs, lin_constr.constants,
                               fit_kwds=newton_opts)
    cls.constraints = lin_constr

    # TODO: bfgs fails
    # test method of Poisson, not monkey patched
    cls.res1m = model.fit_constrained(constraint_str, method='bfgs', disp=0,
                                      start_params=cls.res1[0])
def setup_class(cls):
    """Fit a Poisson model without offset under the constraint
    ``C(agecat)[T.5] - C(agecat)[T.4] = 0.5``.

    Both constrained fits use bfgs with explicit start parameters: zeros
    everywhere except the first entry, which is the log of the mean of the
    endogenous variable.
    """
    cls.res2 = results.results_noexposure_constraint2
    # 2 is dropped baseline for categorical
    cls.idx = [7, 3, 4, 5, 6, 0, 1]

    # example without offset
    model = Poisson.from_formula('deaths ~ logpyears + smokes + C(agecat)',
                                 data=data)

    # get start_params, example fails to converge on one py TravisCI
    start_params = np.zeros(len(model.exog_names))
    start_params[0] = np.log(model.endog.mean())

    # if we need it, this is desired params (kept for reference, not used)
    p = np.array([
        -9.43762015, 1.52762442, 2.74155711, 3.58730007,
        4.08730007, 1.15987869, 0.12111539,
    ])

    constraint_str = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
    design_info = patsy.DesignInfo(model.exog_names)
    lin_constr = design_info.linear_constraint(constraint_str)

    bfgs_opts = {'method': 'bfgs', 'disp': 0}
    cls.res1 = fit_constrained(model, lin_constr.coefs, lin_constr.constants,
                               start_params=start_params,
                               fit_kwds=bfgs_opts)

    # TODO: Newton fails
    # test method of Poisson, not monkey patched
    cls.res1m = model.fit_constrained(constraint_str,
                                      start_params=start_params,
                                      method='bfgs', disp=0)
def setup_class(cls):
    """Fit a Poisson model without offset, constraining the
    ``C(agecat)[T.4]`` and ``C(agecat)[T.5]`` coefficients to be equal.

    Both constrained fits use bfgs with explicit start parameters: zeros
    everywhere except the first entry, which is the log of the mean of the
    endogenous variable.
    """
    cls.res2 = results.results_noexposure_constraint
    # 2 is dropped baseline for categorical
    cls.idx = [7, 3, 4, 5, 6, 0, 1]

    # example without offset
    model = Poisson.from_formula('deaths ~ logpyears + smokes + C(agecat)',
                                 data=data)

    # get start_params, example fails to converge on one py TravisCI
    start_params = np.zeros(len(model.exog_names))
    start_params[0] = np.log(model.endog.mean())

    # if we need it, this is desired params (kept for reference, not used)
    p = np.array([
        -3.93478643, 1.37276214, 2.33077032, 2.71338891,
        2.71338891, 0.57966535, 0.97254074,
    ])

    constraint_str = 'C(agecat)[T.4] = C(agecat)[T.5]'
    design_info = patsy.DesignInfo(model.exog_names)
    lin_constr = design_info.linear_constraint(constraint_str)

    bfgs_opts = {'method': 'bfgs', 'disp': 0}
    cls.res1 = fit_constrained(model, lin_constr.coefs, lin_constr.constants,
                               start_params=start_params,
                               fit_kwds=bfgs_opts)

    # TODO: Newton fails
    # test method of Poisson, not monkey patched
    cls.res1m = model.fit_constrained(constraint_str,
                                      start_params=start_params,
                                      method='bfgs', disp=0)
def dmatrix(pipe, require, output, config=None):
    """ Create a design matrix from Patsy/R style design strings

    Three kinds of design string are handled:

    * a two-sided formula containing ``~``: only the right-hand side
      (predictor) matrix is kept;
    * the string ``'1'``: an intercept-only column of ones with one row per
      entry of ``pipe.data['time']``;
    * anything else: passed to ``patsy.dmatrix`` as a one-sided design.

    Args:
        pipe (yatsm.pipeline.Pipe): Piped data to operate on
        require (dict[str, list[str]]): Labels for the requirements of this
            calculation
        output (dict[str, list[str]]): Label for the result of this
            calculation; the matrix is stored under ``output['data'][0]``
        config (dict): Configuration; its ``'design'`` entry holds the
            design string. Despite the ``None`` default it must be given.

    Returns:
        yatsm.pipeline.Pipe: Piped output
    """
    design = config['design']
    ds = pipe.data
    if '~' in design:
        # ``dmatrices`` returns an (outcome, predictors) pair; the
        # ``design_info`` used below lives on the predictor matrix.
        _, X = patsy.dmatrices(design, ds)
    elif design.strip() == '1':
        X = np.ones((ds['time'].size, 1))
        X = patsy.DesignMatrix(X, design_info=patsy.DesignInfo(['Intercept']))
    else:
        X = patsy.dmatrix(design, ds)

    coords = (ds['time'], X.design_info.column_names)
    dims = ('time', 'terms')
    pipe.data[output['data'][0]] = xr.DataArray(X, coords, dims)

    return pipe
def build_equality_constraints_string(
        dmat: xr.DataArray,
        constraints: List[str],
        dims: list
):
    r"""
    Parser for string encoded equality constraints.

    For each constraint, the first parameter with coefficient 1 is treated
    as the constrained parameter and is expressed as the negative sum of the
    remaining coefficient-1 parameters of that constraint. All other
    parameters map onto themselves.

    :param dmat: Design matrix container. The code reads
        ``dmat.data_vars['design']`` and the coordinates ``design_params``
        and ``observations``.
    :param constraints: List of constraints as strings.
        E.g. ["batch1 + batch2 + batch3 = 0"]
    :param dims: ["design_loc_params", "loc_params"] or
        ["design_scale_params", "scale_params"].
        Define dimension names of xarray.
    :return: a constraint matrix, as returned by ``parse_constraints``
    """
    n_par_all = dmat.data_vars['design'].values.shape[1]
    n_par_free = n_par_all - len(constraints)

    di = patsy.DesignInfo(dmat.coords["design_params"].values)
    constraint_ls = [di.linear_constraint(x).coefs[0] for x in constraints]
    # First coefficient-1 parameter of each constraint is the constrained one,
    # the remaining coefficient-1 parameters are the ones it depends on.
    idx_constr = np.asarray([np.where(x == 1)[0][0] for x in constraint_ls])
    idx_depending = [np.where(x == 1)[0][1:] for x in constraint_ls]
    idx_unconstr = np.asarray(list(
        set(np.asarray(range(n_par_all))) - set(idx_constr)
    ))

    # Design matrix restricted to the unconstrained parameters.
    dmat_var = xr.DataArray(
        dims=[dmat.data_vars['design'].dims[0], "params"],
        data=dmat.data_vars["design"][:, idx_unconstr],
        coords={dmat.data_vars['design'].dims[0]: dmat.coords["observations"].values,
                "params": dmat.coords["design_params"].values[idx_unconstr]}
    )

    # Rows: all parameters, columns: free parameters. Starts as all zeros.
    constraint_mat = np.zeros([n_par_all, n_par_free])
    for i in range(n_par_all):
        if i in idx_constr:
            idx_dep_i = idx_depending[np.where(idx_constr == i)[0][0]]
            # Translate full-parameter indices into free-parameter columns.
            idx_dep_i = np.asarray([np.where(idx_unconstr == x)[0] for x in idx_dep_i])
            constraint_mat[i, idx_dep_i] = -1
        else:
            idx_unconstr_i = np.where(idx_unconstr == i)
            constraint_mat[i, idx_unconstr_i] = 1

    constraints_ar = parse_constraints(
        dmat=dmat,
        constraints=constraint_mat,
        dims=dims
    )

    # Test reduced design matrix for full column rank before returning
    # constraints. The rank of a matrix always equals the rank of its
    # transpose, so the rank has to be compared with the number of columns.
    if np.linalg.matrix_rank(dmat_var) < dmat_var.shape[1]:
        logger.warning("constrained design matrix is not full rank")

    return constraints_ar
def constraint_matrix_from_string(
        dmat: np.ndarray,
        coef_names: list,
        constraints: Union[Tuple[str, str], List[str]]
):
    r"""
    Create constraint matrix from string encoded equality constraints.

    For each constraint, the first parameter with coefficient 1 is treated
    as the constrained parameter and is expressed as the negative sum of the
    remaining coefficient-1 parameters of that constraint. All other
    parameters map onto themselves.

    :param dmat: Design matrix; columns correspond to ``coef_names``.
    :param coef_names: Names of the design matrix columns, used to parse
        the constraint strings.
    :param constraints: List of constraints as strings.
        E.g. ["batch1 + batch2 + batch3 = 0"]
    :return: a constraint matrix of shape
        (number of parameters, number of parameters - number of constraints)
    :raises ValueError: if a constraint involves only one parameter, or if
        the design matrix restricted to the unconstrained parameters does
        not have full column rank.
    """
    assert len(constraints) > 0, "supply constraints"

    n_par_all = dmat.shape[1]
    n_par_free = n_par_all - len(constraints)

    di = patsy.DesignInfo(coef_names)
    constraint_ls = [di.linear_constraint(x).coefs[0] for x in constraints]
    # Check that constraints are sensible:
    for constraint_i in constraint_ls:
        if np.sum(constraint_i != 0) == 1:
            raise ValueError("a zero-equality constraint only involved one parameter: remove this parameter")
    # First coefficient-1 parameter of each constraint is the constrained one,
    # the remaining coefficient-1 parameters are the ones it depends on.
    idx_constr = np.asarray([np.where(x == 1)[0][0] for x in constraint_ls])
    idx_depending = [np.where(x == 1)[0][1:] for x in constraint_ls]
    idx_unconstr = np.asarray(list(
        set(np.asarray(range(n_par_all))) - set(idx_constr)
    ))

    # Rows: all parameters, columns: free parameters. Starts as all zeros.
    constraint_mat = np.zeros([n_par_all, n_par_free])
    for i in range(n_par_all):
        if i in idx_constr:
            idx_dep_i = idx_depending[np.where(idx_constr == i)[0][0]]
            # Translate full-parameter indices into free-parameter columns.
            idx_dep_i = np.asarray([np.where(idx_unconstr == x)[0] for x in idx_dep_i])
            constraint_mat[i, idx_dep_i] = -1
        else:
            idx_unconstr_i = np.where(idx_unconstr == i)
            constraint_mat[i, idx_unconstr_i] = 1

    # Test unconstrained subset design matrix for being full rank before
    # returning constraints. The rank of a matrix always equals the rank of
    # its transpose, so the rank has to be compared with the number of
    # columns instead.
    dmat_unconstr = dmat[:, idx_unconstr]
    rank_unconstr = np.linalg.matrix_rank(dmat_unconstr)
    if rank_unconstr < dmat_unconstr.shape[1]:
        raise ValueError(
            "unconstrained sub-design matrix is not full rank: rank %i with %i columns" %
            (rank_unconstr, dmat_unconstr.shape[1])
        )

    return constraint_mat
def setup_class(cls):
    """Fit ``cls.model_cls`` with a Poisson family, constraining the
    ``C(agecat)[T.4]`` and ``C(agecat)[T.5]`` coefficients to be equal.

    The same tolerance is passed to the stand-alone ``fit_constrained``
    function (via ``fit_kwds``) and to the model method.
    """
    tolerance = 1e-10

    # example without offset
    model = cls.model_cls.from_formula(
        'deaths ~ logpyears + smokes + C(agecat)',
        data=data,
        family=families.Poisson(),
    )

    constraint_str = 'C(agecat)[T.4] = C(agecat)[T.5]'
    design_info = patsy.DesignInfo(model.exog_names)
    lin_constr = design_info.linear_constraint(constraint_str)

    cls.res1 = fit_constrained(model, lin_constr.coefs, lin_constr.constants,
                               fit_kwds={'atol': tolerance})
    cls.constraints = lin_constr
    cls.res1m = model.fit_constrained(constraint_str, atol=tolerance)
def setup_class(cls):
    """Fit a Poisson-family GLM without offset, constraining the
    ``C(agecat)[T.4]`` and ``C(agecat)[T.5]`` coefficients to be equal.

    Stores the reference results, the parameter index mapping, the
    constraint object, and the constrained fits obtained from the
    stand-alone function and from the model method.
    """
    from statsmodels.base._constraints import fit_constrained

    cls.res2 = results.results_noexposure_constraint
    # 2 is dropped baseline for categorical
    cls.idx = [7, 3, 4, 5, 6, 0, 1]

    # example without offset
    model = GLM.from_formula('deaths ~ logpyears + smokes + C(agecat)',
                             data=data, family=families.Poisson())

    constraint_str = 'C(agecat)[T.4] = C(agecat)[T.5]'
    design_info = patsy.DesignInfo(model.exog_names)
    lin_constr = design_info.linear_constraint(constraint_str)

    cls.res1 = fit_constrained(model, lin_constr.coefs, lin_constr.constants)
    cls.constraints = lin_constr
    cls.res1m = model.fit_constrained(constraint_str)
def setup_class(cls):
    """Fit a Poisson-family GLM with a log(pyears) offset, constraining the
    ``C(agecat)[T.4]`` and ``C(agecat)[T.5]`` coefficients to be equal.

    ``res1m`` holds the ``_results`` attribute of the object returned by the
    model's own ``fit_constrained`` method.
    """
    from statsmodels.genmod.generalized_linear_model import GLM
    from statsmodels.genmod import families
    from statsmodels.base._constraints import fit_constrained

    cls.res2 = results.results_exposure_constraint
    # 2 is dropped baseline for categorical
    cls.idx = [6, 2, 3, 4, 5, 0]

    # example with offset
    model = GLM.from_formula(
        'deaths ~ smokes + C(agecat)',
        data=data,
        family=families.Poisson(),
        offset=np.log(data['pyears'].values),
    )

    constraint_str = 'C(agecat)[T.4] = C(agecat)[T.5]'
    design_info = patsy.DesignInfo(model.exog_names)
    lin_constr = design_info.linear_constraint(constraint_str)

    cls.res1 = fit_constrained(model, lin_constr.coefs, lin_constr.constants)
    cls.constraints = lin_constr
    constrained_fit = model.fit_constrained(constraint_str)
    cls.res1m = constrained_fit._results