def test_setup(self):
    """Compare formula-based logit fits with the reference result.

    Uses ``self.data`` and ``self.resp``.  Formulas with an explicit and
    with a default intercept term must give the same parameter names and
    predictions as the reference.  A formula whose design matrix carries
    an implicit constant must raise ``ValueError`` unless the constant
    check is disabled with ``hasconst=False``.
    """
    df = self.data
    reference = self.resp
    expected_prob = reference.predict()

    explicit_const = "apply ~ 1 + pared + public + gpa + C(dummy)"
    default_const = "apply ~ pared + public + gpa + C(dummy)"
    for spec in (explicit_const, default_const):
        fit_res = OrderedModel.from_formula(
            spec, df, distr='logit'
        ).fit(method='bfgs')
        summary_text = str(fit_res.summary())

        assert fit_res.model.exog_names == reference.model.exog_names
        assert fit_res.model.data.param_names == reference.model.exog_names
        # every reference parameter name has to show up in the summary
        missing = [
            name for name in reference.model.data.param_names
            if name not in summary_text
        ]
        assert not missing
        assert_allclose(fit_res.predict(df[:5]), expected_prob[:5])

    # test over parameterized model with implicit constant
    no_intercept = "apply ~ 0 + pared + public + gpa + C(dummy)"

    with pytest.raises(ValueError):
        OrderedModel.from_formula(no_intercept, df, distr='logit')

    # ignore constant, so we get results without exception
    unchecked_model = OrderedModel.from_formula(
        no_intercept, df, distr='logit', hasconst=False
    )
    # we get a warning in some environments
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", HessianInversionWarning)
        unchecked_res = unchecked_model.fit(method='bfgs')

    assert_allclose(
        unchecked_res.predict(df[:5]), expected_prob[:5], rtol=1e-4
    )
def test_formula_categorical(self):
    """Fit a probit model from a formula with a categorical endog.

    The formula fit must reproduce the parameters and the exog/endog names
    of ``self.resp``.  The frame is expected on ``model.data`` but not on
    the model itself.  A string-valued endog must raise ``ValueError``.
    """
    reference = self.resp
    df = ds.df
    spec = "apply ~ pared + public + gpa - 1"

    model = OrderedModel.from_formula(spec, df, distr='probit')
    fitted = model.fit(method='bfgs', disp=False)

    assert_allclose(fitted.params, reference.params, atol=1e-8)
    assert model.exog_names == reference.model.exog_names
    assert model.data.ynames == reference.model.data.ynames
    assert hasattr(model.data, "frame")
    assert not hasattr(model, "frame")

    # only ordered categorical or numerical endog are allowed
    # string endog raises ValueError
    with pytest.raises(ValueError):
        string_endog_data = {
            "apply": np.asarray(df['apply']),
            "pared": df['pared'],
            "public": df['public'],
            "gpa": df['gpa'],
        }
        OrderedModel.from_formula(spec, data=string_endog_data,
                                  distr='probit')
def test_offset(self):
    """Check fitting and prediction of a probit model with an offset.

    A model is fit with a constant offset of one.  Compared with the
    no-offset reference ``self.resp``, the first three parameters must
    agree and the parameter at position 3 must be larger by one, while the
    in-sample predictions stay the same.  Prediction with new exog and an
    explicit, default, or zero offset is checked against equivalent
    predictions of the reference model.

    Parameters are accessed with ``.iloc`` because both results come from
    pandas input (label-indexed Series); integer keys in ``Series[...]``
    being treated as positions is deprecated in pandas.
    """
    resp = self.resp
    data = ds.df
    offset = np.ones(len(data))

    formula = "apply ~ pared + public + gpa - 1"
    modf2 = OrderedModel.from_formula(formula, data, offset=offset,
                                      distr='probit')
    resf2 = modf2.fit(method='bfgs', disp=False)

    assert_allclose(resf2.params[:3], resp.params[:3], atol=2e-4)
    # explicit positional access instead of ``params[3]``
    assert_allclose(resf2.params.iloc[3], resp.params.iloc[3] + 1,
                    atol=2e-4)

    fitted = resp.predict()
    fitted2 = resf2.predict()
    assert_allclose(fitted2, fitted, atol=2e-4)

    pred_ones = resf2.predict(data[:6], offset=np.ones(6))
    assert_allclose(pred_ones, fitted[:6], atol=2e-4)

    # check default is 0. if exog provided
    pred_zero1 = resf2.predict(data[:6])
    pred_zero2 = resf2.predict(data[:6], offset=0)
    assert_allclose(pred_zero1, pred_zero2, atol=2e-4)

    # compare with equivalent results from no-offset model
    pred_zero = resp.predict(data[['pared', 'public', 'gpa']].iloc[:6],
                             offset=-np.ones(6))
    assert_allclose(pred_zero1, pred_zero, atol=2e-4)

    # shifting the parameter at position 3 of the reference by one must
    # give the same probabilities as the offset model with zero offset
    params_adj = resp.params.copy()
    params_adj.iloc[3] += 1
    fitted_zero = resp.model.predict(params_adj)
    assert_allclose(pred_zero1, fitted_zero[:6], atol=2e-4)
def setup_class(cls):
    """Fit the ordered model with a custom cloglog distribution.

    Four fits are stored on the class: array input with category codes
    (``res1``), pandas input (``resp``), formula input (``resf``) and
    array input from the unordered data set (``resu``).  The reference
    numbers ``res_ord_cloglog`` are stored as ``res2``.
    """
    df = ds.df
    df_unordered = ds.df_unordered
    exog_cols = ['pared', 'public', 'gpa']
    fit_opts = {"method": 'bfgs', "disp": False}

    # a Scipy distribution defined minimally
    class CLogLog(stats.rv_continuous):
        def _ppf(self, q):
            return np.log(-np.log(1 - q))

        def _cdf(self, x):
            return 1 - np.exp(-np.exp(x))

    cloglog = CLogLog()

    codes_res = OrderedModel(
        df['apply'].values.codes,
        np.asarray(df[exog_cols], float),
        distr=cloglog,
    ).fit(**fit_opts)

    pandas_res = OrderedModel(
        df['apply'], df[exog_cols], distr=cloglog
    ).fit(**fit_opts)

    # NOTE: a ``pytest.warns(UserWarning)`` guard around the formula model
    # was commented out in the original and is not active here.
    formula_data = {"apply": df['apply'].values.codes}
    formula_data.update((col, df[col]) for col in exog_cols)
    formula_res = OrderedModel.from_formula(
        "apply ~ pared + public + gpa - 1",
        data=formula_data,
        distr=cloglog,
    ).fit(**fit_opts)

    unordered_res = OrderedModel(
        df_unordered['apply'].values.codes,
        np.asarray(df_unordered[exog_cols], float),
        distr=cloglog,
    ).fit(**fit_opts)

    from .results.results_ordinal_model import res_ord_cloglog

    cls.res2 = res_ord_cloglog
    cls.res1 = codes_res
    cls.resp = pandas_res
    cls.resf = formula_res
    cls.resu = unordered_res
def setup_class(cls):
    """Fit the probit ordered model on all input flavors.

    Stores on the class: the array/codes fit (``res1``), the pandas fit
    (``resp``), the formula fit (``resf``), the fit on the unordered data
    (``resu``), the reference numbers ``res_ord_probit`` (``res2``) and a
    prediction table of regression numbers (``pred_table``).
    """
    df = ds.df
    df_unordered = ds.df_unordered
    exog_cols = ['pared', 'public', 'gpa']
    bfgs_quiet = dict(method='bfgs', disp=False)

    codes_model = OrderedModel(df['apply'].values.codes,
                               np.asarray(df[exog_cols], float),
                               distr='probit')
    codes_res = codes_model.fit(**bfgs_quiet)

    pandas_model = OrderedModel(df['apply'], df[exog_cols],
                                distr='probit')
    pandas_res = pandas_model.fit(**bfgs_quiet)

    formula_data = {
        "apply": df['apply'].values.codes,
        **{col: df[col] for col in exog_cols},
    }
    formula_model = OrderedModel.from_formula(
        "apply ~ pared + public + gpa - 1",
        data=formula_data,
        distr='probit',
    )
    formula_res = formula_model.fit(**bfgs_quiet)

    unordered_model = OrderedModel(
        df_unordered['apply'].values.codes,
        np.asarray(df_unordered[exog_cols], float),
        distr='probit',
    )
    unordered_res = unordered_model.fit(**bfgs_quiet)

    from .results.results_ordinal_model import res_ord_probit

    cls.res2 = res_ord_probit
    cls.res1 = codes_res
    cls.resp = pandas_res
    cls.resf = formula_res
    cls.resu = unordered_res
    # regression numbers
    cls.pred_table = np.array(
        [
            [202, 18, 0, 220],
            [112, 28, 0, 140],
            [27, 13, 0, 40],  # noqa
            [341, 59, 0, 400],
        ],
        dtype=np.int64,
    )
def setup_class(cls):
    """Fit the logit ordered model on all input flavors.

    Stores on the class: the array/codes fit (``res1``), the pandas fit
    (``resp``), the formula fit (``resf``), the fit on data with
    ``ordered=False`` (``resu``) and the reference numbers
    ``res_ord_logit`` (``res2``).
    """
    df = ds.df
    df_unordered = ds.df_unordered
    exog_cols = ['pared', 'public', 'gpa']

    def _fit(model):
        # all fits share the same optimizer settings
        return model.fit(method='bfgs', disp=False)

    # standard fit
    codes_res = _fit(OrderedModel(
        df['apply'].values.codes,
        np.asarray(df[exog_cols], float),
        distr='logit',
    ))

    # standard fit with pandas input
    pandas_res = _fit(OrderedModel(df['apply'], df[exog_cols],
                                   distr='logit'))

    # fit with formula
    formula_data = dict(
        [("apply", df['apply'].values.codes)]
        + [(col, df[col]) for col in exog_cols]
    )
    formula_res = _fit(OrderedModel.from_formula(
        "apply ~ pared + public + gpa - 1",
        data=formula_data,
        distr='logit',
    ))

    # fit on data with ordered=False
    unordered_res = _fit(OrderedModel(
        df_unordered['apply'].values.codes,
        np.asarray(df_unordered[exog_cols], float),
        distr='logit',
    ))

    from .results.results_ordinal_model import res_ord_logit

    cls.res2 = res_ord_logit
    cls.res1 = codes_res
    cls.resp = pandas_res
    cls.resf = formula_res
    cls.resu = unordered_res
def ordinal_regression_formula(data, formula, distr="probit"):
    """Fit an ``OrderedModel`` from a formula and collect its outputs.

    Parameters
    ----------
    data : object
        Data passed to ``OrderedModel.from_formula``.
    formula : str
        Model formula passed to ``OrderedModel.from_formula``.
    distr : optional
        Distribution passed to ``OrderedModel.from_formula``; defaults to
        ``"probit"``.

    Returns
    -------
    tuple
        ``(result, summary, odds)``: the BFGS fit result, its
        ``summary()`` and the value returned by ``get_odds_radio(result)``.
    """
    fitted = OrderedModel.from_formula(
        formula=formula, data=data, distr=distr
    ).fit(method="bfgs")
    # the tuple is evaluated left to right: summary first, then the odds
    return fitted, fitted.summary(), get_odds_radio(fitted)
# Ordered logit of political ideology on party affiliation, fitted on a
# frequency table that is first expanded to one row per observation.
data = pd.read_csv(r"D:/书籍资料整理/属性数据分析/政治意识与党派.csv")
# Disabled alternative: encode the ideology levels as integers.
# data['意识形态']=data['意识形态'].replace({'很自由':1,'有点自由':2,'中等':3,'有点保守':4,'很保守':5})
data['政治党派'] = data['政治党派'].replace({'民主党人': 1, '共和党人': 0})

# Repeat each of the first 20 rows as many times as its count column says.
# Index.repeat does this in one vectorized step; the previous row-by-row
# DataFrame.append loop was quadratic and DataFrame.append no longer exists
# in pandas >= 2.0.
counted = data.iloc[:20]
tmp = counted.loc[counted.index.repeat(counted['值'].to_numpy())]
tmp = tmp.reset_index(drop=True)
del tmp['值']
# Disabled: save the expanded data.
# tmp.to_csv(r'D:/书籍资料整理/属性数据分析/政治意识与党派_整理数据.csv')

# The results show the sign of the explanatory variable's coefficient
# reversed. That is explainable: this model is written as alpha - beta*x,
# while the book reports alpha + beta*x.
# From the second intercept on, however, the values differ a lot and no
# explanation was found. OrderedModel was not part of the released package
# at the time and the documentation barely mentioned it; it was a feature
# about to be added to statsmodels and not finished yet. To be revisited.
# NOTE(review): statsmodels documents that thresholds after the first are
# stored as log-increments; ``model.transform_threshold_params(params)``
# returns the cut points. That presumably explains the gap - confirm.
cat_type = CategoricalDtype(
    categories=['很自由', '有点自由', '中等', '有点保守', '很保守'],
    ordered=True,
)  # categories must be a list
tmp['意识形态'] = tmp['意识形态'].astype(cat_type)

modf_logit = OrderedModel.from_formula("意识形态~政治党派", tmp, distr='logit')
resf_logit = modf_logit.fit(method='bfgs')
print(resf_logit.summary())

# Ordered logit of mental impairment on SES and life events.
data = pd.read_csv(r"D:/书籍资料整理/属性数据分析/心灵伤害与SES.csv")
data['心理伤害'] = data['心理伤害'].replace({'健康': 0, '轻度': 1, '中等': 2, '受损': 3})
modf_logit = OrderedModel.from_formula("心理伤害~SES+生活事件", data, distr='logit')
resf_logit = modf_logit.fit()
# print the summary, as for the first model, so a script run shows it
print(resf_logit.summary())
cloglog = CLogLog()

# Define the model with the custom distribution, then fit it.
exog_student = data_student[['pared', 'public', 'gpa']]
mod_cloglog = OrderedModel(data_student['apply'], exog_student,
                           distr=cloglog)
res_cloglog = mod_cloglog.fit(method='bfgs', disp=False)
res_cloglog.summary()

# ### Using formulas - treatment of endog
#
# In formulas the dependent variable may be a pandas ordered categorical
# or numeric. Any other type raises a ValueError.

modf_logit = OrderedModel.from_formula(
    "apply ~ 0 + pared + public + gpa",
    data_student,
    distr='logit',
)
resf_logit = modf_logit.fit(method='bfgs')
resf_logit.summary()

# Numerical codes also work as dependent variable, but the names of the
# category levels are lost. Levels and names then correspond to the unique
# values of the dependent variable sorted in alphanumeric order, as in the
# case without formulas.

apply_codes = data_student['apply'].cat.codes * 2 + 5
data_student["apply_codes"] = apply_codes
data_student["apply_codes"].head()

modf_codes = OrderedModel.from_formula(
    "apply_codes ~ 0 + pared + public + gpa",
    data_student,
    distr='logit',
)
resf_codes = modf_codes.fit()
resf_codes.summary()