Beispiel #1
0
    def test_setup(self):
        """Formula-based models must reproduce the reference fit.

        Two formulas (explicit ``1 +`` and implicit intercept) are fitted and
        compared with ``self.resp`` on names, summary text and predictions.
        A third formula with ``0 +`` and a categorical term must raise a
        ``ValueError`` unless ``hasconst=False`` is passed.
        """
        frame = self.data
        reference = self.resp
        expected = reference.predict()

        specs = (
            "apply ~ 1 + pared + public + gpa + C(dummy)",
            "apply ~ pared + public + gpa + C(dummy)",
        )
        for spec in specs:
            fit = OrderedModel.from_formula(
                spec, frame, distr='logit').fit(method='bfgs')
            report = str(fit.summary())

            ref_names = reference.model.exog_names
            assert fit.model.exog_names == ref_names
            assert fit.model.data.param_names == ref_names

            # every reference parameter name has to show up in the summary
            absent = [name for name in reference.model.data.param_names
                      if name not in report]
            assert not absent

            assert_allclose(fit.predict(frame[:5]), expected[:5])

        # over-parameterized model: no explicit intercept, but the
        # categorical term brings in an implicit constant
        spec = "apply ~ 0 + pared + public + gpa + C(dummy)"

        with pytest.raises(ValueError):
            OrderedModel.from_formula(spec, frame, distr='logit')

        # with hasconst=False the constant is ignored and no exception is
        # raised
        model_noconst = OrderedModel.from_formula(
            spec, frame, distr='logit', hasconst=False)

        # some environments emit a HessianInversionWarning here; silence it
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", HessianInversionWarning)
            fit_noconst = model_noconst.fit(method='bfgs')

        assert_allclose(fit_noconst.predict(frame[:5]), expected[:5],
                        rtol=1e-4)
    def test_formula_categorical(self):
        """Formula fit on the categorical endog matches the reference fit.

        Also checks that the data frame is stored on ``model.data`` only, and
        that a string-valued endog is rejected with ``ValueError``.
        """
        reference = self.resp
        frame = ds.df
        spec = "apply ~ pared + public + gpa - 1"

        model = OrderedModel.from_formula(spec, frame, distr='probit')
        fit = model.fit(method='bfgs', disp=False)

        assert_allclose(fit.params, reference.params, atol=1e-8)
        assert model.exog_names == reference.model.exog_names
        assert model.data.ynames == reference.model.data.ynames

        # the frame lives on the data handler, not on the model itself
        assert hasattr(model.data, "frame")
        assert not hasattr(model, "frame")

        with pytest.raises(ValueError):
            # Only ordered categorical or numerical endog are allowed;
            # converting 'apply' with np.asarray yields a string endog,
            # which has to raise ValueError.
            string_endog = {"apply": np.asarray(frame['apply'])}
            for column in ('pared', 'public', 'gpa'):
                string_endog[column] = frame[column]
            OrderedModel.from_formula(spec, data=string_endog,
                                      distr='probit')
Beispiel #3
0
    def test_offset(self):
        """Check that a constant offset is absorbed by a single parameter.

        A probit model is fitted through the formula interface with an offset
        of ones and compared with the reference fit ``self.resp`` (no
        offset): fitted probabilities must agree, the first three parameters
        must agree, and the parameter at position 3 must be larger by one.
        Prediction with explicit, default and zero offsets is checked too.
        """
        resp = self.resp
        data = ds.df
        offset = np.ones(len(data))

        formula = "apply ~ pared + public + gpa - 1"
        modf2 = OrderedModel.from_formula(formula, data, offset=offset,
                                          distr='probit')
        resf2 = modf2.fit(method='bfgs', disp=False)

        # Compare by position on plain arrays.  Integer keys on a
        # label-indexed pandas Series are deprecated positional lookups, so
        # ``params[3]`` must not be applied to the Series directly.
        params_ref = np.asarray(resp.params)
        params_off = np.asarray(resf2.params)
        assert_allclose(params_off[:3], params_ref[:3], atol=2e-4)
        # position 3 follows the three slope coefficients (presumably the
        # first threshold); it shifts by the constant offset
        assert_allclose(params_off[3], params_ref[3] + 1, atol=2e-4)

        fitted = resp.predict()
        fitted2 = resf2.predict()
        assert_allclose(fitted2, fitted, atol=2e-4)

        pred_ones = resf2.predict(data[:6], offset=np.ones(6))
        assert_allclose(pred_ones, fitted[:6], atol=2e-4)

        # check that the offset defaults to 0 if exog is provided
        pred_zero1 = resf2.predict(data[:6])
        pred_zero2 = resf2.predict(data[:6], offset=0)
        assert_allclose(pred_zero1, pred_zero2, atol=2e-4)

        # compare with equivalent results from the no-offset model
        pred_zero = resp.predict(data[['pared', 'public', 'gpa']].iloc[:6],
                                 offset=-np.ones(6))
        assert_allclose(pred_zero1, pred_zero, atol=2e-4)

        # same comparison via the model-level predict with shifted parameters;
        # copy so the reference results are not modified
        params_adj = params_ref.copy()
        params_adj[3] += 1
        fitted_zero = resp.model.predict(params_adj)
        assert_allclose(pred_zero1, fitted_zero[:6], atol=2e-4)
    def setup_class(cls):
        """Fit the ordered model with a custom cloglog distribution.

        The same specification is fitted from integer codes with array exog,
        from pandas input, from a formula, and from the ``df_unordered``
        variant of the data.  The fits and the stored reference results are
        attached to the class.
        """
        frame = ds.df
        frame_unordered = ds.df_unordered
        regressors = ['pared', 'public', 'gpa']
        fit_options = {'method': 'bfgs', 'disp': False}

        class CLogLog(stats.rv_continuous):
            """Minimal scipy distribution providing only ppf and cdf."""

            def _ppf(self, q):
                return np.log(-np.log(1 - q))

            def _cdf(self, x):
                return 1 - np.exp(-np.exp(x))

        cloglog = CLogLog()
        apply_codes = frame['apply'].values.codes

        # integer codes as endog, float array as exog
        fit_array = OrderedModel(
            apply_codes,
            np.asarray(frame[regressors], float),
            distr=cloglog,
        ).fit(**fit_options)

        # pandas Series / DataFrame input
        fit_pandas = OrderedModel(
            frame['apply'],
            frame[regressors],
            distr=cloglog,
        ).fit(**fit_options)

        # formula interface with a dict of columns and coded endog
        # NOTE: a ``pytest.warns(UserWarning)`` guard around this call is
        # disabled in the original code.
        formula_data = {"apply": apply_codes}
        formula_data.update((name, frame[name]) for name in regressors)
        fit_formula = OrderedModel.from_formula(
            "apply ~ pared + public + gpa - 1",
            data=formula_data,
            distr=cloglog,
        ).fit(**fit_options)

        # codes and exog taken from the df_unordered data
        fit_unordered = OrderedModel(
            frame_unordered['apply'].values.codes,
            np.asarray(frame_unordered[regressors], float),
            distr=cloglog,
        ).fit(**fit_options)

        from .results.results_ordinal_model import res_ord_cloglog
        cls.res2 = res_ord_cloglog
        cls.res1 = fit_array
        cls.resp = fit_pandas
        cls.resf = fit_formula
        cls.resu = fit_unordered
    def setup_class(cls):
        """Fit the probit ordered model on several input flavours.

        The same specification is fitted from integer codes with array exog,
        from pandas input, from a formula, and from the ``df_unordered``
        variant of the data.  The fits, the stored reference results and a
        prediction table of regression numbers are attached to the class.
        """
        frame = ds.df
        frame_unordered = ds.df_unordered
        regressors = ['pared', 'public', 'gpa']
        fit_options = {'method': 'bfgs', 'disp': False}
        apply_codes = frame['apply'].values.codes

        # integer codes as endog, float array as exog
        fit_array = OrderedModel(
            apply_codes,
            np.asarray(frame[regressors], float),
            distr='probit',
        ).fit(**fit_options)

        # pandas Series / DataFrame input
        fit_pandas = OrderedModel(
            frame['apply'],
            frame[regressors],
            distr='probit',
        ).fit(**fit_options)

        # formula interface with a dict of columns and coded endog
        formula_data = {"apply": apply_codes}
        formula_data.update((name, frame[name]) for name in regressors)
        fit_formula = OrderedModel.from_formula(
            "apply ~ pared + public + gpa - 1",
            data=formula_data,
            distr='probit',
        ).fit(**fit_options)

        # codes and exog taken from the df_unordered data
        fit_unordered = OrderedModel(
            frame_unordered['apply'].values.codes,
            np.asarray(frame_unordered[regressors], float),
            distr='probit',
        ).fit(**fit_options)

        from .results.results_ordinal_model import res_ord_probit
        cls.res2 = res_ord_probit
        cls.res1 = fit_array
        cls.resp = fit_pandas
        cls.resf = fit_formula
        cls.resu = fit_unordered

        # regression numbers
        cls.pred_table = np.array(
            [[202, 18, 0, 220],
             [112, 28, 0, 140],
             [27, 13, 0, 40],
             [341, 59, 0, 400]],
            dtype=np.int64)
    def setup_class(cls):
        """Fit the logit ordered model on several input flavours.

        The same specification is fitted from integer codes with array exog,
        from pandas input, from a formula, and from the data with
        ``ordered=False``.  The fits and the stored reference results are
        attached to the class.
        """
        frame = ds.df
        frame_unordered = ds.df_unordered
        regressors = ['pared', 'public', 'gpa']
        fit_options = {'method': 'bfgs', 'disp': False}
        apply_codes = frame['apply'].values.codes

        # plain fit: integer codes as endog, float array as exog
        fit_array = OrderedModel(
            apply_codes,
            np.asarray(frame[regressors], float),
            distr='logit',
        ).fit(**fit_options)

        # plain fit again, this time handing over pandas objects
        fit_pandas = OrderedModel(
            frame['apply'],
            frame[regressors],
            distr='logit',
        ).fit(**fit_options)

        # formula interface with a dict of columns and coded endog
        formula_data = {"apply": apply_codes}
        formula_data.update((name, frame[name]) for name in regressors)
        fit_formula = OrderedModel.from_formula(
            "apply ~ pared + public + gpa - 1",
            data=formula_data,
            distr='logit',
        ).fit(**fit_options)

        # fit on the data whose categorical has ordered=False
        fit_unordered = OrderedModel(
            frame_unordered['apply'].values.codes,
            np.asarray(frame_unordered[regressors], float),
            distr='logit',
        ).fit(**fit_options)

        from .results.results_ordinal_model import res_ord_logit
        cls.res2 = res_ord_logit
        cls.res1 = fit_array
        cls.resp = fit_pandas
        cls.resf = fit_formula
        cls.resu = fit_unordered
Beispiel #7
0
def ordinal_regression_formula(data, formula, distr="probit"):
    """Fit an ordinal regression specified by a formula.

    Parameters
    ----------
    data
        Data passed on to ``OrderedModel.from_formula``.
    formula
        Formula passed on to ``OrderedModel.from_formula``.
    distr
        Distribution argument of the model, ``"probit"`` by default.

    Returns
    -------
    tuple
        ``(result, summary, odds_radio)``: the fit obtained with the BFGS
        method, its ``summary()``, and the value of ``get_odds_radio(result)``.
    """
    fitted = OrderedModel.from_formula(
        formula=formula, data=data, distr=distr
    ).fit(method="bfgs")
    # tuple elements are evaluated left to right: summary first, then odds
    return fitted, fitted.summary(), get_odds_radio(fitted)
# --- Analysis 1: political ideology by party --------------------------------
# Load the aggregated table; the '值' column is used below as the number of
# times each row is repeated.
data = pd.read_csv(r"D:/书籍资料整理/属性数据分析/政治意识与党派.csv")
# Disabled alternative: encode the ideology levels as integers 1..5.
# data['意识形态']=data['意识形态'].replace({'很自由':1,'有点自由':2,'中等':3,'有点保守':4,'很保守':5})
data['政治党派'] = data['政治党派'].replace({'民主党人': 1, '共和党人': 0})

# Expand the first 20 aggregated rows into one row per observation.
# DataFrame.append was removed in pandas 2.0, so the rows are repeated with
# Index.repeat + .loc instead of appending inside a Python loop.
tmp = data.iloc[:20]
tmp = tmp.loc[tmp.index.repeat(tmp['值'].to_numpy())]
tmp = tmp.reset_index(drop=True)
tmp = tmp.drop(columns='值')
# tmp.to_csv(r'D:/书籍资料整理/属性数据分析/政治意识与党派_整理数据.csv')

# Notes on the results:
# The sign of the explanatory variable's coefficient is reversed.  That can
# be explained: the model here is written as alpha - beta*x, whereas the book
# reports alpha + beta*x.
# From the second intercept onward, however, the values differ a lot from the
# book and a reason is hard to find.  OrderedModel was not part of the
# released package and is barely mentioned in the documentation; it is a
# feature about to be added to statsmodels and not yet complete -- to be
# revisited.
# NOTE(review): possibly the later thresholds are reported as increments on a
# log scale rather than as cut points -- confirm against the OrderedModel
# documentation.

# Unused here; kept in case code outside this section refers to it.
s = pd.Series(["a", "b", "c", "a", "d", "e"])

# `categories` must be a list; ordered=True makes the levels ordinal.
cat_type = CategoricalDtype(categories=['很自由', '有点自由', '中等', '有点保守', '很保守'],
                            ordered=True)
# A single conversion suffices: values outside `categories` become NaN.
tmp['意识形态'] = tmp['意识形态'].astype(cat_type)

modf_logit = OrderedModel.from_formula("意识形态~政治党派", tmp, distr='logit')
resf_logit = modf_logit.fit(method='bfgs')
print(resf_logit.summary())

# --- Analysis 2: mental impairment by SES and life events --------------------
data = pd.read_csv(r"D:/书籍资料整理/属性数据分析/心灵伤害与SES.csv")

# Encode the outcome levels as the integers 0..3.
data['心理伤害'] = data['心理伤害'].replace({'健康': 0, '轻度': 1, '中等': 2, '受损': 3})
modf_logit = OrderedModel.from_formula("心理伤害~SES+生活事件", data, distr='logit')
resf_logit = modf_logit.fit()
resf_logit.summary()
cloglog = CLogLog()

# Build the model with the custom distribution and fit it in one go.
res_cloglog = OrderedModel(
    data_student['apply'],
    data_student[['pared', 'public', 'gpa']],
    distr=cloglog,
).fit(method='bfgs', disp=False)
res_cloglog.summary()

# ### Using formulas - treatment of endog
#
# In formulas the dependent variable may be a pandas ordered categorical or
# numeric; any other type raises a ValueError.

modf_logit = OrderedModel.from_formula(
    formula="apply ~ 0 + pared + public + gpa",
    data=data_student,
    distr='logit',
)
resf_logit = modf_logit.fit(method='bfgs')
resf_logit.summary()

# Numerical codes also work as dependent variable, but the names of the
# category levels are lost.  Levels and names then correspond to the unique
# values of the dependent variable sorted in alphanumeric order, exactly as
# without formulas.

data_student["apply_codes"] = 2 * data_student['apply'].cat.codes + 5
data_student["apply_codes"].head()

OrderedModel.from_formula(
    formula="apply_codes ~ 0 + pared + public + gpa",
    data=data_student,
    distr='logit',
).fit().summary()