Exemple #1
0
# Cumulative sum of the exponentiated trailing ``k_levels - 1`` parameters
# (presumably the threshold parameters -- confirm against the model).
n_trailing = mod.k_levels - 1
print(np.exp(res.params[-n_trailing:]).cumsum())
# print(res.summary())

# Predicted choice = index of the largest entry in each row of the
# model's prediction array.
predicted = res.model.predict(res.params)
pred_choice = predicted.argmax(axis=1)
print('Fraction of correct choice predictions')
hit_rate = (y == pred_choice).mean()
print(hit_rate)

# Compare how often each category is predicted with how often it is
# present in the model's endog.
print('\ncomparing bincount')
recomputed_choice = res.model.predict(res.params).argmax(axis=1)
print(np.bincount(recomputed_choice))
print(np.bincount(res.model.endog))

# Same data, but with the 'logit' distribution.
logit_model = OrderedModel(y, x, distr='logit')
res_log = logit_model.fit(method='bfgs')
pred_choice_log = res_log.predict().argmax(axis=1)
hit_rate_log = (y == pred_choice_log).mean()
print(hit_rate_log)
print(res_log.summary())

# example from UCLA Stats pages
# http://www.ats.ucla.edu/stat/stata/dae/ologit.htm
# requires downloaded dataset ologit.dta

# Load the UCLA ologit dataset from a local Stata file.
dataf = pandas.read_stata(r"M:\josef_new\scripts\ologit_ucla.dta")

# this works but sorts category levels alphabetically
apply_endog = np.asarray(dataf['apply'])
ucla_exog = np.asarray(dataf[['pared', 'public', 'gpa']], dtype=float)
res_log2 = OrderedModel(apply_endog, ucla_exog,
                        distr='logit').fit(method='bfgs')

# this replicates the UCLA example except
# for different parameterization of par2
res_log3 = OrderedModel(dataf['apply'].values.codes,
# Row-wise argmax of the prediction array gives the predicted category.
pred_choice = predicted.argmax(axis=1)
print('Fraction of correct choice predictions')
# Integer codes of the categorical 'apply' column, compared elementwise
# with the predicted category index.
observed_codes = np.asarray(data_student['apply'].values.codes)
print((observed_codes == pred_choice).mean())

# ### Ordinal regression with a custom cumulative cLogLog distribution:

# In addition to `logit` and `probit` regression, any continuous
# distribution from the `scipy.stats` package can be used for the `distr`
# argument. Alternatively, one can define one's own distribution simply
# by creating a subclass of `rv_continuous` and implementing a few methods.

# using a SciPy distribution
student_exog_columns = ['pared', 'public', 'gpa']
expon_model = OrderedModel(data_student['apply'],
                           data_student[student_exog_columns],
                           distr=stats.expon)
res_exp = expon_model.fit(method='bfgs', disp=False)
# The summary is built but not printed; its value is discarded here.
res_exp.summary()


# minimal definition of a custom scipy distribution.
class CLogLog(stats.rv_continuous):
    """Distribution with CDF ``F(x) = 1 - exp(-exp(x))`` (cLogLog).

    Only the CDF and its inverse are implemented here; every other
    method is left to whatever ``scipy.stats.rv_continuous`` provides.

    Both methods use ``np.expm1`` / ``np.log1p`` so that results stay
    accurate in the lower tail, where the naive formulas lose all
    precision (``1 - exp(-tiny)`` rounds to 0 and ``1 - q`` rounds to 1).
    """

    def _ppf(self, q):
        # Inverse CDF: log(-log(1 - q)), with log1p(-q) == log(1 - q)
        # evaluated without first rounding 1 - q.
        return np.log(-np.log1p(-q))

    def _cdf(self, x):
        # 1 - exp(-exp(x)) written as -expm1(-exp(x)) to avoid
        # catastrophic cancellation when exp(x) is tiny.
        return -np.expm1(-np.exp(x))


# Shared instance of the custom distribution.
cloglog = CLogLog()

# definition of the model and fitting
res_cloglog = OrderedModel(data_student['apply'],