print(np.exp(res.params[-(mod.k_levels - 1):]).cumsum()) # print(res.summary()) predicted = res.model.predict(res.params) pred_choice = predicted.argmax(1) print('Fraction of correct choice predictions') print((y == pred_choice).mean()) print('\ncomparing bincount') print(np.bincount(res.model.predict(res.params).argmax(1))) print(np.bincount(res.model.endog)) res_log = OrderedModel(y, x, distr='logit').fit(method='bfgs') pred_choice_log = res_log.predict().argmax(1) print((y == pred_choice_log).mean()) print(res_log.summary()) # example form UCLA Stats pages # http://www.ats.ucla.edu/stat/stata/dae/ologit.htm # requires downloaded dataset ologit.dta dataf = pandas.read_stata(r"M:\josef_new\scripts\ologit_ucla.dta") # this works but sorts category levels alphabetically res_log2 = OrderedModel(np.asarray(dataf['apply']), np.asarray(dataf[['pared', 'public', 'gpa']], float), distr='logit').fit(method='bfgs') # this replicates the UCLA example except # for different parameterization of par2 res_log3 = OrderedModel(dataf['apply'].values.codes,
pred_choice = predicted.argmax(1) print('Fraction of correct choice predictions') print((np.asarray(data_student['apply'].values.codes) == pred_choice).mean()) # ### Ordinal regression with a custom cumulative cLogLog distribution: # In addition to `logit` and `probit` regression, any continuous # distribution from `SciPy.stats` package can be used for the `distr` # argument. Alternatively, one can define its own distribution simply # creating a subclass from `rv_continuous` and implementing a few methods. # using a SciPy distribution res_exp = OrderedModel(data_student['apply'], data_student[['pared', 'public', 'gpa']], distr=stats.expon).fit(method='bfgs', disp=False) res_exp.summary() # minimal definition of a custom scipy distribution. class CLogLog(stats.rv_continuous): def _ppf(self, q): return np.log(-np.log(1 - q)) def _cdf(self, x): return 1 - np.exp(-np.exp(x)) cloglog = CLogLog() # definition of the model and fitting res_cloglog = OrderedModel(data_student['apply'],