def summary(self):
    """Build the table of summary statistics for the fitted coefficients.

    Returns
    -------
    df : DataFrame
        Indexed like ``self.params_``. Columns, in order: ``coef``,
        ``exp(coef)``, ``se(coef)``, ``coef lower X%``, ``coef upper X%``,
        ``exp(coef) lower X%``, ``exp(coef) upper X%``, ``z``, ``p`` and
        ``-log2(p)``, where ``X`` is ``100 * (1 - self.alpha)`` formatted
        with ``%g``.
    """
    level = 100 * (1 - self.alpha)
    critical_value = inv_normal_cdf(1 - self.alpha / 2)

    # Labels that embed the confidence level, for both the output columns
    # and the lookups into ``self.confidence_intervals_``.
    ci_lower_key = "%g%% lower-bound" % level
    ci_upper_key = "%g%% upper-bound" % level
    coef_lower_col = "coef lower %g%%" % level
    coef_upper_col = "coef upper %g%%" % level
    exp_lower_col = "exp(coef) lower %g%%" % level
    exp_upper_col = "exp(coef) upper %g%%" % level

    # Floating-point warnings (invalid, divide, overflow, underflow) are
    # silenced for everything computed while the table is assembled.
    with np.errstate(invalid="ignore", divide="ignore", over="ignore", under="ignore"):
        table = pd.DataFrame(index=self.params_.index)
        table["coef"] = self.params_

        hazard_ratios = self.hazard_ratios_
        table["exp(coef)"] = hazard_ratios

        std_errors = self.standard_errors_
        table["se(coef)"] = std_errors

        intervals = self.confidence_intervals_
        table[coef_lower_col] = intervals[ci_lower_key]
        table[coef_upper_col] = intervals[ci_upper_key]

        # Bounds on the exp(coef) scale: scale the hazard ratio by
        # exp(-/+ critical_value * se).
        table[exp_lower_col] = hazard_ratios * np.exp(-critical_value * std_errors)
        table[exp_upper_col] = hazard_ratios * np.exp(critical_value * std_errors)

        table["z"] = self._compute_z_values()
        table["p"] = self._compute_p_values()
        table["-log2(p)"] = -utils.quiet_log2(table["p"])
        return table
def print_summary(self, decimals=2, style=None, columns=None, **kwargs):
    """
    Print the fit's summary table together with header and footer meta data.

    Parameters
    -----------
    decimals: int, optional (default=2)
        number of decimal places used when formatting floating point values
    style: string
        {html, ascii, latex}; forwarded to the printer's ``print`` call
    columns:
        only display a subset of ``summary`` columns. Default all.
    kwargs:
        additional meta data to print in the output (useful to provide model
        names, dataset names, etc.) when comparing multiple outputs.
    """

    def fixed_point(value):
        # Render ``value`` with exactly ``decimals`` digits after the point.
        return "{:.{prec}f}".format(value, prec=decimals)

    justify = string_rjustify(18)

    # Optional header rows are emitted only when the matching setting is in use.
    headers = []
    if self.event_col:
        headers.append(("event col", "'%s'" % self.event_col))
    if self.weights_col:
        headers.append(("weights col", "'%s'" % self.weights_col))
    # An array-valued penalizer is always reported; a scalar only when positive.
    if isinstance(self.penalizer, np.ndarray) or self.penalizer > 0:
        headers.append(("penalizer", self.penalizer))
    if self.strata:
        headers.append(("strata", self.strata))

    # Rows that are always present.
    headers += [
        ("number of subjects", self._n_unique),
        ("number of periods", self._n_examples),
        ("number of events", self.event_observed.sum()),
        ("partial log-likelihood", fixed_point(self.log_likelihood_)),
        ("time fit was run", self._time_fit_was_called),
    ]

    ratio_test = self.log_likelihood_ratio_test()
    footers = [
        ("Partial AIC", fixed_point(self.AIC_partial_)),
        (
            "log-likelihood ratio test",
            "{:.{prec}f} on {} df".format(
                ratio_test.test_statistic, ratio_test.degrees_freedom, prec=decimals
            ),
        ),
        ("-log2(p) of ll-ratio test", fixed_point(-utils.quiet_log2(ratio_test.p_value))),
    ]

    Printer(self, headers, footers, justify, kwargs, decimals, columns).print(style=style)
def to_ascii(self, decimals=2, **kwargs):
    """Render this result as a plain-text report.

    The report is the object's ``__repr__``, the stringified meta data, a
    ``---`` separator line and the summary table.

    Parameters
    ----------
    decimals: int, optional (default=2)
        passed to ``format_floats`` and ``format_p_value`` to build the
        table's formatters
    kwargs:
        extra meta data; merged over ``self._kwargs`` (these win on key clashes)

    Returns
    -------
    str
    """
    # Per-call meta data overrides the meta data stored on the instance.
    merged_meta = {**self._kwargs, **kwargs}
    meta_block = self._stringify_meta_data(merged_meta)

    # NOTE(review): ``summary`` is read as an attribute here, so it is
    # presumably exposed as a property -- confirm against the class definition.
    table = self.summary
    table["-log2(p)"] = -utils.quiet_log2(table["p"])

    header = self.__repr__()
    body = table.to_string(
        float_format=format_floats(decimals),
        # The index column is shown only when names were provided.
        index=self.name is not None,
        formatters={"p": format_p_value(decimals)},
    )
    return "".join([header, "\n", meta_block, "\n", "---\n", body])
def summary(self):
    """
    Assemble the test results into a table.

    Returns
    -------
    DataFrame
        one row per (test statistic, p-value) pair, sorted by index, with
        columns ``test_statistic``, ``p`` and ``-log2(p)``
    """
    names = self.name
    # When the first name is a tuple, the names are turned into a MultiIndex;
    # otherwise they are used as the index unchanged.
    row_index = (
        pd.MultiIndex.from_tuples(names)
        if names and isinstance(names[0], tuple)
        else names
    )

    rows = list(zip(self._test_statistic, self._p_value))
    table = pd.DataFrame(rows, columns=["test_statistic", "p"], index=row_index)
    table = table.sort_index()
    table["-log2(p)"] = -utils.quiet_log2(table["p"])
    return table
def deviance(self):
    """Return ``-log2`` of the log-likelihood ratio test's p-value.

    NOTE(review): despite the name, the value returned is the negated
    ``utils.quiet_log2`` of the ratio test's ``p_value``, not a deviance
    statistic -- confirm that the name matches the intent.
    """
    ratio_test = self.log_likelihood_ratio_test()
    return -utils.quiet_log2(ratio_test.p_value)