def print_summary(self, decimals=2, **kwargs):
    """
    Print summary statistics describing the fit, the coefficients, and the error bounds.

    Parameters
    -----------
    decimals: int, optional (default=2)
        specify the number of decimal places to show
    kwargs:
        print additional metadata in the output (useful to provide model names, dataset
        names, etc.) when comparing multiple outputs.
    """
    pad = string_justify(18)
    print(self)
    print(f"{pad('number of subjects')} = {self.durations.shape[0]}")
    print(f"{pad('number of events')} = {np.where(self.event_observed)[0].shape[0]}")
    print(f"{pad('log-likelihood')} = {self._log_likelihood:.3f}")
    # Caller-supplied metadata, one "key = value" line each.
    for key, value in kwargs.items():
        print(f"{pad(key)} = {value}\n")
    print(end="\n")
    print("---")
    summary_df = self.summary
    # Significance codes go in an unnamed trailing column.
    summary_df[""] = [significance_code(p) for p in summary_df["p"]]
    print(summary_df.to_string(float_format=format_floats(decimals), formatters={"p": format_p_value(decimals)}))
    print("---")
    print(significance_codes_as_text(), end="\n\n")
def plot(self, standardized=False, **kwargs):
    """
    Produce a visual representation of the fitted coefficients and their
    confidence intervals, sorted by coefficient magnitude.

    Parameters
    ----------
    standardized: bool, optional (default=False)
        standardize each estimated coefficient and confidence interval
        endpoints by the standard error of the estimate.
    kwargs:
        ``ax`` may be provided to draw on an existing matplotlib axis.

    Returns
    -------
    ax: the matplotlib axis that can be edited.
    """
    from matplotlib import pyplot as plt

    ax = kwargs.get('ax', None) or plt.figure().add_subplot(111)
    yaxis_locations = range(len(self.hazards_.columns))
    summary = self.summary
    lower_bound = self.confidence_intervals_.loc['lower-bound'].copy()
    upper_bound = self.confidence_intervals_.loc['upper-bound'].copy()
    hazards = self.hazards_.values[0].copy()

    if standardized:
        se = summary['se(coef)']
        lower_bound /= se
        upper_bound /= se
        hazards /= se

    order = np.argsort(hazards)
    ax.scatter(upper_bound.values[order], yaxis_locations, marker='|', c='k')
    ax.scatter(lower_bound.values[order], yaxis_locations, marker='|', c='k')
    ax.scatter(hazards[order], yaxis_locations, marker='o', c='k')
    ax.hlines(yaxis_locations, lower_bound.values[order], upper_bound.values[order], color='k', lw=1)

    # BUGFIX: Series.iteritems() was removed in pandas 2.0 — .items() is the
    # compatible spelling in every pandas version. Use .iloc for positional
    # selection since the 'p' Series carries a string (covariate-name) index.
    tick_labels = [c + significance_code(p).strip() for (c, p) in summary['p'].iloc[order].items()]

    plt.yticks(yaxis_locations, tick_labels)
    plt.xlabel("standardized coef" if standardized else "coef")
    return ax
def print_summary(self):
    """
    Print summary statistics describing the fit.
    """
    summary_df = self.summary
    # Significance codes last
    summary_df[''] = [significance_code(p) for p in summary_df['p']]

    # Print information about data first
    n_events = np.where(self.event_observed)[0].shape[0]
    print(f'n={self.data.shape[0]}, number of events={n_events}', end='\n\n')
    print(summary_df.to_string(float_format=lambda f: f'{f:.3e}'))

    # Significance code explanation
    print('---')
    print("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ", end='\n\n')
    concordance = concordance_index(
        self.durations,
        -self.predict_partial_hazard(self.data).values.ravel(),
        self.event_observed,
    )
    print(f"Concordance = {concordance:.3f}")
def print_summary(self):
    """
    Print summary statistics describing the fit, the coefficients, and the error bounds.
    """
    # Print information about data first
    pad = string_justify(18)
    print(self)
    print(f"{pad('event col')} = {self.event_col}")
    print(f"{pad('number of subjects')} = {self._n_unique}")
    print(f"{pad('number of periods')} = {self._n_examples}")
    print(f"{pad('number of events')} = {self.event_observed.sum()}")
    print(f"{pad('log-likelihood')} = {self._log_likelihood:.3f}")
    print(f"{pad('time fit was run')} = {self._time_fit_was_called} UTC", end="\n\n")
    print("---")

    summary_df = self.summary
    # Significance codes last
    summary_df[""] = [significance_code(p) for p in summary_df["p"]]
    print(summary_df.to_string(float_format=lambda f: f"{f:4.4f}"))

    # Significance code explanation
    print("---")
    print(significance_codes_as_text(), end="\n\n")
    print("Likelihood ratio test = {:.3f} on {} df, p={:.5f}".format(*self._compute_likelihood_ratio_test()))
def print_summary(self):
    """
    Print summary statistics describing the fit, the coefficients, and the error bounds.
    """
    # Header: dataset / fit metadata before the coefficient table.
    pad = string_justify(18)
    print(self)
    for label, value in (
        ('event col', self.event_col),
        ('number of subjects', self._n_unique),
        ('number of periods', self._n_examples),
        ('number of events', self.event_observed.sum()),
    ):
        print(f'{pad(label)} = {value}')
    print(f'{pad("log-likelihood")} = {self._log_likelihood:.3f}')
    print(f'{pad("time fit was run")} = {self._time_fit_was_called} UTC', end='\n\n')
    print('---')

    summary_df = self.summary
    # Significance codes last
    summary_df[''] = [significance_code(p) for p in summary_df['p']]
    print(summary_df.to_string(float_format=lambda f: f'{f:4.4f}'))

    # Significance code explanation
    print('---')
    print(significance_codes_as_text(), end='\n\n')
    print("Likelihood ratio test = {:.3f} on {} df, p={:.5f}".format(*self._compute_likelihood_ratio_test()))
def plot(self, standardized=False, columns=None, **kwargs):
    """
    Produces a visual representation of the fitted coefficients, including
    their standard errors and magnitudes.

    Parameters
    ----------
    standardized: bool, optional
        standardize each estimated coefficient and confidence interval
        endpoints by the standard error of the estimate.
    columns : list-like, default None
        plot only this subset of the fitted covariates.
    kwargs:
        ``ax`` may be provided to draw on an existing matplotlib axis.

    Returns
    -------
    ax: the matplotlib axis that be edited.
    """
    from matplotlib import pyplot as plt

    ax = kwargs.get('ax', None) or plt.figure().add_subplot(111)

    if columns is not None:
        yaxis_locations = range(len(columns))
        summary = self.summary.loc[columns]
        lower_bound = self.confidence_intervals_[columns].loc['lower-bound'].copy()
        upper_bound = self.confidence_intervals_[columns].loc['upper-bound'].copy()
        hazards = self.hazards_[columns].values[0].copy()
    else:
        yaxis_locations = range(len(self.hazards_.columns))
        summary = self.summary
        lower_bound = self.confidence_intervals_.loc['lower-bound'].copy()
        upper_bound = self.confidence_intervals_.loc['upper-bound'].copy()
        hazards = self.hazards_.values[0].copy()

    if standardized:
        se = summary['se(coef)']
        lower_bound /= se
        upper_bound /= se
        hazards /= se

    order = np.argsort(hazards)
    ax.scatter(upper_bound.values[order], yaxis_locations, marker='|', c='k')
    ax.scatter(lower_bound.values[order], yaxis_locations, marker='|', c='k')
    ax.scatter(hazards[order], yaxis_locations, marker='o', c='k')
    ax.hlines(yaxis_locations, lower_bound.values[order], upper_bound.values[order], color='k', lw=1)

    # BUGFIX: Series.iteritems() was removed in pandas 2.0 — .items() works in
    # every pandas version. Use .iloc for positional selection since the 'p'
    # Series carries a string (covariate-name) index.
    tick_labels = [
        c + significance_code(p).strip()
        for (c, p) in summary['p'].iloc[order].items()
    ]

    plt.yticks(yaxis_locations, tick_labels)
    plt.xlabel("standardized coef" if standardized else "coef")
    return ax
def plot(self, columns=None, display_significance_code=True, **errorbar_kwargs):
    """
    Produces a visual representation of the coefficients, including their
    standard errors and magnitudes.

    Parameters
    ----------
    columns : list, optional
        specifiy a subset of the columns to plot
    display_significance_code: bool, optional (default: True)
        display asteriks beside statistically significant variables
    errorbar_kwargs:
        pass in additional plotting commands to matplotlib errorbar command

    Returns
    -------
    ax: matplotlib axis
        the matplotlib axis that be edited.
    """
    from matplotlib import pyplot as plt

    ax = errorbar_kwargs.get("ax", None) or plt.figure().add_subplot(111)
    errorbar_kwargs.setdefault("c", "k")
    errorbar_kwargs.setdefault("fmt", "s")
    errorbar_kwargs.setdefault("markerfacecolor", "white")
    errorbar_kwargs.setdefault("markeredgewidth", 1.25)
    errorbar_kwargs.setdefault("elinewidth", 1.25)
    errorbar_kwargs.setdefault("capsize", 3)

    # Half-width of the (symmetric) confidence interval in standard errors.
    alpha2 = inv_normal_cdf((1.0 + self.alpha) / 2.0)

    if columns is None:
        columns = self.hazards_.columns

    yaxis_locations = list(range(len(columns)))
    summary = self.summary.loc[columns]
    symmetric_errors = alpha2 * self.standard_errors_[columns].squeeze().values.copy()
    hazards = self.hazards_[columns].values[0].copy()

    order = np.argsort(hazards)

    ax.errorbar(hazards[order], yaxis_locations, xerr=symmetric_errors[order], **errorbar_kwargs)
    best_ylim = ax.get_ylim()
    ax.vlines(0, -2, len(columns) + 1, linestyles="dashed", linewidths=1, alpha=0.65)
    ax.set_ylim(best_ylim)

    if display_significance_code:
        # BUGFIX: Series.iteritems() was removed in pandas 2.0 — .items() works
        # in every pandas version; .iloc for positional selection on a
        # string-indexed Series.
        tick_labels = [c + significance_code(p).strip() for (c, p) in summary["p"].iloc[order].items()]
    else:
        # BUGFIX: `columns` may be a plain Python list (user-supplied), which
        # cannot be indexed by a numpy integer array — coerce first.
        tick_labels = np.asarray(columns)[order]

    plt.yticks(yaxis_locations, tick_labels)
    plt.xlabel("log(HR) (%g%% CI)" % (self.alpha * 100))

    return ax
def print_summary(self, decimals=2, **kwargs):
    """
    Print summary statistics describing the fit, the coefficients, and the error bounds.

    Parameters
    -----------
    decimals: int, optional (default=2)
        specify the number of decimal places to show
    kwargs:
        print additional metadata in the output (useful to provide model names, dataset
        names, etc.) when comparing multiple outputs.
    """
    # Print information about data first
    pad = string_justify(18)
    print(self)
    print(f"{pad('event col')} = '{self.event_col}'")

    # Optional-configuration lines only appear when the option was used.
    if self.weights_col:
        print(f"{pad('weights col')} = '{self.weights_col}'")
    if self.strata:
        print(f"{pad('strata')} = {self.strata}")

    print(f"{pad('number of subjects')} = {self._n_unique}")
    print(f"{pad('number of periods')} = {self._n_examples}")
    print(f"{pad('number of events')} = {self.event_observed.sum()}")
    print(f"{pad('log-likelihood')} = {self._log_likelihood:.{decimals}f}")
    print(f"{pad('time fit was run')} = {self._time_fit_was_called} UTC")

    for key, value in kwargs.items():
        print(f"{pad(key)} = {value}\n")

    print(end="\n")
    print("---")

    summary_df = self.summary
    # Significance codes last
    summary_df[""] = [significance_code(p) for p in summary_df["p"]]
    print(summary_df.to_string(float_format=format_floats(decimals), formatters={"p": format_p_value(decimals)}))

    # Significance code explanation
    print("---")
    print(significance_codes_as_text(), end="\n\n")
    print("Likelihood ratio test = {:.{prec}f} on {} df, log(p)={:.{prec}f}".format(*self._compute_likelihood_ratio_test(), prec=decimals))
def __unicode__(self):
    """Return the summary table with metadata and significance legend as a string."""
    meta_data = self._pretty_print_meta_data(self._kwargs)
    summary_df = self.summary
    # Single p-value for the whole test, appended as an unnamed column.
    summary_df[''] = significance_code(self.p_value)
    table = summary_df.to_string(float_format=lambda f: f'{f:4.4f}', index=False)
    return (
        f"\n{meta_data}\n\n{table}\n---"
        "\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 "
    )
def __unicode__(self):
    """Return the summary table with metadata and significance legend as a string."""
    meta_data = self._pretty_print_meta_data(self._kwargs)
    summary_df = self.summary
    # Single p-value for the whole test, appended as an unnamed column.
    summary_df[""] = significance_code(self.p_value)
    table = summary_df.to_string(float_format=lambda f: f"{f:4.4f}", index=False)
    return f"\n{meta_data}\n\n{table}\n---\n{significance_codes_as_text()}"
def print_summary(self):
    """
    Print summary statistics describing the fit.
    """
    pad = string_justify(18)
    print(self)
    print(f"{pad('number of subjects')} = {self.durations.shape[0]}")
    print(f"{pad('number of events')} = {np.where(self.event_observed)[0].shape[0]}")
    print(f"{pad('log-likelihood')} = {self._log_likelihood:.3f}", end='\n\n')

    summary_df = self.summary
    # Significance codes in an unnamed trailing column.
    summary_df[''] = [significance_code(p) for p in summary_df['p']]
    print(summary_df.to_string(float_format=lambda f: f'{f:4.4f}'))
    print('---')
    print(significance_codes_as_text(), end='\n\n')
def print_summary(self):
    """
    Print summary statistics describing the fit.
    """
    # Print information about data first
    pad = string_justify(18)
    print(self)
    print(f"{pad('duration col')} = {self.duration_col}")
    print(f"{pad('event col')} = {self.event_col}")

    # Optional-configuration lines only appear when the option was used.
    if self.weights_col:
        print(f"{pad('weights col')} = {self.weights_col}")
    if self.cluster_col:
        print(f"{pad('cluster col')} = {self.cluster_col}")
    if self.robust or self.cluster_col:
        print(f"{pad('robust variance')} = {True}")
    if self.strata:
        print(f"{pad('strata')} = {self.strata}")

    print(f"{pad('number of subjects')} = {self._n_examples}")
    print(f"{pad('number of events')} = {self.event_observed.sum()}")
    print(f"{pad('log-likelihood')} = {self._log_likelihood:.3f}")
    print(f"{pad('time fit was run')} = {self._time_fit_was_called}", end='\n\n')
    print('---')

    summary_df = self.summary
    # Significance codes last
    summary_df[''] = [significance_code(p) for p in summary_df['p']]
    print(summary_df.to_string(float_format=lambda f: f'{f:4.4f}'))

    # Significance code explanation
    print('---')
    print(significance_codes_as_text(), end='\n\n')
    print(f"Concordance = {self.score_:.3f}")
    print("Likelihood ratio test = {:.3f} on {} df, p={:.5f}".format(*self._compute_likelihood_ratio_test()))
def print_summary(self):
    """
    Print summary statistics describing the fit.
    """
    # Fit metadata header precedes the coefficient table.
    pad = string_justify(18)
    print(self)
    print(f"{pad('duration col')} = {self.duration_col}")
    print(f"{pad('event col')} = {self.event_col}")

    if self.weights_col:
        print(f"{pad('weights col')} = {self.weights_col}")
    if self.cluster_col:
        print(f"{pad('cluster col')} = {self.cluster_col}")
    if self.robust or self.cluster_col:
        # Robust variance is implied whenever clustering is requested.
        print(f"{pad('robust variance')} = {True}")
    if self.strata:
        print(f"{pad('strata')} = {self.strata}")

    print(f"{pad('number of subjects')} = {self._n_examples}")
    print(f"{pad('number of events')} = {self.event_observed.sum()}")
    print(f"{pad('log-likelihood')} = {self._log_likelihood:.3f}")
    print(f"{pad('time fit was run')} = {self._time_fit_was_called}", end="\n\n")
    print("---")

    summary_df = self.summary
    # Significance codes last
    summary_df[""] = [significance_code(p) for p in summary_df["p"]]
    print(summary_df.to_string(float_format=lambda f: f"{f:4.4f}"))

    # Significance code explanation
    print("---")
    print(significance_codes_as_text(), end="\n\n")
    print(f"Concordance = {self.score_:.3f}")
    print("Likelihood ratio test = {:.3f} on {} df, p={:.5f}".format(*self._compute_likelihood_ratio_test()))
def print_summary(self):
    """
    Print summary statistics describing the fit, the coefficients, and the error bounds.
    """
    summary_df = self.summary
    # Significance codes last
    summary_df[''] = [significance_code(p) for p in summary_df['p']]

    # Print information about data first
    print(
        f'periods={self._n_examples}, uniques={self._n_unique}, number of events={self.event_observed.sum()}',
        end='\n\n',
    )
    print(summary_df.to_string(float_format=lambda f: f'{f:4.4f}'))

    # Significance code explanation
    print('---')
    print("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ", end='\n\n')
    print("Likelihood ratio test = {:.3f} on {} df, p={:.5f}".format(*self._compute_likelihood_ratio_test()))
def print_summary(self):
    """
    Print summary statistics describing the fit.
    """
    summary_df = self.summary
    # Significance codes last
    summary_df[''] = [significance_code(p) for p in summary_df['p']]

    # Print information about data first
    n_events = np.where(self.event_observed)[0].shape[0]
    print(f'n={self._n_examples}, number of events={n_events}', end='\n\n')
    print(summary_df.to_string(float_format=lambda f: f'{f:4.4f}'))

    # Significance code explanation
    print('---')
    print("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ", end='\n\n')
def _to_string(self, decimals=2, **kwargs):
    """
    Render the fit summary (repr, metadata, coefficient table, significance
    legend) as a single string.

    Parameters
    ----------
    decimals: int, optional (default=2)
        number of decimal places used for floats in the table
    kwargs:
        extra metadata merged over ``self._kwargs`` (later keys win)
    """
    merged_kwargs = {**self._kwargs, **kwargs}
    meta_data = self._stringify_meta_data(merged_kwargs)
    summary_df = self.summary
    summary_df["log(p)"] = np.log(summary_df["p"])
    # Significance codes in an unnamed trailing column.
    summary_df[""] = [significance_code(p) for p in summary_df["p"]]
    table = summary_df.to_string(
        float_format=format_floats(decimals),
        index=self.name is not None,
        formatters={"p": format_p_value(decimals)},
    )
    return f"{self.__repr__()}\n{meta_data}\n---\n{table}\n---\n{significance_codes_as_text()}"
def print_summary(self):
    """
    Print summary statistics describing the fit.
    """
    summary_df = self.summary
    # Significance codes last
    summary_df[''] = [significance_code(p) for p in summary_df['p']]

    # Print information about data first
    n_events = np.where(self.event_observed)[0].shape[0]
    print(f'n={self._n_examples}, number of events={n_events}', end='\n\n')
    print(summary_df.to_string(float_format=lambda f: f'{f:4.4f}'))

    # Significance code explanation
    print('---')
    print("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ", end='\n\n')
    print(f"Concordance = {self.score_:.3f}")
def print_summary(self):
    """
    Print summary statistics describing the fit.
    """
    pad = string_justify(18)
    print(self)
    print(f"{pad('number of subjects')} = {self.durations.shape[0]}")
    print(f"{pad('number of events')} = {np.where(self.event_observed)[0].shape[0]}")
    print(f"{pad('log-likelihood')} = {self._log_likelihood:.3f}", end="\n\n")

    summary_df = self.summary
    # Significance codes in an unnamed trailing column.
    summary_df[""] = [significance_code(p) for p in summary_df["p"]]
    print(summary_df.to_string(float_format=lambda f: f"{f:4.4f}"))
    print("---")
    print(significance_codes_as_text(), end="\n\n")
def print_summary(self):
    """
    Print summary statistics describing the fit.
    """
    summary_df = self.summary
    # Significance codes last
    summary_df[''] = [significance_code(p) for p in summary_df['p']]

    # Print information about data first
    n_events = np.where(self.event_observed)[0].shape[0]
    print(f'n={self.data.shape[0]}, number of events={n_events}', end='\n\n')
    print(summary_df.to_string(float_format=lambda f: f'{f:.3e}'))

    # Significance code explanation
    print('---')
    print("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ", end='\n\n')
    concordance = concordance_index(
        self.durations,
        -self.predict_partial_hazard(self.data).values.ravel(),
        self.event_observed,
    )
    print(f"Concordance = {concordance:.3f}")
def plot(self, standardized=False, columns=None, **kwargs):
    """
    Produces a visual representation of the fitted coefficients, including
    their standard errors and magnitudes.

    Parameters
    ----------
    standardized: bool, optional
        standardize each estimated coefficient and confidence interval
        endpoints by the standard error of the estimate.
    columns : list, optional
        specifiy a subset of the columns to plot
    kwargs:
        pass in additional plotting commands; ``ax`` may be provided to draw
        on an existing matplotlib axis.

    Returns
    -------
    ax: matplotlib axis
        the matplotlib axis that be edited.
    """
    from matplotlib import pyplot as plt

    ax = kwargs.get("ax", None) or plt.figure().add_subplot(111)

    if columns is not None:
        yaxis_locations = range(len(columns))
        summary = self.summary.loc[columns]
        lower_bound = self.confidence_intervals_[columns].loc["lower-bound"].copy()
        upper_bound = self.confidence_intervals_[columns].loc["upper-bound"].copy()
        hazards = self.hazards_[columns].values[0].copy()
    else:
        yaxis_locations = range(len(self.hazards_.columns))
        summary = self.summary
        lower_bound = self.confidence_intervals_.loc["lower-bound"].copy()
        upper_bound = self.confidence_intervals_.loc["upper-bound"].copy()
        hazards = self.hazards_.values[0].copy()

    if standardized:
        se = summary["se(coef)"]
        lower_bound /= se
        upper_bound /= se
        hazards /= se

    order = np.argsort(hazards)
    ax.scatter(upper_bound.values[order], yaxis_locations, marker="|", c="k")
    ax.scatter(lower_bound.values[order], yaxis_locations, marker="|", c="k")
    ax.scatter(hazards[order], yaxis_locations, marker="o", c="k")
    ax.hlines(yaxis_locations, lower_bound.values[order], upper_bound.values[order], color="k", lw=1)

    # BUGFIX: Series.iteritems() was removed in pandas 2.0 — .items() works in
    # every pandas version. Use .iloc for positional selection since the "p"
    # Series carries a string (covariate-name) index.
    tick_labels = [
        c + significance_code(p).strip()
        for (c, p) in summary["p"].iloc[order].items()
    ]

    plt.yticks(yaxis_locations, tick_labels)
    plt.xlabel("standardized coef" if standardized else "coef")
    return ax