Example #1
0
    def print_summary(self, decimals=2, **kwargs):
        """
        Print the model header, the fit statistics and the coefficient table.

        Parameters
        -----------
        decimals: int, optional (default=2)
            number of decimal places to show; handed to the float and p-value
            formatters of the coefficient table (the log-likelihood row always
            uses 3 decimals).
        kwargs:
            extra ``name = value`` rows appended to the header, e.g. a model or
            dataset name to tell several outputs apart.

        """
        pad = string_justify(18)

        def row(name, value, template="{} = {}"):
            # One header line: padded name followed by its value.
            print(template.format(pad(name), value))

        print(self)
        row("number of subjects", self.durations.shape[0])
        row("number of events", len(np.where(self.event_observed)[0]))
        row("log-likelihood", self._log_likelihood, "{} = {:.3f}")

        for name, value in kwargs.items():
            # The template's trailing newline leaves a blank line after each extra row.
            row(name, value, "{} = {}\n")

        print()
        print("---")

        table = self.summary
        # The significance codes go in a trailing, unnamed column.
        table[""] = [significance_code(p_value) for p_value in table["p"]]
        rendered = table.to_string(
            float_format=format_floats(decimals),
            formatters={"p": format_p_value(decimals)},
        )
        print(rendered)
        print("---")
        print(significance_codes_as_text(), end="\n\n")
    def plot(self, standardized=False, **kwargs):
        """
        Plot the fitted coefficients together with their confidence intervals.

        Coefficients are drawn as points, sorted by value, with a horizontal
        line running from the lower to the upper confidence bound.

        Parameters
        ----------
        standardized: bool, optional (default=False)
            standardize each estimated coefficient and confidence interval
            endpoints by the standard error of the estimate.
        kwargs:
            only ``ax`` is read: an existing matplotlib axis to draw on. A new
            figure is created when it is not given.

        Returns
        -------
        ax: matplotlib axis
            the axis that was drawn on.

        """
        from matplotlib import pyplot as plt

        ax = kwargs.get('ax', None) or plt.figure().add_subplot(111)
        yaxis_locations = list(range(len(self.hazards_.columns)))

        summary = self.summary
        lower_bound = self.confidence_intervals_.loc['lower-bound'].copy()
        upper_bound = self.confidence_intervals_.loc['upper-bound'].copy()
        hazards = self.hazards_.values[0].copy()

        if standardized:
            se = summary['se(coef)']
            lower_bound /= se
            upper_bound /= se
            # Divide by the raw values so `hazards` stays a plain ndarray
            # (assumes the summary rows follow the column order of hazards_).
            hazards /= se.values

        order = np.argsort(hazards)
        ax.scatter(upper_bound.values[order], yaxis_locations, marker='|', c='k')
        ax.scatter(lower_bound.values[order], yaxis_locations, marker='|', c='k')
        ax.scatter(hazards[order], yaxis_locations, marker='o', c='k')
        ax.hlines(yaxis_locations, lower_bound.values[order], upper_bound.values[order], color='k', lw=1)

        # `order` holds positions, so use .iloc; .items() replaces
        # Series.iteritems(), which was removed in pandas 2.0.
        tick_labels = [c + significance_code(p).strip() for (c, p) in summary['p'].iloc[order].items()]

        # Label the axis we drew on: pyplot's "current" axis can be a different
        # one when the caller supplies `ax`.
        ax.set_yticks(yaxis_locations)
        ax.set_yticklabels(tick_labels)
        ax.set_xlabel("standardized coef" if standardized else "coef")
        return ax
Example #3
0
    def print_summary(self):
        """
        Print the sample counts, the coefficient table, the significance-code
        legend and the concordance of the fit.

        """
        table = self.summary
        # Trailing unnamed column holds the significance codes.
        table[''] = [significance_code(p_value) for p_value in table['p']]

        # Sample information comes first.
        n_rows = self.data.shape[0]
        n_events = len(np.where(self.event_observed)[0])
        print('n={}, number of events={}'.format(n_rows, n_events), end='\n\n')

        print(table.to_string(float_format='{:.3e}'.format))

        # Legend for the significance codes.
        print('---')
        legend = "Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 "
        print(legend, end='\n\n')

        # The partial hazards are negated before being scored against the durations.
        scores = -self.predict_partial_hazard(self.data).values.ravel()
        concordance = concordance_index(self.durations, scores, self.event_observed)
        print("Concordance = {:.3f}".format(concordance))
    def print_summary(self):
        """
        Print the fit header, the coefficient table with significance codes,
        and the likelihood ratio test.
        """
        pad = string_justify(18)

        def row(name, value, template="{} = {}", **print_options):
            # One header line: padded name followed by its value.
            print(template.format(pad(name), value), **print_options)

        # Header describing the data and the fit.
        print(self)
        row("event col", self.event_col)
        row("number of subjects", self._n_unique)
        row("number of periods", self._n_examples)
        row("number of events", self.event_observed.sum())
        row("log-likelihood", self._log_likelihood, "{} = {:.3f}")
        row("time fit was run", self._time_fit_was_called, "{} = {} UTC", end="\n\n")

        print("---")

        table = self.summary
        # Trailing unnamed column holds the significance codes.
        table[""] = [significance_code(p_value) for p_value in table["p"]]
        print(table.to_string(float_format="{:4.4f}".format))

        # Legend, then the likelihood ratio test.
        print("---")
        print(significance_codes_as_text(), end="\n\n")
        ratio_test = self._compute_likelihood_ratio_test()
        print("Likelihood ratio test = {:.3f} on {} df, p={:.5f}".format(*ratio_test))
Example #5
0
    def plot(self, standardized=False, **kwargs):
        """
        Plot the fitted coefficients together with their confidence intervals.

        Coefficients are drawn as points, sorted by value, with a horizontal
        line running from the lower to the upper confidence bound.

        Parameters
        ----------
        standardized: bool, optional (default=False)
            standardize each estimated coefficient and confidence interval
            endpoints by the standard error of the estimate.
        kwargs:
            only ``ax`` is read: an existing matplotlib axis to draw on. A new
            figure is created when it is not given.

        Returns
        -------
        ax: matplotlib axis
            the axis that was drawn on.

        """
        from matplotlib import pyplot as plt

        ax = kwargs.get('ax', None) or plt.figure().add_subplot(111)
        yaxis_locations = list(range(len(self.hazards_.columns)))

        summary = self.summary
        lower_bound = self.confidence_intervals_.loc['lower-bound'].copy()
        upper_bound = self.confidence_intervals_.loc['upper-bound'].copy()
        hazards = self.hazards_.values[0].copy()

        if standardized:
            se = summary['se(coef)']
            lower_bound /= se
            upper_bound /= se
            # Divide by the raw values so `hazards` stays a plain ndarray
            # (assumes the summary rows follow the column order of hazards_).
            hazards /= se.values

        order = np.argsort(hazards)
        ax.scatter(upper_bound.values[order], yaxis_locations, marker='|', c='k')
        ax.scatter(lower_bound.values[order], yaxis_locations, marker='|', c='k')
        ax.scatter(hazards[order], yaxis_locations, marker='o', c='k')
        ax.hlines(yaxis_locations, lower_bound.values[order], upper_bound.values[order], color='k', lw=1)

        # `order` holds positions, so use .iloc; .items() replaces
        # Series.iteritems(), which was removed in pandas 2.0.
        tick_labels = [c + significance_code(p).strip() for (c, p) in summary['p'].iloc[order].items()]

        # Label the axis we drew on: pyplot's "current" axis can be a different
        # one when the caller supplies `ax`.
        ax.set_yticks(yaxis_locations)
        ax.set_yticklabels(tick_labels)
        ax.set_xlabel("standardized coef" if standardized else "coef")
        return ax
Example #6
0
    def print_summary(self):
        """
        Print the fit header, the coefficient table with significance codes,
        and the likelihood ratio test.
        """
        pad = string_justify(18)
        plain = '{} = {}'

        # Header describing the data and the fit.
        print(self)
        print(plain.format(pad('event col'), self.event_col))
        print(plain.format(pad('number of subjects'), self._n_unique))
        print(plain.format(pad('number of periods'), self._n_examples))
        print(plain.format(pad('number of events'), self.event_observed.sum()))
        print('{} = {:.3f}'.format(pad('log-likelihood'), self._log_likelihood))
        fit_time_line = '{} = {} UTC'.format(pad('time fit was run'), self._time_fit_was_called)
        print(fit_time_line, end='\n\n')

        print('---')

        coefficients = self.summary
        # Trailing unnamed column holds the significance codes.
        coefficients[''] = [significance_code(p_value) for p_value in coefficients['p']]
        print(coefficients.to_string(float_format='{:4.4f}'.format))

        # Legend, then the likelihood ratio test.
        print('---')
        print(significance_codes_as_text(), end='\n\n')
        ratio_test = self._compute_likelihood_ratio_test()
        print("Likelihood ratio test = {:.3f} on {} df, p={:.5f}".format(*ratio_test))
Example #7
0
    def plot(self, standardized=False, columns=None, **kwargs):
        """
        Produces a visual representation of the fitted coefficients, including their standard errors and magnitudes.

        Parameters:
            standardized: standardize each estimated coefficient and confidence interval
                          endpoints by the standard error of the estimate.
            columns : list-like, default None
                      subset of the columns to plot; every column is plotted when None.
            kwargs: only ``ax`` is read, an existing matplotlib axis to draw on;
                    a new figure is created when it is not given.
        Returns:
            ax: the matplotlib axis that was drawn on.

        """
        from matplotlib import pyplot as plt

        ax = kwargs.get('ax', None) or plt.figure().add_subplot(111)

        if columns is not None:
            yaxis_locations = list(range(len(columns)))
            summary = self.summary.loc[columns]
            lower_bound = self.confidence_intervals_[columns].loc[
                'lower-bound'].copy()
            upper_bound = self.confidence_intervals_[columns].loc[
                'upper-bound'].copy()
            hazards = self.hazards_[columns].values[0].copy()
        else:
            yaxis_locations = list(range(len(self.hazards_.columns)))
            summary = self.summary
            lower_bound = self.confidence_intervals_.loc['lower-bound'].copy()
            upper_bound = self.confidence_intervals_.loc['upper-bound'].copy()
            hazards = self.hazards_.values[0].copy()

        if standardized:
            se = summary['se(coef)']
            lower_bound /= se
            upper_bound /= se
            # Divide by the raw values so `hazards` stays a plain ndarray
            # (assumes the summary rows follow the order of the plotted columns).
            hazards /= se.values

        order = np.argsort(hazards)
        ax.scatter(upper_bound.values[order],
                   yaxis_locations,
                   marker='|',
                   c='k')
        ax.scatter(lower_bound.values[order],
                   yaxis_locations,
                   marker='|',
                   c='k')
        ax.scatter(hazards[order], yaxis_locations, marker='o', c='k')
        ax.hlines(yaxis_locations,
                  lower_bound.values[order],
                  upper_bound.values[order],
                  color='k',
                  lw=1)

        # `order` holds positions, so use .iloc; .items() replaces
        # Series.iteritems(), which was removed in pandas 2.0.
        tick_labels = [
            c + significance_code(p).strip()
            for (c, p) in summary['p'].iloc[order].items()
        ]

        # Label the axis we drew on: pyplot's "current" axis can be a different
        # one when the caller supplies `ax`.
        ax.set_yticks(yaxis_locations)
        ax.set_yticklabels(tick_labels)
        ax.set_xlabel("standardized coef" if standardized else "coef")
        return ax
    def plot(self, columns=None, display_significance_code=True, **errorbar_kwargs):
        """
        Produces a visual representation of the coefficients, including their standard errors and magnitudes.

        Parameters
        ----------
        columns : list, optional
            specify a subset of the columns to plot
        display_significance_code: bool, optional (default: True)
            display asterisks beside statistically significant variables
        errorbar_kwargs:
            pass in additional plotting commands to matplotlib errorbar command.
            ``ax`` is taken out and used as the axis to draw on; a new figure is
            created when it is not given.

        Returns
        -------
        ax: matplotlib axis
            the matplotlib axis that was drawn on.

        """
        from matplotlib import pyplot as plt

        # Pop (not get) `ax`: whatever is left in errorbar_kwargs is forwarded
        # to ax.errorbar, which does not accept an `ax` keyword.
        ax = errorbar_kwargs.pop("ax", None) or plt.figure().add_subplot(111)

        errorbar_kwargs.setdefault("c", "k")
        errorbar_kwargs.setdefault("fmt", "s")
        errorbar_kwargs.setdefault("markerfacecolor", "white")
        errorbar_kwargs.setdefault("markeredgewidth", 1.25)
        errorbar_kwargs.setdefault("elinewidth", 1.25)
        errorbar_kwargs.setdefault("capsize", 3)

        alpha2 = inv_normal_cdf((1.0 + self.alpha) / 2.0)

        if columns is None:
            columns = self.hazards_.columns

        yaxis_locations = list(range(len(columns)))
        summary = self.summary.loc[columns]
        # ravel() instead of squeeze().values: squeeze() collapses a single
        # selected column to a scalar, which has no `.values`.
        symmetric_errors = alpha2 * self.standard_errors_[columns].values.ravel()
        hazards = self.hazards_[columns].values[0].copy()

        order = np.argsort(hazards)

        ax.errorbar(hazards[order], yaxis_locations, xerr=symmetric_errors[order], **errorbar_kwargs)
        # Remember the limits chosen for the error bars so the dashed zero
        # line drawn next does not stretch the y-axis.
        best_ylim = ax.get_ylim()
        ax.vlines(0, -2, len(columns) + 1, linestyles="dashed", linewidths=1, alpha=0.65)
        ax.set_ylim(best_ylim)

        if display_significance_code:
            # `order` holds positions, so use .iloc; .items() replaces
            # Series.iteritems(), which was removed in pandas 2.0.
            tick_labels = [c + significance_code(p).strip() for (c, p) in summary["p"].iloc[order].items()]
        else:
            # Element-wise lookup also works when `columns` is a plain list,
            # which cannot be indexed with an array.
            tick_labels = [columns[i] for i in order]

        # Label the axis we drew on: pyplot's "current" axis can be a different
        # one when the caller supplies `ax`.
        ax.set_yticks(yaxis_locations)
        ax.set_yticklabels(tick_labels)
        ax.set_xlabel("log(HR) (%g%% CI)" % (self.alpha * 100))

        return ax
Example #9
0
    def print_summary(self, decimals=2, **kwargs):
        """
        Print the fit header, the coefficient table with significance codes,
        and the likelihood ratio test.

        Parameters
        -----------
        decimals: int, optional (default=2)
            number of decimal places to show; used for the log-likelihood, the
            likelihood ratio test and handed to the table's float and p-value
            formatters.
        kwargs:
            extra ``name = value`` rows appended to the header, e.g. a model or
            dataset name to tell several outputs apart.

        """
        pad = string_justify(18)

        def row(name, value, template="{} = {}"):
            # One header line: padded name followed by its value.
            print(template.format(pad(name), value))

        # Header describing the data and the fit.
        print(self)
        row("event col", self.event_col, "{} = '{}'")

        # Optional rows are only shown when the attribute is set.
        if self.weights_col:
            row("weights col", self.weights_col, "{} = '{}'")
        if self.strata:
            row("strata", self.strata)

        row("number of subjects", self._n_unique)
        row("number of periods", self._n_examples)
        row("number of events", self.event_observed.sum())
        log_likelihood_line = "{} = {:.{prec}f}".format(
            pad("log-likelihood"), self._log_likelihood, prec=decimals
        )
        print(log_likelihood_line)
        row("time fit was run", self._time_fit_was_called, "{} = {} UTC")

        for name, value in kwargs.items():
            # The template's trailing newline leaves a blank line after each extra row.
            row(name, value, "{} = {}\n")

        print()
        print("---")

        table = self.summary
        # Trailing unnamed column holds the significance codes.
        table[""] = [significance_code(p_value) for p_value in table["p"]]
        rendered = table.to_string(
            float_format=format_floats(decimals),
            formatters={"p": format_p_value(decimals)},
        )
        print(rendered)

        # Legend, then the likelihood ratio test.
        print("---")
        print(significance_codes_as_text(), end="\n\n")
        ratio_template = "Likelihood ratio test = {:.{prec}f} on {} df, log(p)={:.{prec}f}"
        print(ratio_template.format(*self._compute_likelihood_ratio_test(), prec=decimals))
Example #10
0
    def __unicode__(self):
        """
        Build the text report: meta data, the summary table (without index)
        and the significance-code legend.
        """
        header = self._pretty_print_meta_data(self._kwargs)
        table = self.summary
        # Single significance code, derived from this object's p-value.
        table[''] = significance_code(self.p_value)
        body = table.to_string(float_format='{:4.4f}'.format, index=False)
        legend = "Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 "

        # Empty entries give the leading newline and the blank line after the header.
        return "\n".join(["", header, "", body, "---", legend])
Example #11
0
    def __unicode__(self):
        """
        Return the printable report: meta data first, then the summary table
        (index hidden), then the legend of the significance codes.
        """
        meta_block = self._pretty_print_meta_data(self._kwargs)

        results = self.summary
        # Significance code of this object's p-value, in an unnamed column.
        results[''] = significance_code(self.p_value)
        results_text = results.to_string(float_format='{:4.4f}'.format, index=False)

        return (
            "\n" + meta_block + "\n\n"
            + results_text
            + '\n---'
            + "\nSignif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 "
        )
Example #12
0
    def __unicode__(self):
        """
        Build the text report: meta data, the summary table (without index)
        and the significance-code legend.
        """
        header = self._pretty_print_meta_data(self._kwargs)
        table = self.summary
        # Single significance code, derived from this object's p-value.
        table[""] = significance_code(self.p_value)
        body = table.to_string(float_format="{:4.4f}".format, index=False)
        legend = significance_codes_as_text()

        # Empty entries give the leading newline and the blank line after the header.
        return "\n".join(["", header, "", body, "---", legend])
Example #13
0
    def print_summary(self):
        """
        Print the fit header followed by the coefficient table and the
        significance-code legend.

        """
        pad = string_justify(18)

        def row(name, value, template='{} = {}', **print_options):
            # One header line: padded name followed by its value.
            print(template.format(pad(name), value), **print_options)

        print(self)
        row('number of subjects', self.durations.shape[0])
        row('number of events', len(np.where(self.event_observed)[0]))
        row('log-likelihood', self._log_likelihood, '{} = {:.3f}', end='\n\n')

        table = self.summary
        # Trailing unnamed column holds the significance codes.
        table[''] = [significance_code(p_value) for p_value in table['p']]
        print(table.to_string(float_format='{:4.4f}'.format))
        print('---')
        print(significance_codes_as_text(), end='\n\n')
Example #14
0
    def print_summary(self):
        """
        Print the fit header, the coefficient table with significance codes,
        the concordance and the likelihood ratio test.

        """
        pad = string_justify(18)

        def row(name, value, template='{} = {}', **print_options):
            # One header line: padded name followed by its value.
            print(template.format(pad(name), value), **print_options)

        # Header describing the data and the fit.
        print(self)
        row('duration col', self.duration_col)
        row('event col', self.event_col)

        # Optional rows are only shown when the attribute is set.
        if self.weights_col:
            row('weights col', self.weights_col)
        if self.cluster_col:
            row('cluster col', self.cluster_col)
        if self.robust or self.cluster_col:
            row('robust variance', True)
        if self.strata:
            row('strata', self.strata)

        row('number of subjects', self._n_examples)
        row('number of events', self.event_observed.sum())
        row('log-likelihood', self._log_likelihood, '{} = {:.3f}')
        row("time fit was run", self._time_fit_was_called, end='\n\n')
        print('---')

        table = self.summary
        # Trailing unnamed column holds the significance codes.
        table[''] = [significance_code(p_value) for p_value in table['p']]
        print(table.to_string(float_format='{:4.4f}'.format))

        # Legend, then the overall fit statistics.
        print('---')
        print(significance_codes_as_text(), end='\n\n')
        print("Concordance = {:.3f}".format(self.score_))
        ratio_test = self._compute_likelihood_ratio_test()
        print("Likelihood ratio test = {:.3f} on {} df, p={:.5f}".format(*ratio_test))
Example #15
0
    def print_summary(self):
        """
        Print the fit header, the coefficient table with significance codes,
        the concordance and the likelihood ratio test.

        """
        pad = string_justify(18)
        plain = "{} = {}"

        # Header describing the data and the fit.
        print(self)
        print(plain.format(pad("duration col"), self.duration_col))
        print(plain.format(pad("event col"), self.event_col))

        # Optional rows are only shown when the attribute is set.
        if self.weights_col:
            print(plain.format(pad("weights col"), self.weights_col))
        if self.cluster_col:
            print(plain.format(pad("cluster col"), self.cluster_col))
        if self.robust or self.cluster_col:
            print(plain.format(pad("robust variance"), True))
        if self.strata:
            print(plain.format(pad("strata"), self.strata))

        print(plain.format(pad("number of subjects"), self._n_examples))
        print(plain.format(pad("number of events"), self.event_observed.sum()))
        print("{} = {:.3f}".format(pad("log-likelihood"), self._log_likelihood))
        fit_time_line = plain.format(pad("time fit was run"), self._time_fit_was_called)
        print(fit_time_line, end="\n\n")
        print("---")

        coefficients = self.summary
        # Trailing unnamed column holds the significance codes.
        coefficients[""] = [significance_code(p_value) for p_value in coefficients["p"]]
        print(coefficients.to_string(float_format="{:4.4f}".format))

        # Legend, then the overall fit statistics.
        print("---")
        print(significance_codes_as_text(), end="\n\n")
        print("Concordance = {:.3f}".format(self.score_))
        ratio_test = self._compute_likelihood_ratio_test()
        print("Likelihood ratio test = {:.3f} on {} df, p={:.5f}".format(*ratio_test))
Example #16
0
    def print_summary(self):
        """
        Print the sample counts, the coefficient table with significance codes,
        and the likelihood ratio test.
        """
        table = self.summary
        # Trailing unnamed column holds the significance codes.
        table[''] = [significance_code(p_value) for p_value in table['p']]

        # Sample information comes first.
        counts_line = 'periods={}, uniques={}, number of events={}'.format(
            self._n_examples, self._n_unique, self.event_observed.sum()
        )
        print(counts_line, end='\n\n')

        print(table.to_string(float_format='{:4.4f}'.format))

        # Legend for the significance codes.
        print('---')
        legend = "Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 "
        print(legend, end='\n\n')

        ratio_test = self._compute_likelihood_ratio_test()
        print("Likelihood ratio test = {:.3f} on {} df, p={:.5f}".format(*ratio_test))
    def print_summary(self):
        """
        Print the sample counts, the coefficient table and the
        significance-code legend.

        """
        table = self.summary
        # Trailing unnamed column holds the significance codes.
        table[''] = [significance_code(p_value) for p_value in table['p']]

        # Sample information comes first.
        n_events = len(np.where(self.event_observed)[0])
        counts_line = 'n={}, number of events={}'.format(self._n_examples, n_events)
        print(counts_line, end='\n\n')

        print(table.to_string(float_format='{:4.4f}'.format))

        # Legend for the significance codes.
        print('---')
        legend = "Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 "
        print(legend, end='\n\n')
Example #18
0
    def _to_string(self, decimals=2, **kwargs):
        """
        Build the text report: repr, meta data, the summary table and the
        significance-code legend.

        Parameters
        ----------
        decimals: int, optional (default=2)
            handed to the float and p-value formatters of the table.
        kwargs:
            extra meta data; entries override same-named ones in ``self._kwargs``.

        Returns
        -------
        str
            the assembled report.
        """
        # Stored meta data first, call-time entries win on duplicate keys.
        merged_meta = dict(self._kwargs)
        merged_meta.update(kwargs)
        meta_text = self._stringify_meta_data(merged_meta)

        table = self.summary
        table["log(p)"] = np.log(table["p"])
        table[""] = [significance_code(p_value) for p_value in table["p"]]

        # The index is only shown when this object has a name.
        show_index = self.name is not None

        pieces = [
            self.__repr__(),
            "\n" + meta_text + "\n",
            "---\n",
            table.to_string(
                float_format=format_floats(decimals),
                index=show_index,
                formatters={"p": format_p_value(decimals)},
            ),
            "\n---",
            "\n" + significance_codes_as_text(),
        ]
        return "".join(pieces)
Example #19
0
    def print_summary(self):
        """
        Print the sample counts, the coefficient table, the significance-code
        legend and the concordance score.

        """
        table = self.summary
        # Trailing unnamed column holds the significance codes.
        table[''] = [significance_code(p_value) for p_value in table['p']]

        # Sample information comes first.
        n_events = len(np.where(self.event_observed)[0])
        counts_line = 'n={}, number of events={}'.format(self._n_examples, n_events)
        print(counts_line, end='\n\n')

        print(table.to_string(float_format='{:4.4f}'.format))

        # Legend for the significance codes.
        print('---')
        legend = "Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 "
        print(legend, end='\n\n')

        print("Concordance = {:.3f}".format(self.score_))
    def print_summary(self):
        """
        Print the fit header followed by the coefficient table and the
        significance-code legend.

        """
        pad = string_justify(18)
        plain = "{} = {}"

        print(self)
        print(plain.format(pad("number of subjects"), self.durations.shape[0]))
        n_events = len(np.where(self.event_observed)[0])
        print(plain.format(pad("number of events"), n_events))
        log_likelihood_line = "{} = {:.3f}".format(pad("log-likelihood"), self._log_likelihood)
        print(log_likelihood_line, end="\n\n")

        coefficients = self.summary
        # Trailing unnamed column holds the significance codes.
        coefficients[""] = [significance_code(p_value) for p_value in coefficients["p"]]
        print(coefficients.to_string(float_format="{:4.4f}".format))
        print("---")
        print(significance_codes_as_text(), end="\n\n")
Example #21
0
    def print_summary(self):
        """
        Print the sample counts, the coefficient table, the significance-code
        legend and the concordance of the fit.

        """
        coefficients = self.summary
        # Trailing unnamed column holds the significance codes.
        coefficients[''] = [significance_code(p_value) for p_value in coefficients['p']]

        # Sample information comes first.
        event_rows = np.where(self.event_observed)[0]
        counts_line = 'n={}, number of events={}'.format(self.data.shape[0], len(event_rows))
        print(counts_line, end='\n\n')

        print(coefficients.to_string(float_format='{:.3e}'.format))

        # Legend for the significance codes.
        print('---')
        print("Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ", end='\n\n')

        # The partial hazards are negated before being scored against the durations.
        negated_hazards = -self.predict_partial_hazard(self.data).values.ravel()
        score = concordance_index(self.durations, negated_hazards, self.event_observed)
        print("Concordance = {:.3f}".format(score))
Example #22
0
    def plot(self, standardized=False, columns=None, **kwargs):
        """
        Produces a visual representation of the fitted coefficients, including their standard errors and magnitudes.

        Parameters
        ----------
        standardized: bool, optional
            standardize each estimated coefficient and confidence interval
            endpoints by the standard error of the estimate.
        columns : list, optional
            specify a subset of the columns to plot
        kwargs:
            only ``ax`` is read: an existing matplotlib axis to draw on. A new
            figure is created when it is not given.

        Returns
        -------
        ax: matplotlib axis
            the matplotlib axis that was drawn on.

        """
        from matplotlib import pyplot as plt

        ax = kwargs.get("ax", None) or plt.figure().add_subplot(111)

        if columns is not None:
            yaxis_locations = list(range(len(columns)))
            summary = self.summary.loc[columns]
            lower_bound = self.confidence_intervals_[columns].loc[
                "lower-bound"].copy()
            upper_bound = self.confidence_intervals_[columns].loc[
                "upper-bound"].copy()
            hazards = self.hazards_[columns].values[0].copy()
        else:
            yaxis_locations = list(range(len(self.hazards_.columns)))
            summary = self.summary
            lower_bound = self.confidence_intervals_.loc["lower-bound"].copy()
            upper_bound = self.confidence_intervals_.loc["upper-bound"].copy()
            hazards = self.hazards_.values[0].copy()

        if standardized:
            se = summary["se(coef)"]
            lower_bound /= se
            upper_bound /= se
            # Divide by the raw values so `hazards` stays a plain ndarray
            # (assumes the summary rows follow the order of the plotted columns).
            hazards /= se.values

        order = np.argsort(hazards)
        ax.scatter(upper_bound.values[order],
                   yaxis_locations,
                   marker="|",
                   c="k")
        ax.scatter(lower_bound.values[order],
                   yaxis_locations,
                   marker="|",
                   c="k")
        ax.scatter(hazards[order], yaxis_locations, marker="o", c="k")
        ax.hlines(yaxis_locations,
                  lower_bound.values[order],
                  upper_bound.values[order],
                  color="k",
                  lw=1)

        # `order` holds positions, so use .iloc; .items() replaces
        # Series.iteritems(), which was removed in pandas 2.0.
        tick_labels = [
            c + significance_code(p).strip()
            for (c, p) in summary["p"].iloc[order].items()
        ]

        # Label the axis we drew on: pyplot's "current" axis can be a different
        # one when the caller supplies `ax`.
        ax.set_yticks(yaxis_locations)
        ax.set_yticklabels(tick_labels)
        ax.set_xlabel("standardized coef" if standardized else "coef")
        return ax