Ejemplo n.º 1
0
    def fit(self, durations, event_observed=None, timeline=None, entry=None, label='KM_estimate',
            alpha=None, left_censorship=False, ci_labels=None):
        """
        Parameters:
          duration: an array, or pd.Series, of length n -- duration subject was observed for
          timeline: return the best estimate at the values in timelines (postively increasing)
          event_observed: an array, or pd.Series, of length n -- True if the the death was observed, False if the event
             was lost (right-censored). Defaults all True if event_observed==None
          entry: an array, or pd.Series, of length n -- relative time when a subject entered the study. This is
             useful for left-truncated (not left-censored) observations. If None, all members of the population
             were born at time 0.
          label: a string to name the column of the estimate.
          alpha: the alpha value in the confidence intervals. Overrides the initializing
             alpha for this call to fit only.
          left_censorship: True if durations and event_observed refer to left censorship events. Default False
          ci_labels: add custom column names to the generated confidence intervals
                as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha>


        Returns:
          self, with new properties like 'survival_function_'.

        """
        # if the user is interested in left-censorship, we return the cumulative_density_, no survival_function_,
        estimate_name = 'survival_function_' if not left_censorship else 'cumulative_density_'
        v = _preprocess_inputs(durations, event_observed, timeline, entry)
        self.durations, self.event_observed, self.timeline, self.entry, self.event_table = v
        self._label = label
        alpha = alpha if alpha else self.alpha
        log_survival_function, cumulative_sq_ = _additive_estimate(self.event_table, self.timeline,
                                                                   self._additive_f, self._additive_var,
                                                                   left_censorship)

        if entry is not None:
            # a serious problem with KM is that when the sample size is small and there are too few early
            # truncation times, it may happen that is the number of patients at risk and the number of deaths is the same.
            # we adjust for this using the Breslow-Fleming-Harrington estimator
            n = self.event_table.shape[0]
            net_population = (self.event_table['entrance'] - self.event_table['removed']).cumsum()
            if net_population.iloc[:int(n / 2)].min() == 0:
                ix = net_population.iloc[:int(n / 2)].argmin()
                raise StatError("""There are too few early truncation times and too many events. S(t)==0 for all t>%.1f. Recommend BreslowFlemingHarringtonFitter.""" % ix)

        # estimation
        setattr(self, estimate_name, pd.DataFrame(np.exp(log_survival_function), columns=[self._label]))
        self.__estimate = getattr(self, estimate_name)
        self.confidence_interval_ = self._bounds(cumulative_sq_[:, None], alpha, ci_labels)
        self.median_ = median_survival_times(self.__estimate)

        # estimation methods
        self.predict = self._predict(estimate_name, label)
        self.subtract = self._subtract(estimate_name)
        self.divide = self._divide(estimate_name)

        # plotting functions
        self.plot = self._plot_estimate(estimate_name)
        setattr(self, "plot_" + estimate_name, self.plot)
        self.plot_loglogs = plot_loglogs(self)
        return self
Ejemplo n.º 2
0
    def fit(self,
            durations,
            event_observed=None,
            timeline=None,
            entry=None,
            label='KM_estimate',
            alpha=None,
            left_censorship=False,
            ci_labels=None):
        """
        Parameters:
          duration: an array, or pd.Series, of length n -- duration subject was observed for
          timeline: return the best estimate at the values in timelines (postively increasing)
          event_observed: an array, or pd.Series, of length n -- True if the the death was observed, False if the event
             was lost (right-censored). Defaults all True if event_observed==None
          entry: an array, or pd.Series, of length n -- relative time when a subject entered the study. This is
             useful for left-truncated (not left-censored) observations. If None, all members of the population
             were born at time 0.
          label: a string to name the column of the estimate.
          alpha: the alpha value in the confidence intervals. Overrides the initializing
             alpha for this call to fit only.
          left_censorship: True if durations and event_observed refer to left censorship events. Default False
          ci_labels: add custom column names to the generated confidence intervals
                as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha>


        Returns:
          self, with new properties like 'survival_function_'.

        """
        # if the user is interested in left-censorship, we return the cumulative_density_, no survival_function_,
        estimate_name = 'survival_function_' if not left_censorship else 'cumulative_density_'
        v = _preprocess_inputs(durations, event_observed, timeline, entry)
        self.durations, self.event_observed, self.timeline, self.entry, self.event_table = v
        self._label = label
        alpha = alpha if alpha else self.alpha
        log_survival_function, cumulative_sq_ = _additive_estimate(
            self.event_table, self.timeline, self._additive_f,
            self._additive_var, left_censorship)

        if entry is not None:
            # a serious problem with KM is that when the sample size is small and there are too few early
            # truncation times, it may happen that is the number of patients at risk and the number of deaths is the same.
            # we adjust for this using the Breslow-Fleming-Harrington estimator
            n = self.event_table.shape[0]
            net_population = (self.event_table['entrance'] -
                              self.event_table['removed']).cumsum()
            if net_population.iloc[:int(n / 2)].min() == 0:
                ix = net_population.iloc[:int(n / 2)].idxmin()
                raise StatError(
                    """There are too few early truncation times and too many events. S(t)==0 for all t>%.1f. Recommend BreslowFlemingHarringtonFitter."""
                    % ix)

        # estimation
        setattr(
            self, estimate_name,
            pd.DataFrame(np.exp(log_survival_function), columns=[self._label]))
        self.__estimate = getattr(self, estimate_name)
        self.confidence_interval_ = self._bounds(cumulative_sq_[:, None],
                                                 alpha, ci_labels)
        self.median_ = median_survival_times(self.__estimate,
                                             left_censorship=left_censorship)

        # estimation methods
        self.predict = self._predict(estimate_name, label)
        self.subtract = self._subtract(estimate_name)
        self.divide = self._divide(estimate_name)

        # plotting functions
        self.plot = self._plot_estimate(estimate_name)
        setattr(self, "plot_" + estimate_name, self.plot)
        self.plot_loglogs = plot_loglogs(self)
        return self
Ejemplo n.º 3
0
 def plot_loglogs(self, *args, **kwargs):
     r"""
     Plot :math:`\log(S(t))` against :math:`\log(t)`
     """
     return plot_loglogs(self, *args, **kwargs)
Ejemplo n.º 4
0
 def plot_loglogs(self, *args, **kwargs):
     return plot_loglogs(self, *args, **kwargs)
Ejemplo n.º 5
0
 def plot_loglogs(self, *args, **kwargs):
     r"""
     Plot :math:`\log(S(t))` against :math:`\log(t)`. Same arguments as ``.plot``.
     """
     return plot_loglogs(self, *args, **kwargs)
 def plot_loglogs(self, *args, **kwargs):
     r"""
     Plot :math:`\log(S(t))` against :math:`\log(t)`
     """
     return plot_loglogs(self, *args, **kwargs)