def fit(self, durations, event_observed=None, timeline=None, entry=None, label='KM_estimate', alpha=None, left_censorship=False, ci_labels=None): """ Parameters: duration: an array, or pd.Series, of length n -- duration subject was observed for timeline: return the best estimate at the values in timelines (postively increasing) event_observed: an array, or pd.Series, of length n -- True if the the death was observed, False if the event was lost (right-censored). Defaults all True if event_observed==None entry: an array, or pd.Series, of length n -- relative time when a subject entered the study. This is useful for left-truncated (not left-censored) observations. If None, all members of the population were born at time 0. label: a string to name the column of the estimate. alpha: the alpha value in the confidence intervals. Overrides the initializing alpha for this call to fit only. left_censorship: True if durations and event_observed refer to left censorship events. Default False ci_labels: add custom column names to the generated confidence intervals as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha> Returns: self, with new properties like 'survival_function_'. """ # if the user is interested in left-censorship, we return the cumulative_density_, no survival_function_, estimate_name = 'survival_function_' if not left_censorship else 'cumulative_density_' v = _preprocess_inputs(durations, event_observed, timeline, entry) self.durations, self.event_observed, self.timeline, self.entry, self.event_table = v self._label = label alpha = alpha if alpha else self.alpha log_survival_function, cumulative_sq_ = _additive_estimate(self.event_table, self.timeline, self._additive_f, self._additive_var, left_censorship) if entry is not None: # a serious problem with KM is that when the sample size is small and there are too few early # truncation times, it may happen that is the number of patients at risk and the number of deaths is the same. # we adjust for this using the Breslow-Fleming-Harrington estimator n = self.event_table.shape[0] net_population = (self.event_table['entrance'] - self.event_table['removed']).cumsum() if net_population.iloc[:int(n / 2)].min() == 0: ix = net_population.iloc[:int(n / 2)].argmin() raise StatError("""There are too few early truncation times and too many events. S(t)==0 for all t>%.1f. Recommend BreslowFlemingHarringtonFitter.""" % ix) # estimation setattr(self, estimate_name, pd.DataFrame(np.exp(log_survival_function), columns=[self._label])) self.__estimate = getattr(self, estimate_name) self.confidence_interval_ = self._bounds(cumulative_sq_[:, None], alpha, ci_labels) self.median_ = median_survival_times(self.__estimate) # estimation methods self.predict = self._predict(estimate_name, label) self.subtract = self._subtract(estimate_name) self.divide = self._divide(estimate_name) # plotting functions self.plot = self._plot_estimate(estimate_name) setattr(self, "plot_" + estimate_name, self.plot) self.plot_loglogs = plot_loglogs(self) return self
def fit(self, durations, event_observed=None, timeline=None, entry=None, label='KM_estimate', alpha=None, left_censorship=False, ci_labels=None): """ Parameters: duration: an array, or pd.Series, of length n -- duration subject was observed for timeline: return the best estimate at the values in timelines (postively increasing) event_observed: an array, or pd.Series, of length n -- True if the the death was observed, False if the event was lost (right-censored). Defaults all True if event_observed==None entry: an array, or pd.Series, of length n -- relative time when a subject entered the study. This is useful for left-truncated (not left-censored) observations. If None, all members of the population were born at time 0. label: a string to name the column of the estimate. alpha: the alpha value in the confidence intervals. Overrides the initializing alpha for this call to fit only. left_censorship: True if durations and event_observed refer to left censorship events. Default False ci_labels: add custom column names to the generated confidence intervals as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha> Returns: self, with new properties like 'survival_function_'. """ # if the user is interested in left-censorship, we return the cumulative_density_, no survival_function_, estimate_name = 'survival_function_' if not left_censorship else 'cumulative_density_' v = _preprocess_inputs(durations, event_observed, timeline, entry) self.durations, self.event_observed, self.timeline, self.entry, self.event_table = v self._label = label alpha = alpha if alpha else self.alpha log_survival_function, cumulative_sq_ = _additive_estimate( self.event_table, self.timeline, self._additive_f, self._additive_var, left_censorship) if entry is not None: # a serious problem with KM is that when the sample size is small and there are too few early # truncation times, it may happen that is the number of patients at risk and the number of deaths is the same. # we adjust for this using the Breslow-Fleming-Harrington estimator n = self.event_table.shape[0] net_population = (self.event_table['entrance'] - self.event_table['removed']).cumsum() if net_population.iloc[:int(n / 2)].min() == 0: ix = net_population.iloc[:int(n / 2)].idxmin() raise StatError( """There are too few early truncation times and too many events. S(t)==0 for all t>%.1f. Recommend BreslowFlemingHarringtonFitter.""" % ix) # estimation setattr( self, estimate_name, pd.DataFrame(np.exp(log_survival_function), columns=[self._label])) self.__estimate = getattr(self, estimate_name) self.confidence_interval_ = self._bounds(cumulative_sq_[:, None], alpha, ci_labels) self.median_ = median_survival_times(self.__estimate, left_censorship=left_censorship) # estimation methods self.predict = self._predict(estimate_name, label) self.subtract = self._subtract(estimate_name) self.divide = self._divide(estimate_name) # plotting functions self.plot = self._plot_estimate(estimate_name) setattr(self, "plot_" + estimate_name, self.plot) self.plot_loglogs = plot_loglogs(self) return self
def plot_loglogs(self, *args, **kwargs): r""" Plot :math:`\log(S(t))` against :math:`\log(t)` """ return plot_loglogs(self, *args, **kwargs)
def plot_loglogs(self, *args, **kwargs): return plot_loglogs(self, *args, **kwargs)
def plot_loglogs(self, *args, **kwargs): r""" Plot :math:`\log(S(t))` against :math:`\log(t)`. Same arguments as ``.plot``. """ return plot_loglogs(self, *args, **kwargs)