def test_sf_doesnt_return_nans():
    """The survival function rebuilt from the NPMLE output must not contain NaNs."""
    lower = [6, 7, 8, 7, 5]
    upper = [7, 8, 10, 16, 20]
    results = npmle(lower, upper)

    # results[1] holds the intervals carrying mass, results[0] their probabilities.
    expected_intervals = [interval(7, 7), interval(8, 8)]
    expected_probs = [0.5, 0.5]
    npt.assert_allclose(results[1], expected_intervals)
    npt.assert_allclose(results[0], expected_probs)

    eval_times = [6, 7, 8, 16, 20]
    sf = reconstruct_survival_function(*results, timeline=eval_times)
    has_nan = np.isnan(sf.values).any()
    assert not has_nan
def fit_interval_censoring(
    self,
    lower_bound,
    upper_bound,
    event_observed=None,
    timeline=None,
    label=None,
    alpha=None,
    ci_labels=None,
    entry=None,
    weights=None,
    tol: float = 1e-5,
    show_progress: bool = False,
    **kwargs,
) -> "KaplanMeierFitter":
    """
    Fit the model to an interval-censored dataset using non-parametric MLE. This estimator is
    also called the Turnbull Estimator.

    Currently, only closed intervals are supported. However, it's easy to create open intervals by
    adding (or subtracting) a very small value from the lower-bound (or upper bound). For example,
    the following turns closed intervals into open intervals.

    >>> left, right = df['left'], df['right']
    >>> KaplanMeierFitter().fit_interval_censoring(left + 0.00001, right - 0.00001)

    Note
    ------
    This is new and experimental, and many features are missing.

    Parameters
    ----------
    lower_bound: an array, list, pd.DataFrame or pd.Series
      length n -- lower bound of observations
    upper_bound: an array, list, pd.DataFrame or pd.Series
      length n -- upper bound of observations
    event_observed: an array, list, pd.DataFrame, or pd.Series, optional
      True if the death was observed, False if the event was lost (right-censored). This can be
      computed from the lower_bound and upper_bound, and can be left blank. When given, it is only
      used to validate the bounds; ``self.event_observed`` is always derived from the bounds.
    timeline: an array, list, pd.DataFrame, or pd.Series, optional
      return the best estimate at the values in timelines (positively increasing). When omitted,
      the unique values of both bounds are used.
    label: string, optional
      a string to name the column of the estimate.
    alpha: float, optional
      the alpha value in the confidence intervals. Overrides the initializing alpha for this call
      to fit only. Currently unused by this method, since no confidence intervals are computed.
    ci_labels: tuple, optional
      add custom column names to the generated confidence intervals as a length-2 list:
      [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<1-alpha/2>.
      Currently unused by this method, since no confidence intervals are computed.
    entry: an array, list, pd.DataFrame, or pd.Series, optional
      relative time when a subject entered the study. This is useful for left-truncated (not
      left-censored) observations. If None, all members of the population entered study when
      they were "born". Not supported yet: any non-None value raises.
    weights: an array, list, pd.DataFrame, or pd.Series, optional
      if providing a weighted dataset. For example, instead of providing every subject as a single
      element of `durations` and `event_observed`, one could weigh subject differently.
      Defaults to a weight of one per observation.
    tol: float, optional
      minimum difference in log likelihood changes for iterative algorithm.
    show_progress: bool, optional
      display information during fitting.
    **kwargs:
      forwarded unchanged to ``npmle``.

    Returns
    -------
    self: KaplanMeierFitter
      self with new properties like ``survival_function_``, ``plot()``, ``median_survival_time_``

    Raises
    ------
    NotImplementedError
      if ``entry`` is provided.
    ValueError
      if ``upper_bound`` contains NaNs, if any upper bound is smaller than its lower bound, or
      if ``event_observed`` disagrees with ``lower_bound == upper_bound``.
    """
    if entry is not None:
        raise NotImplementedError("entry is not supported yet")

    self.upper_bound = np.atleast_1d(pass_for_numeric_dtypes_or_raise_array(upper_bound))
    self.lower_bound = np.atleast_1d(pass_for_numeric_dtypes_or_raise_array(lower_bound))

    check_nans_or_infs(self.lower_bound)
    # The nan/inf helper is only applied to the lower bound, so infinite upper bounds are
    # left alone here. NaNs, however, make every comparison below evaluate to False and
    # would otherwise reach the optimiser unnoticed, so reject them explicitly.
    if np.isnan(self.upper_bound).any():
        raise ValueError("NaNs were detected in upper_bound. Try using np.isnan to find the problematic values.")

    # Build the default weights from the normalised bound so that their shape always matches
    # the arrays handed to the optimiser, even for scalar or DataFrame input.
    if weights is None:
        weights = np.ones_like(self.upper_bound)
    self.weights = np.asarray(weights)

    self.event_observed = self.lower_bound == self.upper_bound

    self.timeline = coalesce(timeline, np.unique(np.concatenate((self.upper_bound, self.lower_bound))))

    if (self.upper_bound < self.lower_bound).any():
        raise ValueError("All upper_bound times must be greater than or equal to lower_bound times.")

    if event_observed is None:
        event_observed = self.upper_bound == self.lower_bound

    if ((self.lower_bound == self.upper_bound) != event_observed).any():
        raise ValueError(
            "For all rows, lower_bound == upper_bound if and only if event observed = 1 (uncensored). "
            "Likewise, lower_bound < upper_bound if and only if event observed = 0 (censored)"
        )

    self._label = coalesce(label, self._label, "NPMLE_estimate")

    # Pass the same array that is stored on the instance, so the fitted state and the
    # optimiser input cannot drift apart.
    results = npmle(
        self.lower_bound,
        self.upper_bound,
        verbose=show_progress,
        tol=tol,
        weights=self.weights,
        **kwargs,
    )
    self.survival_function_ = reconstruct_survival_function(*results, self.timeline, label=self._label).loc[
        self.timeline
    ]
    self.cumulative_density_ = 1 - self.survival_function_

    self._median = median_survival_times(self.survival_function_)

    # NOTE: confidence intervals are not computed for this estimator yet, so no
    # confidence_interval_* attributes are set by this method.

    # estimation methods
    self._estimation_method = "survival_function_"
    self._estimate_name = "survival_function_"
    return self
def fit_interval_censoring(
    self,
    lower_bound,
    upper_bound,
    event_observed=None,
    timeline=None,
    label=None,
    alpha=None,
    ci_labels=None,
    show_progress=False,
    entry=None,
    weights=None,
    tol=1e-7,
) -> "KaplanMeierFitter":
    """
    Fit the model to an interval-censored dataset using non-parametric MLE. This estimator is
    also called the Turnbull Estimator.

    Note
    ------
    This is new and experimental, and many features are missing.

    Parameters
    ----------
    lower_bound: an array, list, pd.DataFrame or pd.Series
      length n -- lower bound of observations
    upper_bound: an array, list, pd.DataFrame or pd.Series
      length n -- upper bound of observations
    event_observed: an array, list, pd.DataFrame, or pd.Series, optional
      True if the death was observed, False if the event was lost (right-censored). This can be
      computed from the lower_bound and upper_bound, and can be left blank. When given, it is only
      used to validate the bounds; ``self.event_observed`` is always derived from the bounds.
    timeline: an array, list, pd.DataFrame, or pd.Series, optional
      return the best estimate at the values in timelines (positively increasing). When omitted,
      the unique values of both bounds are used.
    label: string, optional
      a string to name the column of the estimate.
    alpha: float, optional
      the alpha value in the confidence intervals. Overrides the initializing alpha for this call
      to fit only. Currently unused by this method, since no confidence intervals are computed.
    ci_labels: tuple, optional
      add custom column names to the generated confidence intervals as a length-2 list:
      [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<1-alpha/2>.
      Currently unused by this method, since no confidence intervals are computed.
    show_progress: bool, optional
      display information during fitting.
    entry: an array, list, pd.DataFrame, or pd.Series, optional
      relative time when a subject entered the study. This is useful for left-truncated (not
      left-censored) observations. If None, all members of the population entered study when
      they were "born". Not supported yet: any non-None value raises.
    weights: an array, list, pd.DataFrame, or pd.Series, optional
      if providing a weighted dataset. For example, instead of providing every subject as a single
      element of `durations` and `event_observed`, one could weigh subject differently.
      Not supported yet: any non-None value raises, and unit weights are stored instead.
    tol: float, optional
      convergence tolerance forwarded to the iterative NPMLE routine.

    Returns
    -------
    self: KaplanMeierFitter
      self with new properties like ``survival_function_``, ``plot()``, ``median_survival_time_``

    Raises
    ------
    NotImplementedError
      if ``entry`` or ``weights`` is provided.
    ValueError
      if ``upper_bound`` contains NaNs, if any upper bound is smaller than its lower bound, or
      if ``event_observed`` disagrees with ``lower_bound == upper_bound``.
    """
    warnings.warn("This is new and experimental, many feature are missing and accuracy is not reliable", UserWarning)

    if entry is not None or weights is not None:
        raise NotImplementedError("entry / weights is not supported yet")

    self.weights = np.ones_like(upper_bound)

    self.upper_bound = np.atleast_1d(pass_for_numeric_dtypes_or_raise_array(upper_bound))
    self.lower_bound = np.atleast_1d(pass_for_numeric_dtypes_or_raise_array(lower_bound))

    check_nans_or_infs(self.lower_bound)
    # The nan/inf helper is only applied to the lower bound, so infinite upper bounds are
    # left alone here. NaNs, however, make every comparison below evaluate to False and
    # would otherwise reach the optimiser unnoticed, so reject them explicitly.
    if np.isnan(self.upper_bound).any():
        raise ValueError("NaNs were detected in upper_bound. Try using np.isnan to find the problematic values.")

    self.event_observed = self.lower_bound == self.upper_bound

    self.timeline = coalesce(timeline, np.unique(np.concatenate((self.upper_bound, self.lower_bound))))

    if (self.upper_bound < self.lower_bound).any():
        raise ValueError("All upper_bound times must be greater than or equal to lower_bound times.")

    if event_observed is None:
        event_observed = self.upper_bound == self.lower_bound

    if ((self.lower_bound == self.upper_bound) != event_observed).any():
        raise ValueError(
            "For all rows, lower_bound == upper_bound if and only if event observed = 1 (uncensored). "
            "Likewise, lower_bound < upper_bound if and only if event observed = 0 (censored)"
        )

    self._label = coalesce(label, self._label, "NPMLE_estimate")

    # Forward the caller's tolerance; previously ``tol`` was accepted but silently ignored.
    probs, t_intervals = npmle(self.lower_bound, self.upper_bound, tol=tol, verbose=show_progress)
    self.survival_function_ = reconstruct_survival_function(probs, t_intervals, self.timeline, label=self._label).loc[
        self.timeline
    ]
    self.cumulative_density_ = 1 - self.survival_function_

    self._median = median_survival_times(self.survival_function_)
    self.percentile = functools.partial(qth_survival_time, model_or_survival_function=self.survival_function_)

    # NOTE: confidence intervals are not computed for this estimator yet, so no
    # confidence_interval_* attributes are set by this method.

    # estimation methods
    self._estimation_method = "survival_function_"
    self._estimate_name = "survival_function_"
    self._update_docstrings()
    return self