Example #1
0
def test_sf_doesnt_return_nans():
    left = [6, 7, 8, 7, 5]
    right = [7, 8, 10, 16, 20]
    results = npmle(left, right)
    npt.assert_allclose(results[1], [interval(7, 7), interval(8, 8)])
    npt.assert_allclose(results[0], [0.5, 0.5])
    sf = reconstruct_survival_function(*results, timeline=[6, 7, 8, 16, 20])
    assert not np.isnan(sf.values).any()
Example #2
0
    def fit_interval_censoring(
        self,
        lower_bound,
        upper_bound,
        event_observed=None,
        timeline=None,
        label=None,
        alpha=None,
        ci_labels=None,
        entry=None,
        weights=None,
        tol: float = 1e-5,
        show_progress: bool = False,
        **kwargs,
    ) -> "KaplanMeierFitter":
        """
        Fit the model to a interval-censored dataset using non-parametric MLE. This estimator is
        also called the Turnbull Estimator.

        Currently, only closed interval are supported. However, it's easy to create open intervals by adding (or subtracting) a very small
        value from the lower-bound (or upper bound). For example, the following turns closed intervals into open intervals.

        >>> left, right = df['left'], df['right']
        >>> KaplanMeierFitter().fit_interval_censoring(left + 0.00001, right - 0.00001)

        Note
        ------
        This is new and experimental, and many features are missing.

        Parameters
        ----------
          lower_bound: an array, list, pd.DataFrame or pd.Series
            length n -- lower bound of observations
          upper_bound: an array, list, pd.DataFrame or pd.Series
            length n -- upper bound of observations
          event_observed: an array, list, pd.DataFrame, or pd.Series, optional
             True if the the death was observed, False if the event was lost (right-censored). This can be computed from
             the lower_bound and upper_bound, and can be left blank.
          timeline: an array, list, pd.DataFrame, or pd.Series, optional
            return the best estimate at the values in timelines (positively increasing)
          entry: an array, list, pd.DataFrame, or pd.Series, optional
             relative time when a subject entered the study. This is useful for left-truncated (not left-censored) observations. If None, all members of the population
             entered study when they were "born".
          label: string, optional
            a string to name the column of the estimate.
          alpha: float, optional
            the alpha value in the confidence intervals. Overrides the initializing alpha for this call to fit only.
          ci_labels: tuple, optional
                add custom column names to the generated confidence intervals as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<1-alpha/2>
          weights: an array, list, pd.DataFrame, or pd.Series, optional
              if providing a weighted dataset. For example, instead
              of providing every subject as a single element of `durations` and `event_observed`, one could
              weigh subject differently.
          tol: float, optional
            minimum difference in log likelihood changes for iterative algorithm.
          show_progress: bool, optional
            display information during fitting.

        Returns
        -------
        self: KaplanMeierFitter
          self with new properties like ``survival_function_``, ``plot()``, ``median_survival_time_``
        """
        if entry is not None:
            raise NotImplementedError("entry is not supported yet")

        if weights is None:
            weights = np.ones_like(upper_bound)

        self.weights = np.asarray(weights)

        self.upper_bound = np.atleast_1d(
            pass_for_numeric_dtypes_or_raise_array(upper_bound))
        self.lower_bound = np.atleast_1d(
            pass_for_numeric_dtypes_or_raise_array(lower_bound))
        check_nans_or_infs(self.lower_bound)

        self.event_observed = self.lower_bound == self.upper_bound

        self.timeline = coalesce(
            timeline,
            np.unique(np.concatenate((self.upper_bound, self.lower_bound))))

        if (self.upper_bound < self.lower_bound).any():
            raise ValueError(
                "All upper_bound times must be greater than or equal to lower_bound times."
            )

        if event_observed is None:
            event_observed = self.upper_bound == self.lower_bound

        if ((self.lower_bound == self.upper_bound) != event_observed).any():
            raise ValueError(
                "For all rows, lower_bound == upper_bound if and only if event observed = 1 (uncensored). Likewise, lower_bound < upper_bound if and only if event observed = 0 (censored)"
            )

        self._label = coalesce(label, self._label, "NPMLE_estimate")

        results = npmle(self.lower_bound,
                        self.upper_bound,
                        verbose=show_progress,
                        tol=tol,
                        weights=weights,
                        **kwargs)
        self.survival_function_ = reconstruct_survival_function(
            *results, self.timeline, label=self._label).loc[self.timeline]
        self.cumulative_density_ = 1 - self.survival_function_

        self._median = median_survival_times(self.survival_function_)
        """
        self.confidence_interval_ = npmle_compute_confidence_intervals(self.lower_bound, self.upper_bound, self.survival_function_, self.alpha)
        self.confidence_interval_survival_function_ = self.confidence_interval_
        self.confidence_interval_cumulative_density_ = 1 - self.confidence_interval_
        """
        # estimation methods
        self._estimation_method = "survival_function_"
        self._estimate_name = "survival_function_"
        return self
Example #3
0
    def fit_interval_censoring(
        self,
        lower_bound,
        upper_bound,
        event_observed=None,
        timeline=None,
        label=None,
        alpha=None,
        ci_labels=None,
        show_progress=False,
        entry=None,
        weights=None,
        tol=1e-7,
    ) -> "KaplanMeierFitter":
        """
        Fit the model to a interval-censored dataset using non-parametric MLE. This estimator is
        also called the Turball Estimator.

        Note
        ------
        This is new and experimental, and many feature are missing.

        Parameters
        ----------
          lower_bound: an array, list, pd.DataFrame or pd.Series
            length n -- lower bound of observations
          upper_bound: an array, list, pd.DataFrame or pd.Series
            length n -- upper bound of observations
          event_observed: an array, list, pd.DataFrame, or pd.Series, optional
             True if the the death was observed, False if the event was lost (right-censored). This can be computed from
             the lower_bound and upper_bound, and can be left blank.
          timeline: an array, list, pd.DataFrame, or pd.Series, optional
            return the best estimate at the values in timelines (positively increasing)
          entry: an array, list, pd.DataFrame, or pd.Series, optional
             relative time when a subject entered the study. This is useful for left-truncated (not left-censored) observations. If None, all members of the population
             entered study when they were "born".
          label: string, optional
            a string to name the column of the estimate.
          alpha: float, optional
            the alpha value in the confidence intervals. Overrides the initializing alpha for this call to fit only.
          ci_labels: tuple, optional
                add custom column names to the generated confidence intervals as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<1-alpha/2>
          weights: an array, list, pd.DataFrame, or pd.Series, optional
              if providing a weighted dataset. For example, instead
              of providing every subject as a single element of `durations` and `event_observed`, one could
              weigh subject differently.

        Returns
        -------
        self: KaplanMeierFitter
          self with new properties like ``survival_function_``, ``plot()``, ``median_survival_time_``
        """
        warnings.warn(
            "This is new and experimental, many feature are missing and accuracy is not reliable",
            UserWarning)

        if entry is not None or weights is not None:
            raise NotImplementedError("entry / weights is not supported yet")
        self.weights = np.ones_like(upper_bound)

        self.upper_bound = np.atleast_1d(
            pass_for_numeric_dtypes_or_raise_array(upper_bound))
        self.lower_bound = np.atleast_1d(
            pass_for_numeric_dtypes_or_raise_array(lower_bound))
        check_nans_or_infs(self.lower_bound)

        self.event_observed = self.lower_bound == self.upper_bound

        self.timeline = coalesce(
            timeline,
            np.unique(np.concatenate((self.upper_bound, self.lower_bound))))

        if (self.upper_bound < self.lower_bound).any():
            raise ValueError(
                "All upper_bound times must be greater than or equal to lower_bound times."
            )

        if event_observed is None:
            event_observed = self.upper_bound == self.lower_bound

        if ((self.lower_bound == self.upper_bound) != event_observed).any():
            raise ValueError(
                "For all rows, lower_bound == upper_bound if and only if event observed = 1 (uncensored). Likewise, lower_bound < upper_bound if and only if event observed = 0 (censored)"
            )

        self._label = coalesce(label, self._label, "NPMLE_estimate")

        probs, t_intervals = npmle(self.lower_bound,
                                   self.upper_bound,
                                   verbose=show_progress)
        self.survival_function_ = reconstruct_survival_function(
            probs, t_intervals, self.timeline,
            label=self._label).loc[self.timeline]
        self.cumulative_density_ = 1 - self.survival_function_

        self._median = median_survival_times(self.survival_function_)
        self.percentile = functools.partial(
            qth_survival_time,
            model_or_survival_function=self.survival_function_)
        """
        self.confidence_interval_ = npmle_compute_confidence_intervals(self.lower_bound, self.upper_bound, self.survival_function_, self.alpha)
        self.confidence_interval_survival_function_ = self.confidence_interval_
        self.confidence_interval_cumulative_density_ = 1 - self.confidence_interval_
        """
        # estimation methods
        self._estimation_method = "survival_function_"
        self._estimate_name = "survival_function_"
        self._update_docstrings()
        return self