Esempi in Python per CensoringType, esempi in Python per lifelines.utils.CensoringType

Esempio n. 1

0

Mostra file

File: piecewise_exponential_regression_fitter.py Progetto: matrogersmtl/lifelines

    def _ll_null(self):
        """
        Log-likelihood of the intercept-only ("null") version of this model.

        A fresh instance of the same class is refitted with every fitted
        parameter regressed on a constant ``_intercept`` column only. The
        result is cached on ``self._ll_null_`` so the refit happens once.

        Returns
        -------
        The ``log_likelihood_`` attribute of the refitted null model.

        Raises
        ------
        NotImplementedError
            if ``self`` was fitted with left censoring.
        """
        # return the cached value when a previous call already computed it
        try:
            return self._ll_null_
        except AttributeError:
            pass

        param_names = self._fitted_parameter_names
        start = np.zeros(len(param_names))
        intercept_only = {name: ["_intercept"] for name in param_names}
        null_model = self.__class__(breakpoints=self.breakpoints[:-1], penalizer=self.penalizer)

        if CensoringType.is_right_censoring(self):
            frame = pd.DataFrame(
                {
                    "T": self.durations,
                    "E": self.event_observed,
                    "entry": self.entry,
                    "_intercept": 1.0,
                }
            )
            null_model.fit_right_censoring(
                frame,
                "T",
                "E",
                initial_point=start,
                entry_col="entry",
                regressors=intercept_only,
            )
        elif CensoringType.is_interval_censoring(self):
            frame = pd.DataFrame(
                {
                    "lb": self.lower_bound,
                    "ub": self.upper_bound,
                    "E": self.event_observed,
                    "entry": self.entry,
                    "_intercept": 1.0,
                }
            )
            null_model.fit_interval_censoring(
                frame,
                "lb",
                "ub",
                "E",
                initial_point=start,
                entry_col="entry",
                regressors=intercept_only,
            )

        # checked separately (not as an ``elif``), exactly as the fits above were skipped
        if CensoringType.is_left_censoring(self):
            raise NotImplementedError()

        self._ll_null_ = null_model.log_likelihood_
        return self._ll_null_

Esempio n. 2

0

Mostra file

File: plotting.py Progetto: zuoxiaolei/lifelines

def cdf_plot(model, timeline=None, **plot_kwargs):
    """
    Overlay the Kaplan-Meier cumulative density with the CDF of the fitted model.

    Parameters
    ----------
    model:
        a fitted model exposing ``durations``, ``event_observed`` and ``timeline``.
    timeline: optional
        points at which both curves are evaluated; defaults to ``model.timeline``.
    plot_kwargs:
        forwarded to both plotting calls. The axis is taken from its ``"ax"``
        entry after ``set_kwargs_ax`` has processed the dict.

    Returns
    -------
    ax:
        the axis that was drawn on.

    Raises
    ------
    NotImplementedError
        if ``model`` was fitted with interval censoring.
    """
    from lifelines import KaplanMeierFitter

    set_kwargs_ax(plot_kwargs)
    ax = plot_kwargs.pop("ax")

    timeline = model.timeline if timeline is None else timeline

    empirical_label = "empirical quantiles"

    # choose the Kaplan-Meier fitting routine that matches the model's censoring
    if CensoringType.is_left_censoring(model):
        fit_empirical = KaplanMeierFitter().fit_left_censoring
    elif CensoringType.is_right_censoring(model):
        fit_empirical = KaplanMeierFitter().fit_right_censoring
    elif CensoringType.is_interval_censoring(model):
        raise NotImplementedError()

    empirical = fit_empirical(
        model.durations,
        model.event_observed,
        label=empirical_label,
        timeline=timeline,
    )
    empirical.plot_cumulative_density(ax=ax, **plot_kwargs)

    dist_name = get_distribution_name_of_lifelines_model(model)
    scipy_dist = create_scipy_stats_model_from_lifelines_model(model)
    fitted_cdf = scipy_dist.cdf(timeline)
    ax.plot(timeline, fitted_cdf, label="fitted %s" % dist_name, **plot_kwargs)
    ax.legend()
    return ax

Esempio n. 3

0

Mostra file

def cdf_plot(model, timeline=None, ax=None, **plot_kwargs):
    """
    Compare the empirical CDF (from Kaplan-Meier) with the CDF of the fitted model.

    Parameters
    ----------
    model:
        a fitted model exposing ``durations``, ``event_observed`` and ``timeline``.
    timeline: optional
        points at which both curves are evaluated; defaults to ``model.timeline``.
    ax: optional
        axis to draw on; defaults to ``plt.gca()``.
    plot_kwargs:
        forwarded to both plotting calls.

    Returns
    -------
    ax:
        the axis that was drawn on.

    Raises
    ------
    NotImplementedError
        if ``model`` was fitted with interval censoring.
    """
    from lifelines import KaplanMeierFitter

    ax = plt.gca() if ax is None else ax
    timeline = model.timeline if timeline is None else timeline

    empirical_label = "empirical CDF"

    # choose the Kaplan-Meier fitting routine that matches the model's censoring
    if CensoringType.is_left_censoring(model):
        fit_empirical = KaplanMeierFitter().fit_left_censoring
    elif CensoringType.is_right_censoring(model):
        fit_empirical = KaplanMeierFitter().fit_right_censoring
    elif CensoringType.is_interval_censoring(model):
        raise NotImplementedError("lifelines does not have a non-parametric interval model yet.")

    km_estimate = fit_empirical(
        model.durations,
        model.event_observed,
        label=empirical_label,
        timeline=timeline,
    )
    km_estimate.plot_cumulative_density(ax=ax, **plot_kwargs)

    dist_name = get_distribution_name_of_lifelines_model(model)
    scipy_dist = create_scipy_stats_model_from_lifelines_model(model)
    fitted_cdf = scipy_dist.cdf(timeline)
    ax.plot(timeline, fitted_cdf, label="fitted %s" % dist_name, **plot_kwargs)
    ax.legend()
    return ax

Esempio n. 4

0

Mostra file

File: generalized_gamma_fitter.py Progetto: wingkitlee0/lifelines

 def _create_initial_point(self, Ts, E, *args):
     """
     Build the starting parameter vector for the optimizer from the observed times.

     Parameters
     ----------
     Ts: indexable
         ``Ts[0]`` is used under right censoring, ``Ts[1]`` under left
         censoring, and the width ``Ts[1] - Ts[0]`` under interval censoring.
     E:
         not used by this method.
     *args:
         not used by this method.

     Returns
     -------
     np.array
         ``[mean(log_data), log(std(log_data) + 0.01), 0.1]``.
     """
     if CensoringType.is_right_censoring(self):
         log_data = log(Ts[0])
     elif CensoringType.is_left_censoring(self):
         log_data = log(Ts[1])
     elif CensoringType.is_interval_censoring(self):
         # log(0) is -inf whenever Ts[1] == Ts[0] (an exactly observed time),
         # which would poison the mean/std below, so add a small fudge factor.
         log_data = log(Ts[1] - Ts[0] + 0.01)
     # likewise guard log(std) against a zero spread (e.g. constant data)
     return np.array([log_data.mean(), log(log_data.std() + 0.01), 0.1])

Esempio n. 5

0

Mostra file

 def _create_initial_point(self, Ts, E, *args):
     """
     Starting parameter vector for the optimizer: ``[median(sample), 1.0]``.

     ``sample`` is ``Ts[0]`` under right censoring, ``Ts[1]`` under left
     censoring, and the interval width ``Ts[1] - Ts[0]`` under interval
     censoring. ``E`` and ``*args`` are not used.
     """
     if CensoringType.is_right_censoring(self):
         sample = Ts[0]
     elif CensoringType.is_left_censoring(self):
         sample = Ts[1]
     elif CensoringType.is_interval_censoring(self):
         sample = Ts[1] - Ts[0]

     centre = np.median(sample)
     return np.array([centre, 1.0])

Esempio n. 6

0

Mostra file

 def _get_initial_values(self, Ts, E, *args):
     """
     Build the starting parameter vector for the optimizer from the observed times.

     Parameters
     ----------
     Ts: indexable
         ``Ts[0]`` is used under right censoring, ``Ts[1]`` under left
         censoring, and the width ``Ts[1] - Ts[0]`` under interval censoring.
     E:
         not used by this method.
     *args:
         not used by this method.

     Returns
     -------
     np.array
         ``[mean(log_data), log(std(log_data) + 0.01), 1.0]``.
     """
     if CensoringType.is_right_censoring(self):
         log_data = np.log(Ts[0])
     elif CensoringType.is_left_censoring(self):
         log_data = np.log(Ts[1])
     elif CensoringType.is_interval_censoring(self):
         # np.log(0) is -inf whenever Ts[1] == Ts[0] (an exactly observed time),
         # which would poison the mean/std below, so add a small fudge factor.
         log_data = np.log(Ts[1] - Ts[0] + 0.01)
     # likewise guard log(std) against a zero spread (e.g. constant data)
     return np.array([log_data.mean(), np.log(log_data.std() + 0.01), 1.0])

Esempio n. 7

0

Mostra file

File: log_normal_fitter.py Progetto: wingkitlee0/lifelines

 def _create_initial_point(self, Ts, E, *args):
     """
     Starting parameter vector for the optimizer: ``[median(log times), 1.0]``.

     The times are ``Ts[0]`` under right censoring and ``Ts[1]`` under both
     left and interval censoring. ``E`` and ``*args`` are not used.
     """
     if CensoringType.is_right_censoring(self):
         logged = np.log(Ts[0])
     elif CensoringType.is_left_censoring(self) or CensoringType.is_interval_censoring(self):
         # both cases start from the second entry of Ts
         logged = np.log(Ts[1])

     centre = np.median(logged)
     return np.array([centre, 1.0])

Esempio n. 8

0

Mostra file

File: generalized_gamma_fitter.py Progetto: spiros/lifelines

 def _create_initial_point(self, Ts, E, *args):
     """
     Starting parameter vector for the optimizer.

     Returns ``[mean(logged), log(std(logged) + 0.01), 0.1]`` where ``logged``
     is the log of ``Ts[0]`` (right censoring), ``Ts[1]`` (left censoring) or
     the interval width ``Ts[1] - Ts[0] + 0.01`` (interval censoring).
     ``E`` and ``*args`` are not used.
     """
     if CensoringType.is_right_censoring(self):
         logged = log(Ts[0])
     elif CensoringType.is_left_censoring(self):
         logged = log(Ts[1])
     elif CensoringType.is_interval_censoring(self):
         # a zero-width interval (Ts[1] == Ts[0]) would break the log,
         # hence the small fudge factor
         width = Ts[1] - Ts[0] + 0.01
         logged = log(width)

     location = logged.mean()
     log_scale = log(logged.std() + 0.01)
     return np.array([location, log_scale, 0.1])

Esempio n. 9

0

Mostra file

File: plotting.py Progetto: goncaloperes/lifelines

def cdf_plot(model, timeline=None, ax=None, **plot_kwargs):
    """
    Plot the empirical CDF (derived by Kaplan-Meier) against the model CDF.

    Parameters
    ------------
    model: lifelines univariate model
        must expose ``timeline``, ``weights``, ``entry`` and either
        ``durations``/``event_observed`` or ``lower_bound``/``upper_bound``.
    timeline: iterable
        points at which both curves are evaluated; defaults to ``model.timeline``.
    ax: matplotlib axis
        axis to draw on; defaults to ``plt.gca()``.
    plot_kwargs:
        forwarded to both plotting calls.

    Returns
    --------
    ax:
        the axis that was drawn on.
    """
    from lifelines import KaplanMeierFitter
    from matplotlib import pyplot as plt

    ax = plt.gca() if ax is None else ax
    timeline = model.timeline if timeline is None else timeline

    empirical_label = "empirical CDF"

    def shared_fit_kwargs():
        # built lazily so the model attributes are only read by the branch that fits
        return dict(
            label=empirical_label,
            timeline=timeline,
            weights=model.weights,
            entry=model.entry,
        )

    if CensoringType.is_left_censoring(model):
        km_estimate = KaplanMeierFitter().fit_left_censoring(
            model.durations, model.event_observed, **shared_fit_kwargs()
        )
    elif CensoringType.is_right_censoring(model):
        km_estimate = KaplanMeierFitter().fit_right_censoring(
            model.durations, model.event_observed, **shared_fit_kwargs()
        )
    elif CensoringType.is_interval_censoring(model):
        km_estimate = KaplanMeierFitter().fit_interval_censoring(
            model.lower_bound, model.upper_bound, **shared_fit_kwargs()
        )

    km_estimate.plot_cumulative_density(ax=ax, **plot_kwargs)

    dist_name = get_distribution_name_of_lifelines_model(model)
    scipy_dist = create_scipy_stats_model_from_lifelines_model(model)
    fitted_cdf = scipy_dist.cdf(timeline)
    ax.plot(timeline, fitted_cdf, label="fitted %s" % dist_name, **plot_kwargs)
    ax.legend()
    return ax

Esempio n. 10

0

Mostra file

 def _create_initial_point(self, Ts, E, *args):
     """
     Starting parameter vector for the optimizer: ``[median(T), 1.0]``.

     Parameters
     ----------
     Ts: indexable
         ``Ts[0]`` is used under right censoring; ``Ts[1]`` under left and
         interval censoring.
     E:
         event indicator; only consulted under interval censoring, where a
         zero sum makes the method fall back to a constant sample.
     *args:
         not used by this method.

     Returns
     -------
     np.array
         ``[median(T), 1.0]``.
     """
     if CensoringType.is_right_censoring(self):
         T = Ts[0]
     elif CensoringType.is_left_censoring(self):
         # np.clip takes (a, a_min, a_max). The data must come first: with the
         # arguments swapped the 0.0001 lower bound was never applied.
         T = np.clip(Ts[1], 0.0001, np.inf)
     elif CensoringType.is_interval_censoring(self):
         if E.sum() > 0:
             # Ts[1] can contain infs, so ignore this data
             okay_data = Ts[1] < 1e10
             T = Ts[1][okay_data]
         else:
             T = np.array([1.0])
         if len(T) == 0:
             # every upper bound was filtered out; the median of an empty
             # sample is nan, so use the same constant fallback as above
             T = np.array([1.0])
     return np.array([np.median(T), 1.0])

Esempio n. 11

0

Mostra file

File: generalized_aft.py Progetto: dmedvinsky/lifelines

    def _ll_null(self):
        """
        Log-likelihood of the intercept-only ("null") version of this model.

        A default-constructed instance of the same class is refitted with every
        fitted parameter regressed on a constant ``intercept`` column only.
        Warnings raised during that refit are suppressed. The result is cached
        on ``self._ll_null_`` so the refit happens once.

        Returns
        -------
        The ``_log_likelihood`` attribute of the refitted null model.

        Raises
        ------
        NotImplementedError
            if ``self`` was fitted with left censoring.
        """
        # return the cached value when a previous call already computed it
        try:
            return self._ll_null_
        except AttributeError:
            pass

        param_names = self._fitted_parameter_names
        start = np.zeros(len(param_names))
        intercept_only = {param: ["intercept"] for param in param_names}

        null_model = self.__class__()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            if CensoringType.is_right_censoring(self):
                frame = pd.DataFrame(
                    {
                        "T": self.durations,
                        "E": self.event_observed,
                        "entry": self.entry,
                        "intercept": 1,
                    }
                )
                null_model.fit_right_censoring(
                    frame,
                    "T",
                    "E",
                    initial_point=start,
                    entry_col="entry",
                    regressors=intercept_only,
                )
            elif CensoringType.is_interval_censoring(self):
                frame = pd.DataFrame(
                    {
                        "lb": self.lower_bound,
                        "ub": self.upper_bound,
                        "E": self.event_observed,
                        "entry": self.entry,
                        "intercept": 1,
                    }
                )
                null_model.fit_interval_censoring(
                    frame,
                    "lb",
                    "ub",
                    "E",
                    initial_point=start,
                    entry_col="entry",
                    regressors=intercept_only,
                )

            # checked separately (not as an ``elif``), after the fits above were skipped
            if CensoringType.is_left_censoring(self):
                raise NotImplementedError()

        self._ll_null_ = null_model._log_likelihood
        return self._ll_null_

Esempio n. 12

0

Mostra file

 def plot_survival_function(self, **kwargs):
     """
     Plot the estimated survival function.

     For non-interval-censored fits this delegates to ``_plot_estimate``.
     For interval-censored fits ``survival_function_`` is drawn directly as
     a step function.

     Parameters
     ----------
     kwargs:
         plotting options. ``c`` / ``color`` select the line colour
         (default ``"k"`` in the interval-censored case); everything else
         is forwarded to the underlying plotting call.

     Returns
     -------
     The value returned by the underlying plotting call.
     """
     if not CensoringType.is_interval_censoring(self):
         return _plot_estimate(self, estimate="survival_function_", **kwargs)

     # hack for now.
     # Remove both colour spellings from kwargs before forwarding them:
     # leaving "color" in place would pass that keyword twice (TypeError).
     short_color = kwargs.pop("c", None)
     long_color = kwargs.pop("color", None)
     color = coalesce(short_color, long_color, "k")
     return self.survival_function_.plot(drawstyle="steps-pre", color=color, **kwargs)

Esempio n. 13

0

Mostra file

File: covid_plot.py Progetto: basroy/COVID_analyse

def cdf_plot(model, timeline=None, ax=None, **plot_kwargs):
    """
    Cumulative Distribution Function.

    Fits a Kaplan-Meier estimator labelled "empirical CDF" using the fitting
    routine that matches the censoring type of ``model``.

    NOTE(review): as written, the fitted estimator is neither plotted nor
    returned and ``plot_kwargs`` is unused -- the function looks truncated;
    confirm against the original file.

    Parameters
    ----------
    model:
        a fitted model exposing ``timeline``, ``weights``, ``entry`` and either
        ``durations``/``event_observed`` or ``lower_bound``/``upper_bound``.
    timeline: optional
        defaults to ``model.timeline``.
    ax: optional
        defaults to ``plt.gca()``.
    """
    from lifelines import KaplanMeierFitter

    ax = plt.gca() if ax is None else ax
    timeline = model.timeline if timeline is None else timeline

    empirical_label = "empirical CDF"

    def shared_fit_kwargs():
        # built lazily so the model attributes are only read by a branch that fits
        return dict(
            label=empirical_label,
            timeline=timeline,
            weights=model.weights,
            entry=model.entry,
        )

    # three independent checks (not an if/elif chain), as in the original
    if CensoringType.is_left_censoring(model):
        emp_kmf = KaplanMeierFitter().fit_left_censoring(
            model.durations, model.event_observed, **shared_fit_kwargs()
        )
    if CensoringType.is_right_censoring(model):
        emp_kmf = KaplanMeierFitter().fit_right_censoring(
            model.durations, model.event_observed, **shared_fit_kwargs()
        )
    if CensoringType.is_interval_censoring(model):
        emp_kmf = KaplanMeierFitter().fit_interval_censoring(
            model.lower_bound, model.upper_bound, **shared_fit_kwargs()
        )

Esempio n. 14

0

Mostra file

File: kaplan_meier_fitter.py Progetto: olgadk7/lifelines

    def plot_cumulative_density(self, **kwargs):
        """
        Plots a pretty figure of the cumulative density function.

        Matplotlib plot arguments can be passed in inside the kwargs.

        For non-interval-censored fits the call is delegated to
        ``_plot_estimate``. For interval-censored fits ``cumulative_density_``
        is drawn directly as a step function, using ``c`` / ``color`` from
        the kwargs (default ``"k"``).

        Parameters
        -----------
        show_censors: bool
            place markers at censorship events. Default: False
        censor_styles: dict
            If show_censors, this dictionary will be passed into the plot call.
        ci_alpha: float
            the transparency level of the confidence interval. Default: 0.3
        ci_force_lines: bool
            force the confidence intervals to be line plots (versus default shaded areas). Default: False
        ci_show: bool
            show confidence intervals. Default: True
        ci_legend: bool
            if ci_force_lines is True, this is a boolean flag to add the lines' labels to the legend. Default: False
        at_risk_counts: bool
            show group sizes at time points. See function ``add_at_risk_counts`` for details. Default: False
        loc: slice
            specify a time-based subsection of the curves to plot, ex:

            >>> model.plot(loc=slice(0.,10.))

            will plot the time values between t=0. and t=10.
        iloc: slice
            specify a location-based subsection of the curves to plot, ex:

            >>> model.plot(iloc=slice(0,10))

            will plot the first 10 time points.

        Returns
        -------
        ax:
            a pyplot axis object
        """
        if not CensoringType.is_interval_censoring(self):
            return _plot_estimate(self, estimate="cumulative_density_", **kwargs)

        # hack for now.
        # Remove both colour spellings from kwargs before forwarding them:
        # leaving "color" in place would pass that keyword twice (TypeError).
        short_color = kwargs.pop("c", None)
        long_color = kwargs.pop("color", None)
        color = coalesce(short_color, long_color, "k")
        # return the plot result so this branch honours the documented return value
        return self.cumulative_density_.plot(drawstyle="steps", color=color, **kwargs)

Esempio n. 15

0

Mostra file

    def plot_survival_function(self, **kwargs):
        """
        Plot the estimated survival function.

        For non-interval-censored fits this delegates to ``_plot_estimate``.
        For interval-censored fits ``survival_function_`` is drawn directly as
        a step function.

        Parameters
        ----------
        kwargs:
            plotting options. ``c`` / ``color`` select the line colour
            (default ``"k"`` in the interval-censored case); everything else
            is forwarded to the underlying plotting call.

        Returns
        -------
        The value returned by the underlying plotting call.
        """
        if not CensoringType.is_interval_censoring(self):
            return _plot_estimate(self, estimate="survival_function_", **kwargs)

        # hack for now.
        # Both colour spellings are always removed from kwargs so that neither
        # is forwarded a second time alongside the explicit ``color=`` below.
        short_color = kwargs.pop("c", None)
        long_color = kwargs.pop("color", None)
        color = coalesce(short_color, long_color, "k")
        return self.survival_function_.plot(drawstyle="steps-pre", color=color, **kwargs)

Esempio n. 16

0

Mostra file

File: kaplan_meier_fitter.py Progetto: olgadk7/lifelines

    def _fit(self,
             durations,
             event_observed=None,
             timeline=None,
             entry=None,
             label=None,
             alpha=None,
             ci_labels=None,
             weights=None):  # pylint: disable=too-many-arguments,too-many-locals
        """
        Fit the Kaplan-Meier estimate and store the results on ``self``.

        Parameters
        ----------
          durations: an array, list, pd.DataFrame or pd.Series
            length n -- duration subject was observed for
          event_observed: an array, list, pd.DataFrame, or pd.Series, optional
             True if the death was observed, False if the event was lost (right-censored). Defaults all True if event_observed==None
          timeline: an array, list, pd.DataFrame, or pd.Series, optional
            return the best estimate at the values in timelines (positively increasing)
          entry: an array, list, pd.DataFrame, or pd.Series, optional
             relative time when a subject entered the study. This is useful for left-truncated (not left-censored) observations. If None, all members of the population
             entered study when they were "born".
          label: string, optional
            a string to name the column of the estimate. Falls back to the
            instance's ``_label`` and then to "KM_estimate".
          alpha: float, optional
            the alpha value in the confidence intervals. Overrides the initializing alpha for this call to fit only.
          ci_labels: tuple, optional
                add custom column names to the generated confidence intervals as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<1-alpha/2>
          weights: an array, list, pd.DataFrame, or pd.Series, optional
              if providing a weighted dataset. For example, instead
              of providing every subject as a single element of `durations` and `event_observed`, one could
              weigh subject differently. Non-integer weights trigger a ``StatisticalWarning``.

        Returns
        -------
        self: KaplanMeierFitter
          self with new properties like ``survival_function_``, ``plot()``, ``median_survival_time_``

        Raises
        ------
        StatError
            if ``entry`` is given and the running net population (entrances
            minus removals) reaches zero within the first half of the event table.

        """
        durations = np.asarray(durations)
        self._check_values(durations)

        if event_observed is not None:
            event_observed = np.asarray(event_observed)
            self._check_values(event_observed)

        # first non-None of: explicit label, label already on the instance, default
        self._label = coalesce(label, self._label, "KM_estimate")

        if weights is not None:
            weights = np.asarray(weights)
            # any weight that changes when truncated to int is treated as non-integer
            if (weights.astype(int) != weights).any():
                warnings.warn(
                    """It looks like your weights are not integers, possibly propensity scores then?
  It's important to know that the naive variance estimates of the coefficients are biased. Instead use Monte Carlo to
  estimate the variances. See paper "Variance estimation when using inverse probability of treatment weighting (IPTW) with survival analysis"
  or "Adjusted Kaplan-Meier estimator and log-rank test with inverse probability of treatment weighting for survival data."
                  """,
                    StatisticalWarning,
                )
        else:
            # unweighted fit: every observation counts once
            weights = np.ones_like(durations, dtype=float)

        # if the user is interested in left-censorship, the primary estimate is the
        # cumulative_density_ (not the survival_function_); otherwise it is the reverse.
        is_left_censoring = CensoringType.is_left_censoring(self)
        primary_estimate_name = "survival_function_" if not is_left_censoring else "cumulative_density_"
        secondary_estimate_name = "cumulative_density_" if not is_left_censoring else "survival_function_"

        (self.durations, self.event_observed, self.timeline, self.entry,
         self.event_table,
         self.weights) = _preprocess_inputs(durations, event_observed,
                                            timeline, entry, weights)

        # NOTE: a falsy alpha (None, but also 0) falls back to the instance-level alpha
        alpha = alpha if alpha else self.alpha
        log_estimate, cumulative_sq_ = _additive_estimate(
            self.event_table, self.timeline, self._additive_f,
            self._additive_var, is_left_censoring)

        if entry is not None:
            # a serious problem with KM is that when the sample size is small and there are too few early
            # truncation times, it may happen that the number of patients at risk and the number of deaths
            # are the same. We detect this and raise, recommending the Breslow-Fleming-Harrington estimator.
            n = self.event_table.shape[0]
            net_population = (self.event_table["entrance"] -
                              self.event_table["removed"]).cumsum()
            # only the first half of the event table is inspected
            if net_population.iloc[:int(n / 2)].min() == 0:
                ix = net_population.iloc[:int(n / 2)].idxmin()
                raise StatError(
                    """There are too few early truncation times and too many events. S(t)==0 for all t>%g. Recommend BreslowFlemingHarringtonFitter."""
                    % ix)

        # estimation: the primary estimate is exp(log_estimate), the secondary its complement
        setattr(self, primary_estimate_name,
                pd.DataFrame(np.exp(log_estimate), columns=[self._label]))
        setattr(self, secondary_estimate_name,
                pd.DataFrame(1 - np.exp(log_estimate), columns=[self._label]))

        # double leading underscore: Python name-mangles this attribute with the enclosing class name
        self.__estimate = getattr(self, primary_estimate_name)
        self.confidence_interval_ = self._bounds(
            cumulative_sq_.values[:, None], alpha, ci_labels)
        self._median = median_survival_times(self.survival_function_)
        self._cumulative_sq_ = cumulative_sq_

        # expose the interval under both names; the secondary one is the complement
        setattr(self, "confidence_interval_" + primary_estimate_name,
                self.confidence_interval_)
        setattr(self, "confidence_interval_" + secondary_estimate_name,
                1 - self.confidence_interval_)

        # estimation methods
        self._estimation_method = primary_estimate_name
        self._estimate_name = primary_estimate_name

        return self

Esempio n. 17

0

Mostra file

File: generalized_aft.py Progetto: dmedvinsky/lifelines

    def print_summary(self, decimals=2, **kwargs):
        """
        Print summary statistics describing the fit, the coefficients, and the error bounds.

        The output is: a header with facts about the data and the fit, the
        ``summary`` table, the concordance (right-censored fits only) and the
        log-likelihood ratio test.

        Parameters
        -----------
        decimals: int, optional (default=2)
            specify the number of decimal places to show
        kwargs:
            print additional metadata in the output (useful to provide model names, dataset names, etc.) when comparing
            multiple outputs. Each item is printed as its own header line.

        """
        pad = string_justify(18)

        def show(label, value, template="{} = {}"):
            # one "<justified label> = <value>" header line
            print(template.format(pad(label), value))

        # Print information about data first
        print(self)
        if self.event_col:
            show("event col", self.event_col, "{} = '{}'")
        if self.weights_col:
            show("weights col", self.weights_col, "{} = '{}'")
        if self.penalizer > 0:
            show("penalizer", self.penalizer)

        if self.robust:
            show("robust variance", True)

        show("number of subjects", self._n_examples)
        show("number of events", self.event_observed.sum())
        print("{} = {:.{prec}f}".format(pad("log-likelihood"), self._log_likelihood, prec=decimals))
        show("time fit was run", self._time_fit_was_called)

        # caller-supplied metadata, each followed by a blank line
        for key, value in kwargs.items():
            show(key, value, "{} = {}\n")

        print()
        print("---")

        column_formatters = {
            "p": format_p_value(decimals),
            "exp(coef)": format_exp_floats(decimals),
        }
        table = self.summary
        rendered = table.to_string(
            float_format=format_floats(decimals),
            formatters=column_formatters,
        )
        print(rendered)

        print("---")
        if CensoringType.is_right_censoring(self):
            print("Concordance = {:.{prec}f}".format(self.score_, prec=decimals))

        # silence numpy floating-point warnings while computing and formatting the test
        with np.errstate(invalid="ignore", divide="ignore"):
            lr_test = self.log_likelihood_ratio_test()
            print(
                "Log-likelihood ratio test = {:.{prec}f} on {} df, -log2(p)={:.{prec}f}".format(
                    lr_test.test_statistic,
                    lr_test.degrees_freedom,
                    -np.log2(lr_test.p_value),
                    prec=decimals,
                )
            )

Esempio n. 18

0

Mostra file

File: calibration.py Progetto: taoxu2016/lifelines

def survival_probability_calibration(model: RegressionFitter,
                                     training_df: pd.DataFrame,
                                     t0: float,
                                     ax=None):
    r"""
    Smoothed calibration curves for time-to-event models. This is analogous to
    calibration curves for classification models, extended to handle survival probabilities
    and censoring. Produces a matplotlib figure and some metrics.

    We want to calibrate our model's prediction of :math:`P(T < \text{t0})` against the observed frequencies.

    Parameters
    -------------

    model:
        a fitted lifelines regression model to be evaluated
    training_df: DataFrame
        the DataFrame used to train the model
    t0: float
        the time to evaluate the probability of event occurring prior at.
        Note that labels and the internal column name format it with ``%d``.
    ax:
        matplotlib axes to draw on; defaults to ``plt.gca()``.

    Returns
    ----------
    ax:
        mpl axes
    ICI:
        mean absolute difference between predicted and observed
    E50:
        median absolute difference between predicted and observed

    https://onlinelibrary.wiley.com/doi/full/10.1002/sim.8570

    """
    def ccl(p):
        # complementary log-log transform: log(-log(1 - p))
        return np.log(-np.log(1 - p))

    if ax is None:
        ax = plt.gca()

    # T and E are column *names* taken from the model, reused for the new dataset
    T = model.duration_col
    E = model.event_col

    # predicted probability of the event by t0, clipped away from 0 and 1 so ccl stays finite
    predictions_at_t0 = np.clip(
        1 -
        model.predict_survival_function(training_df, times=[t0]).T.squeeze(),
        1e-10, 1 - 1e-10)

    # create new dataset with the predictions
    prediction_df = pd.DataFrame({
        "ccl_at_%d" % t0: ccl(predictions_at_t0),
        "constant": 1,
        T: model.durations,
        E: model.event_observed
    })

    # fit new dataset to flexible spline model
    # this new model connects prediction probabilities and actual survival. It should be very flexible, almost to the point of overfitting. Its goal is just to smooth out the data!
    knots = 3
    regressors = {
        "beta_": ["ccl_at_%d" % t0],
        "gamma0_": ["constant"],
        "gamma1_": ["constant"],
        "gamma2_": ["constant"]
    }

    # this model is from examples/royson_crowther_clements_splines.py
    crc = CRCSplineFitter(knots, penalizer=0)
    if CensoringType.is_right_censoring(model):
        crc.fit_right_censoring(prediction_df, T, E, regressors=regressors)
    elif CensoringType.is_left_censoring(model):
        crc.fit_left_censoring(prediction_df, T, E, regressors=regressors)
    elif CensoringType.is_interval_censoring(model):
        # NOTE(review): T and E are passed in the same positions as above; an interval fit
        # presumably expects lower/upper bound columns here -- confirm against the fitter's API.
        crc.fit_interval_censoring(prediction_df, T, E, regressors=regressors)

    # predict new model at values 0 to 1, but remember to ccl it!
    # the grid spans the observed prediction range padded by 0.01 and clipped to [0, 1]
    x = np.linspace(np.clip(predictions_at_t0.min() - 0.01, 0, 1),
                    np.clip(predictions_at_t0.max() + 0.01, 0, 1), 100)
    y = 1 - crc.predict_survival_function(pd.DataFrame({
        "ccl_at_%d" % t0: ccl(x),
        "constant": 1
    }),
                                          times=[t0]).T.squeeze()

    # plot our results
    ax.set_title(
        "Smoothed calibration curve of \npredicted vs observed probabilities of t ≤ %d mortality"
        % t0)

    color = "tab:red"
    ax.plot(x, y, label="smoothed calibration curve", color=color)
    ax.set_xlabel("Predicted probability of \nt ≤ %d mortality" % t0)
    ax.set_ylabel("Observed probability of \nt ≤ %d mortality" % t0,
                  color=color)
    ax.tick_params(axis="y", labelcolor=color)

    # plot x=y line
    ax.plot(x, x, c="k", ls="--")
    ax.legend()

    # plot histogram of our original predictions
    color = "tab:blue"
    twin_ax = ax.twinx()
    twin_ax.set_ylabel("Count of \npredicted probabilities",
                       color=color)  # we already handled the x-label with ax1
    twin_ax.tick_params(axis="y", labelcolor=color)
    twin_ax.hist(predictions_at_t0, alpha=0.3, bins="sqrt", color=color)

    plt.tight_layout()

    # absolute gap between the smoothed ("observed") and the original predicted probabilities
    deltas = ((1 - crc.predict_survival_function(
        prediction_df, times=[t0])).T.squeeze() - predictions_at_t0).abs()
    ICI = deltas.mean()
    E50 = np.percentile(deltas, 50)
    print("ICI = ", ICI)
    print("E50 = ", E50)

    return ax, ICI, E50

Esempio n. 19

0

Mostra file

def qq_plot(model, ax=None, **plot_kwargs):
    """
    Produces a quantile-quantile plot of the empirical CDF against
    the fitted parametric CDF. Large deviances away from the line y=x
    can invalidate a model (though we expect some natural deviance in the tails).

    Parameters
    -----------
    model: obj
        A fitted lifelines univariate parametric model, like ``WeibullFitter``
    ax: optional
        the axes to draw on; defaults to ``plt.gca()``.
    plot_kwargs:
        accepted, but not forwarded to any plotting call by this implementation.

    Returns
    --------
    ax:
        The axes which was used.

    Raises
    -------
    NotImplementedError
        if ``model`` was fitted with interval censoring.

    Examples
    ---------

    >>> from lifelines import *
    >>> from lifelines.plotting import qq_plot
    >>> from lifelines.datasets import load_rossi
    >>> df = load_rossi()
    >>> wf = WeibullFitter().fit(df['week'], df['arrest'])
    >>> qq_plot(wf)


    """
    from lifelines.utils import qth_survival_times
    from lifelines import KaplanMeierFitter

    ax = plt.gca() if ax is None else ax

    dist_name = get_distribution_name_of_lifelines_model(model)
    scipy_dist = create_scipy_stats_model_from_lifelines_model(model)

    empirical_col = "empirical quantiles"
    theoretical_col = "fitted %s quantiles" % dist_name

    # choose the Kaplan-Meier fitting routine that matches the model's censoring
    if CensoringType.is_left_censoring(model):
        fit_empirical = KaplanMeierFitter().fit_left_censoring
    elif CensoringType.is_right_censoring(model):
        fit_empirical = KaplanMeierFitter().fit_right_censoring
    elif CensoringType.is_interval_censoring(model):
        raise NotImplementedError("lifelines does not have a non-parametric interval model yet.")

    km_estimate = fit_empirical(model.durations, model.event_observed, label=empirical_col)

    probs = np.unique(km_estimate.cumulative_density_.values[:, 0])
    # this is equivalent to the old code `qth_survival_times(q, kmf.cumulative_density, cdf=True)`
    table = qth_survival_times(1 - probs, km_estimate.survival_function_)
    table[theoretical_col] = scipy_dist.ppf(probs)
    # rows holding an infinite or zero quantile are dropped before plotting
    table = table.replace([-np.inf, 0, np.inf], np.nan).dropna()

    empirical_values = table[empirical_col]
    upper, lower = empirical_values.max(), empirical_values.min()
    diagonal = [lower, upper]

    table.plot.scatter(theoretical_col, empirical_col, c="none", edgecolor="k", lw=0.5, ax=ax)
    # reference line y = x across the empirical range
    ax.plot(diagonal, diagonal, c="k", ls=":", lw=1.0)
    ax.set_ylim(lower, upper)
    ax.set_xlim(lower, upper)

    return ax

Esempio n. 20

0

Mostra file

File: plotting.py Progetto: vck/lifelines

def qq_plot(model, ax=None, **plot_kwargs):
    """
    Produces a quantile-quantile plot of the empirical CDF against
    the fitted parametric CDF. Large deviances away from the line y=x
    can invalidate a model (though we expect some natural deviance in the tails).

    Parameters
    -----------
    model: obj
        A fitted lifelines univariate parametric model, like ``WeibullFitter``
    ax:
        The axes to draw on. If None, the current axes (``plt.gca()``) is used.
    plot_kwargs:
        Extra keyword arguments forwarded to the scatter call that draws the
        quantile points. They take precedence over the defaults
        ``c="none"``, ``edgecolor="k"`` and ``lw=0.5``.

    Returns
    --------
    ax:
        The axes which was used.

    Raises
    -------
    ValueError
        If the model is not recognised as left, right or interval censored.

    Examples
    ---------
    .. code:: python

        from lifelines import *
        from lifelines.plotting import qq_plot
        from lifelines.datasets import load_rossi
        df = load_rossi()
        wf = WeibullFitter().fit(df['week'], df['arrest'])
        qq_plot(wf)

    Notes
    ------
    In the interval censoring case the empirical survival function is the
    row-wise mean of the columns of the non-parametric ``survival_function_``,
    and the probability levels are taken from its ``"_lower"`` cumulative
    density column.

    """
    from lifelines.utils import qth_survival_times
    from lifelines import KaplanMeierFitter

    if ax is None:
        ax = plt.gca()

    dist = get_distribution_name_of_lifelines_model(model)
    dist_object = create_scipy_stats_model_from_lifelines_model(model)

    COL_EMP = "empirical quantiles"
    COL_THEO = "fitted %s quantiles" % dist

    # Build the non-parametric (empirical) estimate with the same censoring
    # scheme, weights and entry times that the parametric model was fitted with.
    if CensoringType.is_left_censoring(model):
        kmf = KaplanMeierFitter().fit_left_censoring(
            model.durations, model.event_observed, label=COL_EMP, weights=model.weights, entry=model.entry
        )
        sf, cdf = kmf.survival_function_[COL_EMP], kmf.cumulative_density_[COL_EMP]
    elif CensoringType.is_right_censoring(model):
        kmf = KaplanMeierFitter().fit_right_censoring(
            model.durations, model.event_observed, label=COL_EMP, weights=model.weights, entry=model.entry
        )
        sf, cdf = kmf.survival_function_[COL_EMP], kmf.cumulative_density_[COL_EMP]
    elif CensoringType.is_interval_censoring(model):
        kmf = KaplanMeierFitter().fit_interval_censoring(
            model.lower_bound, model.upper_bound, label=COL_EMP, weights=model.weights, entry=model.entry
        )
        sf, cdf = kmf.survival_function_.mean(1), kmf.cumulative_density_[COL_EMP + "_lower"]
    else:
        # Without this guard the code below would fail with an unhelpful
        # UnboundLocalError on ``cdf``.
        raise ValueError("Unable to determine the censoring type of the model.")

    # The distinct probability levels reached by the empirical CDF.
    q = np.unique(cdf.values)

    # Empirical quantiles come from the survival function at 1 - q; theoretical
    # quantiles come from the fitted distribution's percent point function.
    quantiles = qth_survival_times(1 - q, sf)
    quantiles[COL_THEO] = dist_object.ppf(q)
    # Rows containing infinite or zero quantiles are discarded before plotting.
    quantiles = quantiles.replace([-np.inf, 0, np.inf], np.nan).dropna()

    max_, min_ = quantiles[COL_EMP].max(), quantiles[COL_EMP].min()

    # Default styling matches the historical appearance; anything the caller
    # supplied through plot_kwargs overrides it instead of being dropped.
    scatter_kwargs = {"c": "none", "edgecolor": "k", "lw": 0.5}
    scatter_kwargs.update(plot_kwargs)
    quantiles.plot.scatter(COL_THEO, COL_EMP, ax=ax, **scatter_kwargs)

    # Reference line y = x over the range of the empirical quantiles.
    ax.plot([min_, max_], [min_, max_], c="k", ls=":", lw=1.0)
    ax.set_ylim(min_, max_)
    ax.set_xlim(min_, max_)

    return ax

Esempio n. 21

0

Mostra file

File: kaplan_meier_fitter.py Progetto: CamDavidsonPilon/lifelines

    def _fit(
        self,
        durations,
        event_observed=None,
        timeline=None,
        entry=None,
        label="KM_estimate",
        alpha=None,
        ci_labels=None,
        weights=None,
    ):  # pylint: disable=too-many-arguments,too-many-locals
        """
        Parameters
        ----------
          durations: an array, list, pd.DataFrame or pd.Series
            length n -- duration subject was observed for
          event_observed: an array, list, pd.DataFrame, or pd.Series, optional
             True if the the death was observed, False if the event was lost (right-censored). Defaults all True if event_observed==None
          timeline: an array, list, pd.DataFrame, or pd.Series, optional
            return the best estimate at the values in timelines (postively increasing)
          entry: an array, list, pd.DataFrame, or pd.Series, optional
             relative time when a subject entered the study. This is useful for left-truncated (not left-censored) observations. If None, all members of the population
             entered study when they were "born".
          label: string, optional
            a string to name the column of the estimate.
          alpha: float, optional
            the alpha value in the confidence intervals. Overrides the initializing alpha for this call to fit only.
          left_censorship: bool, optional (default=False)
            True if durations and event_observed refer to left censorship events. Default False
          ci_labels: tuple, optional
                add custom column names to the generated confidence intervals as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<1-alpha/2>
          weights: an array, list, pd.DataFrame, or pd.Series, optional
              if providing a weighted dataset. For example, instead
              of providing every subject as a single element of `durations` and `event_observed`, one could
              weigh subject differently.

        Returns
        -------
        self: KaplanMeierFitter
          self with new properties like ``survival_function_``, ``plot()``, ``median``

        """
        self._check_values(durations)
        if event_observed is not None:
            self._check_values(event_observed)

        self._label = label

        if weights is not None:
            weights = np.asarray(weights)
            if (weights.astype(int) != weights).any():
                warnings.warn(
                    """It looks like your weights are not integers, possibly propensity scores then?
  It's important to know that the naive variance estimates of the coefficients are biased. Instead use Monte Carlo to
  estimate the variances. See paper "Variance estimation when using inverse probability of treatment weighting (IPTW) with survival analysis"
  or "Adjusted Kaplan-Meier estimator and log-rank test with inverse probability of treatment weighting for survival data."
                  """,
                    StatisticalWarning,
                )

        # if the user is interested in left-censorship, we return the cumulative_density_, no survival_function_,
        is_left_censoring = CensoringType.is_left_censoring(self)
        primary_estimate_name = "survival_function_" if not is_left_censoring else "cumulative_density_"
        secondary_estimate_name = "cumulative_density_" if not is_left_censoring else "survival_function_"

        self.durations, self.event_observed, self.timeline, self.entry, self.event_table = _preprocess_inputs(
            durations, event_observed, timeline, entry, weights
        )

        alpha = alpha if alpha else self.alpha
        log_estimate, cumulative_sq_ = _additive_estimate(
            self.event_table, self.timeline, self._additive_f, self._additive_var, is_left_censoring
        )

        if entry is not None:
            # a serious problem with KM is that when the sample size is small and there are too few early
            # truncation times, it may happen that is the number of patients at risk and the number of deaths is the same.
            # we adjust for this using the Breslow-Fleming-Harrington estimator
            n = self.event_table.shape[0]
            net_population = (self.event_table["entrance"] - self.event_table["removed"]).cumsum()
            if net_population.iloc[: int(n / 2)].min() == 0:
                ix = net_population.iloc[: int(n / 2)].idxmin()
                raise StatError(
                    """There are too few early truncation times and too many events. S(t)==0 for all t>%g. Recommend BreslowFlemingHarringtonFitter."""
                    % ix
                )

        # estimation
        setattr(self, primary_estimate_name, pd.DataFrame(np.exp(log_estimate), columns=[self._label]))
        setattr(self, secondary_estimate_name, pd.DataFrame(1 - np.exp(log_estimate), columns=[self._label]))

        self.__estimate = getattr(self, primary_estimate_name)
        self.confidence_interval_ = self._bounds(cumulative_sq_[:, None], alpha, ci_labels)
        self.median_ = median_survival_times(self.__estimate, left_censorship=is_left_censoring)
        self._cumulative_sq_ = cumulative_sq_

        setattr(self, "confidence_interval_" + primary_estimate_name, self.confidence_interval_)
        setattr(self, "confidence_interval_" + secondary_estimate_name, 1 - self.confidence_interval_)

        # estimation methods
        self._estimation_method = primary_estimate_name
        self._estimate_name = primary_estimate_name
        self._update_docstrings()

        return self