Ejemplo n.º 1
0
    def predict(self, times):
        """
        Predict the {0} at the requested point(s) in time.

        If the estimation method is callable it is evaluated directly at the
        requested times. Otherwise it names an attribute of this object holding
        the estimate, which is interpolated at the requested times.

        Parameters
        ----------
        times: a scalar or an array of times to predict the value of {0} at.

        Returns
        -------
        predictions: the predicted value(s) of {0} at the requested time(s).
        """
        method = self._estimation_method
        if not callable(method):
            # The method is the name of a fitted attribute: interpolate it.
            return dataframe_interpolate_at_times(getattr(self, method), times)

        # The method is a function of time: evaluate it, then pick out the
        # requested rows and collapse single-element results.
        time_points = _to_array(times)
        evaluated = pd.DataFrame(method(time_points), index=time_points)
        return evaluated.loc[times].squeeze()
Ejemplo n.º 2
0
    def __init__(self, p_value, test_statistic, name=None, **kwargs):
        """
        Store the outcome of one or more statistical tests.

        Parameters
        ----------
        p_value:
            the p-value(s); kept as given and also as an array in ``_p_value``.
        test_statistic:
            the test statistic(s); kept as given and also as an array in
            ``_test_statistic``. Must have as many entries as ``p_value``.
        name: optional
            label(s) for the tests, stored as a list. When given, there must be
            one entry per test statistic.
        kwargs:
            each key/value pair is set as an attribute on the instance; the
            whole mapping is also kept in ``_kwargs``.
        """
        self.p_value, self.test_statistic = p_value, test_statistic

        # Array versions of the inputs, used for the length checks below.
        self._p_value = _to_array(p_value)
        self._test_statistic = _to_array(test_statistic)
        assert len(self._p_value) == len(self._test_statistic)

        if name is None:
            self.name = None
        else:
            self.name = _to_list(name)
            assert len(self.name) == len(self._test_statistic)

        # Set last so that keyword arguments may override the attributes above.
        for key in kwargs:
            setattr(self, key, kwargs[key])

        self._kwargs = kwargs
Ejemplo n.º 3
0
    def survival_function_at_times(self, times):
        """
        Return a Pandas series of the predicted survival value at specific times

        Parameters
        -----------
        times: iterable or float
          values to return the survival function at.

        Returns
        --------
        pd.Series
          ``exp(-lambda_ * t)`` for each requested time ``t``, indexed by the times.

        """
        # Convert once, before any arithmetic (assumes `_to_array` returns a
        # plain array). Working on the raw input breaks in two ways: a list
        # multiplied by a plain float raises, and a pandas Series would be
        # re-aligned by label against the new index in the constructor below,
        # silently producing wrong or missing values.
        times = _to_array(times)
        return pd.Series(np.exp(-self.lambda_ * times), index=times)
Ejemplo n.º 4
0
    def cumulative_density_at_times(self, times, label=None):
        """
        Return a Pandas series of the predicted cumulative density at specific times.

        The cumulative density is computed as one minus ``self.predict(times)``.

        Parameters
        -----------
        times: iterable or float
          values to return the cumulative density at.
        label: string, optional
          name of the returned series; resolved with ``coalesce(label, self._label)``.

        Returns
        --------
        pd.Series

        """
        series_name = coalesce(label, self._label)
        cumulative_density = 1 - self.predict(times)
        return pd.Series(cumulative_density, index=_to_array(times), name=series_name)
Ejemplo n.º 5
0
    def survival_function_at_times(self, times, label=None):
        """
        Return a Pandas series of the predicted survival value at specific times.

        The values come from ``self.predict(times)``.

        Parameters
        -----------
        times: iterable or float
          values to return the survival function at.
        label: string, optional
          name of the returned series; resolved with ``coalesce(label, self._label)``.

        Returns
        --------
        pd.Series

        """
        series_name = coalesce(label, self._label)
        survival = self.predict(times)
        return pd.Series(survival, index=_to_array(times), name=series_name)
    def cumulative_density_at_times(self, times, label=None):
        """
        Return a Pandas series of the predicted cumulative density at specific times.

        Each value is the complement (``1 - ...``) of ``self.predict(times)``.

        Parameters
        -----------
        times: iterable or float
          values to return the cumulative density at.
        label: string, optional
          name of the returned series; resolved with ``coalesce(label, self._label)``.

        Returns
        --------
        pd.Series

        """
        resolved_label = coalesce(label, self._label)
        complement = 1 - self.predict(times)
        time_index = _to_array(times)
        return pd.Series(complement, index=time_index, name=resolved_label)
Ejemplo n.º 7
0
    def survival_function_at_times(self, times):
        """
        Return a Pandas series of the predicted survival value at specific times.

        Computed as ``1 - norm.cdf((log(t) - mu_) / sigma_)``.

        Parameters
        -----------
        times: iterable or float
          values to return the survival function at.

        Returns
        --------
        pd.Series

        """
        # Standardise log-time with the fitted location and scale.
        standardized = (log(times) - self.mu_) / self.sigma_
        survival = 1 - norm.cdf(standardized)
        return pd.Series(survival, index=_to_array(times))
    def survival_function_at_times(self, times, label=None):
        """
        Return a Pandas series of the predicted survival value at specific times.

        Wraps ``self.predict(times)`` in a series indexed by the requested times.

        Parameters
        -----------
        times: iterable or float
          values to return the survival function at.
        label: string, optional
          name of the returned series; resolved with ``coalesce(label, self._label)``.

        Returns
        --------
        pd.Series

        """
        resolved_label = coalesce(label, self._label)
        predicted = self.predict(times)
        time_index = _to_array(times)
        return pd.Series(predicted, index=time_index, name=resolved_label)
Ejemplo n.º 9
0
    def hazard_at_times(self, times, label=None):
        """
        Return a Pandas series of the predicted hazard at specific times.

        The hazard is evaluated by ``self._hazard`` with the fitted parameters.

        Parameters
        -----------
        times: iterable or float
          values to return the hazard at.
        label: string, optional
          Rename the series returned. Useful for plotting.

        Returns
        --------
        pd.Series

        """
        series_name = coalesce(label, self._label)
        hazard = self._hazard(self._fitted_parameters_, times)
        return pd.Series(hazard, index=_to_array(times), name=series_name)
Ejemplo n.º 10
0
 def cumulative_hazard_at_times(self, times):
     """
     Return a Pandas series of the cumulative hazard at specific times.

     Computed as ``(lambda_ * t) ** rho_``.

     Parameters
     -----------
     times: iterable or float
       values to return the cumulative hazard at.

     Returns
     --------
     pd.Series
       indexed by the requested times.

     """
     # Convert once, before any arithmetic (assumes `_to_array` returns a
     # plain array): a list times a plain float raises, and a pandas Series
     # would be re-aligned by label against the new index in the constructor.
     times = _to_array(times)
     return pd.Series((self.lambda_ * times) ** self.rho_, index=times)
Ejemplo n.º 11
0
 def hazard_at_times(self, times):
     """
     Return a Pandas series of the hazard at specific times.

     Computed as ``lambda_ * rho_ * (lambda_ * t) ** (rho_ - 1)``.

     Parameters
     -----------
     times: iterable or float
       values to return the hazard at.

     Returns
     --------
     pd.Series
       indexed by the requested times.

     """
     # Convert once, before any arithmetic (assumes `_to_array` returns a
     # plain array): a list times a plain float raises, and a pandas Series
     # would be re-aligned by label against the new index in the constructor.
     times = _to_array(times)
     hazard = self.lambda_ * self.rho_ * (self.lambda_ * times) ** (self.rho_ - 1)
     return pd.Series(hazard, index=times)
Ejemplo n.º 12
0
 def survival_function_at_times(self, times):
     """
     Return a Pandas series of the survival function at specific times.

     Computed as ``exp(-H(t))`` where ``H`` is ``cumulative_hazard_at_times``.

     Parameters
     -----------
     times: iterable or float
       values to return the survival function at.

     Returns
     --------
     pd.Series
       indexed by the requested times.

     """
     # Take the cumulative hazard positionally. Passing a Series together with
     # `index=` makes the constructor reindex by label, which raises when the
     # requested times contain duplicates.
     cumulative_hazard = np.asarray(self.cumulative_hazard_at_times(times))
     return pd.Series(np.exp(-cumulative_hazard), index=_to_array(times))
Ejemplo n.º 13
0
def proportional_hazard_test(fitted_cox_model,
                             training_df,
                             time_transform="rank",
                             precomputed_residuals=None,
                             **kwargs):
    """
    Test whether any variable in a Cox model breaks the proportional hazard assumption.

    For every requested time transform, the scaled Schoenfeld residuals are
    combined with the centered, transformed event times into one statistic per
    covariate, and each statistic is passed to ``chisq_test`` with one degree
    of freedom.

    Parameters
    ----------
    fitted_cox_model: CoxPHFitter
        the fitted Cox model, fitted with `training_df`, you wish to test. Currently only the CoxPHFitter is supported,
        but later CoxTimeVaryingFitter, too.
    training_df: DataFrame
        the DataFrame used in the call to the Cox model's ``fit``. Only used when
        ``precomputed_residuals`` is not given.
    time_transform: vectorized function, list, or string, optional (default='rank')
        {'all', 'km', 'rank', 'identity', 'log'}
        One of the strings above, a list of strings, or a function to transform the time.
        A transform is called as ``transform(durations, events, weights)``.
        'all' will present all the transforms.
    precomputed_residuals: DataFrame, optional
        specify the scaled Schoenfeld residuals, if already computed.
    kwargs:
        additional parameters to add to the StatisticalResult

    Returns
    -------
    StatisticalResult

    Notes
    ------
    R uses the default `km`, we use `rank`, as this performs well versus other transforms. See
    http://eprints.lse.ac.uk/84988/1/06_ParkHendry2015-ReassessingSchoenfeldTests_Final.pdf

    """
    events = fitted_cox_model.event_observed
    durations = fitted_cox_model.durations
    weights = fitted_cox_model.weights
    deaths = events.sum()

    scaled_resids = precomputed_residuals
    if scaled_resids is None:
        scaled_resids = fitted_cox_model.compute_residuals(
            training_df, kind="scaled_schoenfeld")

    def compute_statistic(times, resids):
        # Center the transformed times (in place, as the caller hands over a
        # freshly selected subset) before correlating them with the residuals.
        times -= times.mean()
        numerator = (times.values[:, None] * resids.values).sum(0)**2
        denominator = (deaths * np.diag(fitted_cox_model.variance_matrix_) *
                       (times**2).sum())
        return numerator / denominator

    if time_transform == "all":
        time_transform = list(TimeTransformers.TIME_TRANSFOMERS.keys())

    if isinstance(time_transform, list):
        # Several transforms: accumulate one result per (covariate, transform).
        result = StatisticalResult([], [], [])
        for transform_name in time_transform:
            transform = TimeTransformers().get(transform_name)
            # keep only the times at which an event was observed
            times = transform(durations, events, weights)[events.values]
            statistics = compute_statistic(times, scaled_resids)
            p_values = _to_array([chisq_test(s, 1) for s in statistics])
            labels = [(c, transform_name)
                      for c in fitted_cox_model.hazards_.index]
            result += StatisticalResult(
                p_values,
                statistics,
                name=labels,
                test_name="proportional_hazard_test",
                null_distribution="chi squared",
                degrees_of_freedom=1,
                **kwargs)
        return result

    # Single transform, given by name or as a callable.
    time_transformer = TimeTransformers().get(time_transform)
    assert callable(
        time_transformer
    ), "time_transform must be a callable function, or a string: {'rank', 'km', 'identity', 'log'}."

    # keep only the times at which an event was observed
    times = time_transformer(durations, events, weights)[events.values]
    statistics = compute_statistic(times, scaled_resids)
    p_values = _to_array([chisq_test(s, 1) for s in statistics])

    return StatisticalResult(
        p_values,
        statistics,
        name=fitted_cox_model.hazards_.index.tolist(),
        test_name="proportional_hazard_test",
        time_transform=time_transform,
        null_distribution="chi squared",
        degrees_of_freedom=1,
        **kwargs)
Ejemplo n.º 14
0
 def hazard_at_times(self, times):
     """
     Return a Pandas series of the hazard at specific times.

     Computed as ``norm.pdf(z) / (sigma_ * t * S(t))`` with
     ``z = (log(t) - mu_) / sigma_`` and ``S`` given by
     ``survival_function_at_times``.

     Parameters
     -----------
     times: iterable or float
       values to return the hazard at.

     Returns
     --------
     pd.Series
       indexed by the requested times.

     """
     # Work with positional arrays throughout (assumes `_to_array` returns a
     # plain array). Dividing by the survival Series directly yields a Series,
     # which the constructor below would reindex by label and which raises
     # when the requested times contain duplicates.
     t = _to_array(times)
     survival = self.survival_function_at_times(t).values
     hazard = norm.pdf((log(t) - self.mu_) / self.sigma_) / (self.sigma_ * t * survival)
     return pd.Series(hazard, index=t)
Ejemplo n.º 15
0
 def hazard_at_times(self, times):
     """
     Return a Pandas series holding ``lambda_`` at each of the requested times.

     Parameters
     -----------
     times: iterable or float
       values to return the hazard at.

     Returns
     --------
     pd.Series

     """
     time_index = _to_array(times)
     # The same value is used for every requested time.
     return pd.Series(self.lambda_, index=time_index)
Ejemplo n.º 16
0
 def cumulative_hazard_at_times(self, times):
     """
     Return a Pandas series of the cumulative hazard at specific times.

     Computed as ``-log(1 - norm.cdf((log(t) - mu_) / sigma_))``.

     Parameters
     -----------
     times: iterable or float
       values to return the cumulative hazard at.

     Returns
     --------
     pd.Series

     """
     standardized = (log(times) - self.mu_) / self.sigma_
     survival = 1 - norm.cdf(standardized)
     return pd.Series(-log(survival), index=_to_array(times))
Ejemplo n.º 17
0
    def plot_covariate_groups(self,
                              covariates,
                              values,
                              plot_baseline=True,
                              **kwargs):
        """
        Plot predicted survival curves as one or more covariates are varied, optionally
        next to the baseline survival curve. Every other covariate is held at the value
        stored in the model's mean row, so the curves show the effect of the varied
        covariate(s) with all else being equal. The baseline curve is the prediction at
        that mean row itself.

        Parameters
        ----------
        covariates: string or list
            a string (or list of strings) of the covariate in the original dataset that we wish to vary.
        values: 1d or 2d iterable
            an iterable of the values we wish the covariate to take on. A 1d input is
            treated as a single column; otherwise there must be one column per covariate.
        plot_baseline: bool
            also display the baseline survival, defined as the survival at the mean of the original dataset.
        kwargs:
            pass in additional plotting commands. An ``ax`` entry, if present, is the
            axis to draw on.

        Returns
        -------
        ax: matplotlib axis
            the matplotlib axis that was drawn on and can be edited further.

        Raises
        ------
        ValueError
            if the number of covariates differs from the number of columns in ``values``.
        KeyError
            if a covariate is not among the model's parameters.

        Examples
        ---------

        >>> from lifelines import datasets, WeibullAFTFitter
        >>> rossi = datasets.load_rossi()
        >>> wf = WeibullAFTFitter().fit(rossi, 'week', 'arrest')
        >>> wf.plot_covariate_groups('prio', values=np.arange(0, 15), cmap='coolwarm')

        >>> # multiple variables at once
        >>> wf.plot_covariate_groups(['prio', 'paro'], values=[[0, 0], [5, 0], [10, 0], [0, 1], [5, 1], [10, 1]], cmap='coolwarm')

        >>> # if you have categorical variables, you can simplify things:
        >>> wf.plot_covariate_groups(['dummy1', 'dummy2', 'dummy3'], values=np.eye(3))


        """
        from matplotlib import pyplot as plt

        covariates = _to_list(covariates)
        values = _to_array(values)
        if values.ndim == 1:
            # a flat list of values describes a single covariate: make it a column
            values = values[:, None]

        n_groups, n_columns = values.shape[0], values.shape[1]
        if len(covariates) != n_columns:
            raise ValueError(
                "The number of covariates must equal to second dimension of the values array."
            )

        known_columns = self.params_.index.get_level_values(1)
        for name in covariates:
            if name not in known_columns:
                raise KeyError(
                    "covariate `%s` is not present in the original dataset" %
                    name)

        # Only create a figure once the inputs have been validated.
        ax = kwargs.pop("ax", None)
        if not ax:
            ax = plt.figure().add_subplot(111)

        # One copy of the mean row per group, for the model and the ancillary design.
        x_bar = self._norm_mean.to_frame().T
        X = pd.concat([x_bar] * n_groups)
        x_bar_anc = self._norm_mean_ancillary.to_frame().T
        ancillary_X = pd.concat([x_bar_anc] * n_groups)

        # Row labels end up naming the plotted curves.
        if np.array_equal(np.eye(len(covariates)), values):
            # one-hot encoded groups: label each curve by the active dummy
            row_labels = ["%s=1" % c for c in covariates]
        else:
            row_labels = [
                ", ".join("%s=%g" % (c, v) for (c, v) in zip(covariates, row))
                for row in values
            ]
        X.index = row_labels

        # Overwrite the varied covariates in both design matrices.
        for name, column in zip(covariates, values.T):
            X[name] = column
            ancillary_X[name] = column

        if self.fit_intercept:
            X["_intercept"] = 1.0
            ancillary_X["_intercept"] = 1.0

        group_curves = self.predict_survival_function(X, ancillary_X=ancillary_X)
        group_curves.plot(ax=ax, **kwargs)

        if plot_baseline:
            baseline_curve = self.predict_survival_function(
                x_bar, ancillary_X=x_bar_anc)
            baseline_curve = baseline_curve.rename(
                columns={0: "baseline survival"})
            baseline_curve.plot(ax=ax, ls=":", color="k")
        return ax