def predict_cumulative_hazard(self, X, times=None):
    """
    Predict the cumulative hazard of each individual in ``X``.

    Each curve is the baseline cumulative hazard (the one belonging to the
    individual's stratum when ``self.strata`` is set) multiplied by the
    individual's partial hazard.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data. When ``self.strata`` is set, ``X`` is
        split with ``X.groupby(self.strata)``.
    times: iterable, optional
        an iterable of increasing times to predict the cumulative hazard at.
        Default is the set of all durations (observed and unobserved).
        Uses a linear interpolation if points in time are not in the index.

    Returns
    -------
    DataFrame
        the cumulative hazard of individuals.

    Raises
    ------
    StatError
        if looking up a stratum of ``X`` in the baseline cumulative hazard
        raises ``KeyError``.
    """
    if not self.strata:
        baseline = self.baseline_cumulative_hazard_
        labels = _get_index(X)
        partial = self.predict_partial_hazard(X)
        result = pd.DataFrame(np.dot(baseline, partial.T), columns=labels, index=baseline.index)
    else:
        result = pd.DataFrame()
        for stratum, group in X.groupby(self.strata):
            try:
                baseline = self.baseline_cumulative_hazard_[[stratum]]
            except KeyError:
                raise StatError("""The stratum %s was not found in the original training data. For example, try the following on the original dataset, df: `df.groupby(%s).size()`. Expected is that %s is not present in the output. """ % (stratum, self.strata, stratum))
            labels = _get_index(group)
            partial = self.predict_partial_hazard(group)
            piece = pd.DataFrame(np.dot(baseline, partial.T), index=baseline.index, columns=labels)
            # Outer merge on the time index keeps every stratum's timeline.
            result = result.merge(piece, how='outer', right_index=True, left_index=True)

    if times is None:
        return result

    # non-linear interpolations can push the survival curves above 1 and below 0.
    expanded = result.reindex(result.index.union(times))
    return expanded.interpolate("index").loc[times]
def predict_log_partial_hazard(self, X):
    r"""
    Return the log of the partial hazard for the individuals.

    This is equivalent to R's linear.predictors. It is "partial" because the
    baseline hazard is not included. Equal to :math:`(x - \bar{x})'\beta`

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.

    Returns
    -------
    DataFrame

    Note
    -----
    If X is a DataFrame, the order of the columns do not matter. But
    if X is an array, then the column ordering is assumed to be the
    same as the training dataset.
    """
    if isinstance(X, pd.DataFrame):
        # Put the columns in the order of the fitted coefficients.
        X = X[self.hazards_.index]
        check_for_numeric_dtypes_or_raise(X)

    X = X.astype(float)
    subjects = _get_index(X)

    centered = normalize(X, self._norm_mean.values, 1)
    return pd.DataFrame(np.dot(centered, self.hazards_), index=subjects)
def predict_cumulative_hazard(self, X, id_col=None):
    """
    Return the cumulative hazards for the individuals in ``X``.

    The result is the matrix product of ``self.cumulative_hazards_`` with
    the (transposed) covariates, indexed by ``self.timeline``.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.
    id_col: optional
        not supported; any value other than None raises.

    Returns
    -------
    DataFrame
        one column per individual; negative values are replaced by 0 when
        ``self.nn_cumulative_hazard`` is truthy.

    Raises
    ------
    NotImplementedError
        if ``id_col`` is not None.
    """
    if id_col is not None:
        # see https://github.com/CamDavidsonPilon/lifelines/issues/38
        raise NotImplementedError

    n_rows, _ = X.shape
    subjects = _get_index(X)

    if isinstance(X, pd.DataFrame):
        ordering = self.cumulative_hazards_.columns
        if self.fit_intercept:
            # The intercept column is appended below, not read from X.
            ordering = ordering.drop('baseline')
        design = X[ordering].values.copy()
    else:
        design = X.copy()

    if self.fit_intercept:
        design = np.c_[design, np.ones((n_rows, 1))]

    hazards = pd.DataFrame(np.dot(self.cumulative_hazards_, design.T), index=self.timeline, columns=subjects)

    if self.nn_cumulative_hazard:
        hazards[hazards < 0.] = 0.

    return hazards
def predict_expectation(self, X):
    """
    Compute the expected lifetime, E[T], using covariates X.

    The predicted survival function of each individual is integrated over
    its time index with the trapezoidal rule (``trapz``).

    Parameters
    ----------
    X: covariates accepted by ``self.predict_survival_function``.

    Returns
    -------
    DataFrame
        one row per individual.
    """
    subjects = _get_index(X)
    survival = self.predict_survival_function(X)[subjects]
    area = trapz(survival.values.T, survival.index)
    return pd.DataFrame(area, index=subjects)
def predict_cumulative_hazard(self, X, times=None, ancillary_X=None):
    """
    Return the cumulative hazard rate of subjects in X at time points.

    Computed as ``-logsf(Z)`` with
    ``Z = (log(times) - log(exp_mu_)) / sigma_``.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.
    times: iterable, optional
        an iterable of increasing times to predict the cumulative hazard at.
        When omitted, ``coalesce`` picks among ``self.timeline`` and the
        unique values of ``self.durations``.
    ancillary_X: numpy array or DataFrame, optional
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.

    Returns
    -------
    cumulative_hazard_ : DataFrame
        the cumulative hazard of individuals over the timeline
    """
    import numpy as np

    times = coalesce(times, self.timeline, np.unique(self.durations))
    exp_mu_, sigma_ = self._prep_inputs_for_prediction_and_return_scores(X, ancillary_X)

    log_location = np.log(exp_mu_)
    # Rows follow `times`, columns follow the individuals.
    standardized = np.subtract.outer(np.log(times), log_location) / sigma_
    return pd.DataFrame(-logsf(standardized), columns=_get_index(X), index=times)
def predict_median(self, X, ancillary_X=None):
    """
    Returns the median lifetimes for the individuals.

    The median is the first score (``exp_mu_``) returned by
    ``self._prep_inputs_for_prediction_and_return_scores``.

    http://stats.stackexchange.com/questions/102986/percentile-loss-functions

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.
    ancillary_X: numpy array or DataFrame, optional
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.

    Returns
    -------
    DataFrame

    See Also
    --------
    predict_percentile
    """
    scores = self._prep_inputs_for_prediction_and_return_scores(X, ancillary_X)
    median_lifetimes, _ = scores
    return pd.DataFrame(median_lifetimes, index=_get_index(X))
def predict_expectation(self, X, ancillary_X=None):
    """
    Predict the expectation of lifetimes, :math:`E[T | x]`.

    Computed as ``exp_mu_ * exp(sigma_ ** 2 / 2)``.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.
    ancillary_X: numpy array or DataFrame, optional
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.

    Returns
    -------
    expectations: DataFrame
        the expected lifetimes for the individuals.

    See Also
    --------
    predict_median
    """
    exp_mu_, sigma_ = self._prep_inputs_for_prediction_and_return_scores(X, ancillary_X)
    spread_factor = np.exp(sigma_ ** 2 / 2)
    return pd.DataFrame(exp_mu_ * spread_factor, index=_get_index(X))
def predict_partial_hazard(self, X):
    r"""
    Return the partial hazard for the individuals.

    It is "partial" because the baseline hazard is not included.
    Equal to :math:`\exp{\beta X}`.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.

    Returns
    -------
    DataFrame
        one row per individual.

    Notes
    -----
    If covariates were normalized during fitting, they are normalized in
    the same way here.

    If X is a dataframe, the order of the columns do not matter. But if X
    is an array, then the column ordering is assumed to be the same as the
    training dataset.
    """
    # NOTE: the docstring is a raw string so that ``\beta`` is not read as a
    # backspace escape followed by "eta".
    index = _get_index(X)

    if isinstance(X, pd.DataFrame):
        order = self.hazards_.columns
        X = X[order]

    if self.normalize:
        # Assuming correct ordering and number of columns
        X = normalize(X, self._norm_mean.values, self._norm_std.values)

    return pd.DataFrame(exp(np.dot(X, self.hazards_.T)), index=index)
def predict_cumulative_hazard(self, X):
    """
    Returns the cumulative hazards for the individuals.

    The result is the matrix product of ``self.cumulative_hazards_`` with
    the (transposed) covariates, indexed by ``self._index``.

    Parameters
    ----------
    X: a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.

    Returns
    -------
    DataFrame
        one column per individual.
    """
    n_rows, _ = X.shape
    subjects = _get_index(X)

    if isinstance(X, pd.DataFrame):
        ordering = self.cumulative_hazards_.columns
        if self.fit_intercept:
            # The intercept column is appended below, not read from X.
            ordering = ordering.drop("_intercept")
        design = X[ordering].values
    else:
        design = X

    if self.fit_intercept:
        design = np.c_[design, np.ones((n_rows, 1))]

    timeline = self._index
    return pd.DataFrame(np.dot(self.cumulative_hazards_, design.T), index=timeline, columns=subjects)
def predict_percentile(self, X, p=0.5):
    """
    Return the ``p``-th survival times of the individuals; by default
    (``p=0.5``) these are the median lifetimes.

    http://stats.stackexchange.com/questions/102986/percentile-loss-functions

    Parameters
    ----------
    X: a (n,d) covariate matrix
    p: float, optional (default=0.5)
        passed to ``qth_survival_times`` together with the predicted
        survival functions.
    """
    subjects = _get_index(X)
    survival = self.predict_survival_function(X)[subjects]
    return qth_survival_times(p, survival)
def predict_cumulative_hazard(self, X, times=None):
    """
    Predict the cumulative hazard of each individual in ``X``.

    Each curve is the baseline cumulative hazard (the one belonging to the
    individual's stratum when ``self.strata`` is set) multiplied by the
    individual's partial hazard.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data. When ``self.strata`` is set, ``X`` is
        split with ``X.groupby(self.strata)``.
    times: iterable, optional
        an iterable of increasing times to predict the cumulative hazard at.
        Default is the set of all durations (observed and unobserved).
        Uses a linear interpolation if points in time are not in the index.

    Returns
    -------
    DataFrame
        the cumulative hazard of individuals.

    Raises
    ------
    StatError
        if looking up a stratum of ``X`` in the baseline cumulative hazard
        raises ``KeyError``.
    """

    def _scaled(baseline, covariates):
        # Baseline curve(s) times partial hazards, one column per individual.
        labels = _get_index(covariates)
        partial = self.predict_partial_hazard(covariates)
        return pd.DataFrame(np.dot(baseline, partial.T), index=baseline.index, columns=labels)

    if self.strata:
        curves = pd.DataFrame()
        for stratum, group in X.groupby(self.strata):
            try:
                stratum_baseline = self.baseline_cumulative_hazard_[[stratum]]
            except KeyError:
                raise StatError(
                    """The stratum %s was not found in the original training data. For example, try the following on the original dataset, df: `df.groupby(%s).size()`. Expected is that %s is not present in the output. """
                    % (stratum, self.strata, stratum))
            curves = curves.merge(
                _scaled(stratum_baseline, group), how='outer', right_index=True, left_index=True)
    else:
        curves = _scaled(self.baseline_cumulative_hazard_, X)

    if times is None:
        return curves

    # non-linear interpolations can push the survival curves above 1 and below 0.
    with_requested_times = curves.reindex(curves.index.union(times))
    return with_requested_times.interpolate("index").loc[times]
def predict_percentile(self, df, ancillary_df=None, p=0.5, conditional_after=None):
    """
    Returns the median lifetimes for the individuals, by default. If the
    survival curve of an individual does not cross ``p``, then the result
    is infinity.

    http://stats.stackexchange.com/questions/102986/percentile-loss-functions

    Parameters
    ----------
    df: DataFrame
        a (n,d) DataFrame. Columns can be in any order.
    ancillary_df: DataFrame, optional
        a (n,d) DataFrame. Columns can be in any order.
    p: float, optional (default=0.5)
        the percentile, must be between 0 and 1.
    conditional_after: iterable, optional
        converted with ``np.asarray``. When given, the survival level is
        rescaled by the survival at ``conditional_after`` and
        ``conditional_after`` is subtracted from the resulting time.

    Returns
    -------
    percentiles: DataFrame

    See Also
    --------
    predict_median
    """
    alpha_, beta_ = self._prep_inputs_for_prediction_and_return_scores(df, ancillary_df)

    if conditional_after is not None:
        conditional_after = np.asarray(conditional_after)
        survived_so_far = 1 / (1 + (conditional_after / alpha_)**beta_)
        remaining = alpha_ * (1 / (p * survived_so_far) - 1)**(1 / beta_) - conditional_after
        return pd.DataFrame(remaining, index=_get_index(df))

    return pd.DataFrame(alpha_ * (1 / (p) - 1)**(1 / beta_), index=_get_index(df))
def predict_expectation(self, X, ancillary_X=None):
    """
    Predict the expected lifetimes for the individuals.

    Computed as ``lambda_ * gamma(1 + 1 / rho_)`` where ``lambda_`` and
    ``rho_`` are the exponentiated linear predictors built from ``X`` and
    ``ancillary_X``.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.
    ancillary_X: numpy array or DataFrame, optional
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data. Defaults to a single ``_intercept``
        column of ones.

    Returns
    -------
    expectations: DataFrame
        the expected lifetimes for the individuals.

    See Also
    --------
    predict_median
    """
    X = X.copy()

    if ancillary_X is None:
        ancillary_X = pd.DataFrame(np.ones((X.shape[0], 1)), columns=["_intercept"])
    elif not isinstance(ancillary_X, pd.DataFrame):
        assert ancillary_X.shape[1] == (self.params_.loc["rho_"].shape[0] + 1)  # 1 for _intercept
    else:
        ancillary_X = ancillary_X.copy()
        if self.fit_intercept:
            ancillary_X["_intercept"] = 1.0
        ancillary_X = ancillary_X[self.params_.loc["rho_"].index]

    if not isinstance(X, pd.DataFrame):
        assert X.shape[1] == (self.params_.loc["lambda_"].shape[0] + 1)  # 1 for _intercept
    else:
        if self.fit_intercept:
            X["_intercept"] = 1.0
        X = X[self.params_.loc["lambda_"].index]

    lambda_coefs = self.params_[self._LOOKUP_SLICE["lambda_"]]
    scale = np.exp(np.dot(X, lambda_coefs))

    rho_coefs = self.params_[self._LOOKUP_SLICE["rho_"]]
    shape = np.exp(np.dot(ancillary_X, rho_coefs))

    subjects = _get_index(X)
    return pd.DataFrame((scale * gamma(1 + 1 / shape)), index=subjects)
def predict_cumulative_hazard(self, X):
    """
    Returns the cumulative hazard for the individuals.

    Computed as minus the log of the baseline survival, multiplied by each
    individual's partial hazard.

    Parameters
    ----------
    X: a (n,d) covariate matrix

    Returns
    -------
    DataFrame
        indexed like ``self.baseline_survival_``, one column per individual.
    """
    partial = self.predict_partial_hazard(X)
    baseline_survival = self.baseline_survival_
    subjects = _get_index(X)
    cumulative = -np.dot(np.log(baseline_survival), partial.T)
    return pd.DataFrame(cumulative, index=self.baseline_survival_.index, columns=subjects)
def predict_percentile(
    self,
    df: DataFrame,
    *,
    ancillary_df: Optional[DataFrame] = None,
    p: float = 0.5,
    conditional_after: Optional[ndarray] = None
) -> DataFrame:
    """
    Returns the median lifetimes for the individuals, by default. If the
    survival curve of an individual does not cross ``p``, then the result
    is infinity.

    http://stats.stackexchange.com/questions/102986/percentile-loss-functions

    Parameters
    ----------
    df: DataFrame
        a (n,d) covariate DataFrame. Columns can be in any order.
    ancillary_df: DataFrame, optional
        a (n,d) covariate DataFrame. Columns can be in any order.
    p: float, optional (default=0.5)
        the percentile, must be between 0 and 1.
    conditional_after: iterable, optional
        Must be equal is size to df.shape[0] (denoted `n` above). An
        iterable (array, list, series) of possibly non-zero values that
        represent how long the subject has already lived for. Ex: if
        :math:`T` is the unknown event time, then this represents
        :math:`T | T > s`. This is useful for knowing the *remaining*
        hazard/survival of censored subjects. The new timeline is the
        remaining duration of the subject, i.e. normalized back to
        starting at 0.

    Returns
    -------
    percentiles: DataFrame

    See Also
    --------
    predict_median
    """
    exp_mu_, sigma_ = self._prep_inputs_for_prediction_and_return_scores(df, ancillary_df)

    if conditional_after is not None:
        conditional_after = np.asarray(conditional_after)
        # Survival probability at the time each subject has already reached.
        Z = (np.log(conditional_after) - np.log(exp_mu_)) / sigma_
        S = norm.sf(Z)
        absolute_time = exp_mu_ * np.exp(np.sqrt(2) * sigma_ * erfinv(2 * (1 - p * S) - 1))
        return pd.DataFrame(absolute_time - conditional_after, index=_get_index(df))

    unconditional = exp_mu_ * np.exp(np.sqrt(2) * sigma_ * erfinv(2 * (1 - p) - 1))
    return pd.DataFrame(unconditional, index=_get_index(df))
def predict_expectation(self, X):
    """
    Compute the expected lifetime, E[T], using covariates X.

    The predicted survival function of each individual is integrated over
    its time index with the trapezoidal rule (``trapz``).

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.

    Returns
    -------
    DataFrame
        one row per individual.
    """
    subjects = _get_index(X)
    survival = self.predict_survival_function(X)[subjects]
    area = trapz(survival.values.T, survival.index)
    return pd.DataFrame(area, index=subjects)
def predict_percentile(self, X, p=0.5):
    """
    By default, returns the median lifetimes for the individuals.

    http://stats.stackexchange.com/questions/102986/percentile-loss-functions

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.
    p: float, optional (default=0.5)
        passed to ``qth_survival_times`` together with the predicted
        survival functions.
    """
    subjects = _get_index(X)
    survival = self.predict_survival_function(X)[subjects]
    return qth_survival_times(p, survival)
def predict_cumulative_hazard(self, X):
    """
    Return the cumulative hazard of each individual in ``X``.

    Computed as minus the log of the baseline survival (the one belonging
    to the individual's stratum when ``self.strata`` is set), multiplied by
    the individual's partial hazard.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data. When ``self.strata`` is set, ``X`` is
        split with ``X.groupby(self.strata)``.

    Returns
    -------
    DataFrame
        one column per individual.
    """
    if not self.strata:
        baseline = self.baseline_survival_
        subjects = _get_index(X)
        partial = self.predict_partial_hazard(X)
        return pd.DataFrame(-np.dot(np.log(baseline), partial.T), columns=subjects, index=baseline.index)

    combined = pd.DataFrame()
    for stratum, group in X.groupby(self.strata):
        baseline = self.baseline_survival_[[stratum]]
        subjects = _get_index(group)
        partial = self.predict_partial_hazard(group)
        piece = pd.DataFrame(-np.dot(np.log(baseline), partial.T), index=baseline.index, columns=subjects)
        # Outer merge on the time index keeps every stratum's timeline.
        combined = combined.merge(piece, how='outer', right_index=True, left_index=True)
    return combined
def predict_cumulative_hazard(self, X):
    """
    Returns the cumulative hazard for the individuals.

    Computed as minus the log of the baseline survival, multiplied by each
    individual's partial hazard.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.

    Returns
    -------
    DataFrame
        indexed like ``self.baseline_survival_``, one column per individual.
    """
    partial = self.predict_partial_hazard(X)
    baseline_survival = self.baseline_survival_
    subjects = _get_index(X)
    cumulative = -np.dot(np.log(baseline_survival), partial.T)
    return pd.DataFrame(cumulative, index=self.baseline_survival_.index, columns=subjects)
def predict_expectation(self, X):
    """
    Compute the expected lifetime, E[T], using covariates X.

    The predicted survival functions are integrated with the trapezoidal
    rule (``trapz``) over the index of ``self.cumulative_hazards_``.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.

    Returns
    -------
    DataFrame
        the expected lifetimes for the individuals.
    """
    subjects = _get_index(X)
    timeline = self.cumulative_hazards_.index
    survival = self.predict_survival_function(X)[subjects]
    return pd.DataFrame(trapz(survival.values.T, timeline), index=subjects)
def predict_cumulative_hazard(self, X, times=None):
    """
    Predict the cumulative hazard of each individual in ``X``.

    Each curve is the baseline cumulative hazard (the one belonging to the
    individual's stratum when ``self.strata`` is set) multiplied by the
    individual's partial hazard.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data. When ``self.strata`` is set, ``X`` is
        split with ``X.groupby(self.strata)``.
    times: iterable, optional
        an iterable of increasing times to predict the cumulative hazard at.
        Default is the set of all durations (observed and unobserved).
        Uses a linear interpolation if points in time are not in the index.

    Returns
    -------
    DataFrame
        the cumulative hazard of individuals.
    """
    if not self.strata:
        baseline = self.baseline_cumulative_hazard_
        labels = _get_index(X)
        partial = self.predict_partial_hazard(X)
        curves = pd.DataFrame(np.dot(baseline, partial.T), columns=labels, index=baseline.index)
    else:
        curves = pd.DataFrame()
        for stratum, group in X.groupby(self.strata):
            baseline = self.baseline_cumulative_hazard_[[stratum]]
            labels = _get_index(group)
            partial = self.predict_partial_hazard(group)
            piece = pd.DataFrame(np.dot(baseline, partial.T), index=baseline.index, columns=labels)
            # Outer merge on the time index keeps every stratum's timeline.
            curves = curves.merge(piece, how='outer', right_index=True, left_index=True)

    if times is None:
        return curves

    # non-linear interpolations can push the survival curves above 1 and below 0.
    with_requested_times = curves.reindex(curves.index.union(times))
    return with_requested_times.interpolate("index").loc[times]
def predict_percentile(self, X, p=0.5):
    """
    Returns the median lifetimes for the individuals, by default. If the
    survival curve of an individual does not cross 0.5, then the result is
    infinity.

    http://stats.stackexchange.com/questions/102986/percentile-loss-functions

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.
    p: float, optional (default=0.5)
        passed to ``qth_survival_times`` together with the predicted
        survival functions.

    Returns
    -------
    the transpose of what ``qth_survival_times`` returns.
    """
    subjects = _get_index(X)
    survival = self.predict_survival_function(X)[subjects]
    percentiles = qth_survival_times(p, survival)
    return percentiles.T
def predict_percentile(
    self,
    df: DataFrame,
    *,
    ancillary_df: Optional[DataFrame] = None,
    p: float = 0.5,
    conditional_after: Optional[ndarray] = None
) -> DataFrame:
    """
    Returns the median lifetimes for the individuals, by default. If the
    survival curve of an individual does not cross ``p``, then the result
    is infinity.

    http://stats.stackexchange.com/questions/102986/percentile-loss-functions

    Parameters
    ----------
    df: DataFrame
        a (n,d) DataFrame. Columns can be in any order.
    ancillary_df: DataFrame, optional
        a (n,d) DataFrame. Columns can be in any order.
    p: float, optional (default=0.5)
        the percentile, must be between 0 and 1.
    conditional_after: iterable, optional
        how long each subject has already lived for, one value per row of
        ``df``, matched to rows by position. The returned times are the
        remaining durations, i.e. ``conditional_after`` is subtracted from
        the result. Defaults to zeros.

    Returns
    -------
    percentiles: DataFrame

    See Also
    --------
    predict_median
    """
    lambda_, rho_ = self._prep_inputs_for_prediction_and_return_scores(df, ancillary_df)

    if conditional_after is None:
        # Zero elapsed time gives the unconditional percentile.
        if len(df.shape) == 2:
            conditional_after = np.zeros(df.shape[0])
        elif len(df.shape) == 1:
            conditional_after = np.zeros(1)
    else:
        # Strip any pandas index: a Series would otherwise be label-aligned
        # against ``index=`` in the DataFrame constructor below, yielding
        # NaN for labels that do not match.
        conditional_after = np.asarray(conditional_after)

    remaining = (
        lambda_ * np.power(-np.log(p) + (conditional_after / lambda_) ** rho_, 1 / rho_)
        - conditional_after
    )
    return pd.DataFrame(remaining, index=_get_index(df))
def predict_expectation(self, X):
    """
    Compute the expected lifetime, E[T], using covariates X.

    The algorithm uses the fact that

        E[T] = int_0^inf P(T > t) dt = int_0^inf S(t) dt

    and approximates the integral with the trapezoidal rule. However, if
    the survival function, S(t), doesn't converge to 0, then the
    expectation is really infinity.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.

    Returns
    -------
    DataFrame
        one row per individual.
    """
    individuals = _get_index(X)
    survival = self.predict_survival_function(X)[individuals]
    area = trapz(survival.values.T, survival.index)
    return pd.DataFrame(area, index=individuals)
def predict_log_partial_hazard(self, X):
    r"""
    Return the log of the partial hazard for the individuals.

    This is equivalent to R's linear.predictors. It is "partial" because the
    baseline hazard is not included. Equal to
    :math:`\beta (X - mean(X_{train}))`

    Parameters
    ----------
    X: numpy array, DataFrame or Series
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data. A Series whose length equals the number
        of coefficients (or that number plus 2) is treated as a single
        individual; any other Series is only accepted when the model has
        exactly one coefficient.

    Returns
    -------
    log_partial_hazard: DataFrame

    Notes
    -----
    If X is a dataframe, the order of the columns do not matter. But
    if X is an array, then the column ordering is assumed to be the
    same as the training dataset.
    """
    hazard_names = self.hazards_.columns
    n_coefs = len(hazard_names)

    if isinstance(X, pd.DataFrame):
        X = X[hazard_names]
        pass_for_numeric_dtypes_or_raise(X)
    elif isinstance(X, pd.Series):
        if X.shape[0] in (n_coefs + 2, n_coefs):
            # One individual given as a Series: turn it into a single row.
            X = X.to_frame().T
            X = X[hazard_names]
        else:
            assert len(hazard_names) == 1, "Series not the correct arugment"
            X = pd.DataFrame(X)
        pass_for_numeric_dtypes_or_raise(X)

    X = X.astype(float)
    subjects = _get_index(X)

    centered = normalize(X, self._norm_mean.values, 1)
    return pd.DataFrame(np.dot(centered, self.hazards_.T), index=subjects)
def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> pd.DataFrame:
    """
    Return the cumulative hazard rate of subjects in df at time points.

    Parameters
    ----------
    df: DataFrame or Series
        a (n,d) covariate DataFrame. Columns can be in any order. A Series
        is treated as a single subject and converted to a one-row
        DataFrame.
    times: iterable, optional
        an iterable of increasing times to predict the cumulative hazard at.
        When omitted, ``coalesce`` picks among ``self.timeline`` and the
        unique values of ``self.durations``.
    conditional_after: optional
        not supported; any value other than None raises.

    Returns
    -------
    cumulative_hazard_ : DataFrame
        the cumulative hazard of individuals over the timeline

    Raises
    ------
    NotImplementedError
        if ``conditional_after`` is not None.
    """
    if isinstance(df, pd.Series):
        # Forward every argument: dropping `times` here would silently
        # return the default timeline for Series input.
        return self.predict_cumulative_hazard(
            df.to_frame().T, times=times, conditional_after=conditional_after
        )

    if conditional_after is not None:
        raise NotImplementedError()

    times = np.atleast_1d(coalesce(times, self.timeline, np.unique(self.durations))).astype(float)
    n = times.shape[0]
    times = times.reshape((n, 1))

    lambdas_ = self._prep_inputs_for_prediction_and_return_parameters(df)

    # Append +inf so the last interval is open-ended.
    bp = np.append(self.breakpoints, [np.inf])
    # Cap each breakpoint at the query time, then difference along the
    # breakpoint axis to get per-interval increments for every time.
    M = np.minimum(np.tile(bp, (n, 1)), times)
    M = np.hstack([M[:, tuple([0])], np.diff(M, axis=1)])

    return pd.DataFrame(np.dot(M, (1 / lambdas_)), columns=_get_index(df), index=times[:, 0])
def predict_log_partial_hazard(self, X):
    r"""
    Return the log of the partial hazard for the individuals.

    It is "partial" because the baseline hazard is not included.
    Equal to :math:`\beta X`, evaluated on ``X`` after it has been passed
    through ``normalize(X, self._norm_mean.values, 1)``.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.

    Returns
    -------
    DataFrame
        one row per individual.

    Notes
    -----
    If X is a dataframe, the order of the columns do not matter. But
    if X is an array, then the column ordering is assumed to be the
    same as the training dataset.
    """
    # NOTE: the docstring is a raw string so that ``\beta`` is not read as a
    # backspace escape followed by "eta".
    if isinstance(X, pd.DataFrame):
        order = self.hazards_.columns
        X = X[order]

    index = _get_index(X)
    X = normalize(X, self._norm_mean.values, 1)
    return pd.DataFrame(np.dot(X, self.hazards_.T), index=index)
def predict_cumulative_hazard(self, X, times=None):
    """
    Return the cumulative hazard rate of subjects in X at time points.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.
    times: iterable, optional
        an iterable of increasing times to predict the cumulative hazard at.
        When omitted, ``coalesce`` picks among ``self.timeline`` and the
        unique values of ``self.durations``.

    Returns
    -------
    cumulative_hazard_ : DataFrame
        the cumulative hazard of individuals over the timeline
    """
    times = np.asarray(coalesce(times, self.timeline, np.unique(self.durations)))
    n_times = times.shape[0]
    times = times.reshape((n_times, 1))

    lambdas_ = self._prep_inputs_for_prediction_and_return_parameters(X)

    # Cap each breakpoint at the query time, then difference along the
    # breakpoint axis to get per-interval increments for every time.
    capped = np.minimum(np.tile(self.breakpoints, (n_times, 1)), times)
    increments = np.hstack([capped[:, (0,)], np.diff(capped, axis=1)])

    return pd.DataFrame(np.dot(increments, (1 / lambdas_)), columns=_get_index(X), index=times[:, 0])
def predict_cumulative_hazard(self, X, id_col=None):
    """
    Return the cumulative hazards for the individuals in ``X``.

    The result is the matrix product of ``self.cumulative_hazards_`` with
    the (transposed) covariates, indexed by ``self.timeline``.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.
    id_col: optional
        not supported; any value other than None raises.

    Returns
    -------
    DataFrame
        one column per individual.

    Raises
    ------
    NotImplementedError
        if ``id_col`` is not None.
    """
    if id_col is not None:
        # see https://github.com/CamDavidsonPilon/lifelines/issues/38
        raise NotImplementedError

    n_rows, _ = X.shape
    subjects = _get_index(X)

    if isinstance(X, pd.DataFrame):
        ordering = self.cumulative_hazards_.columns
        if self.fit_intercept:
            # The intercept column is appended below, not read from X.
            ordering = ordering.drop('baseline')
        design = X[ordering].values.copy()
    else:
        design = X.copy()

    if self.fit_intercept:
        design = np.c_[design, np.ones((n_rows, 1))]

    return pd.DataFrame(np.dot(self.cumulative_hazards_, design.T), index=self.timeline, columns=subjects)
def predict_expectation(self, X):
    r"""
    Compute the expected lifetime, :math:`E[T]`, using covariates X.

    The algorithm uses the fact that
    :math:`E[T] = \int_0^\inf P(T > t) dt = \int_0^\inf S(t) dt` and
    approximates the integral with the trapezoidal rule.

    Caution
    --------
    However, if the survival function doesn't converge to 0, then the
    expectation is really infinity and the returned values are
    meaningless/too large. In that case, using ``predict_median`` or
    ``predict_percentile`` would be better.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.

    Returns
    -------
    expectations : DataFrame

    Notes
    -----
    If X is a dataframe, the order of the columns do not matter. But
    if X is an array, then the column ordering is assumed to be the
    same as the training dataset.

    See Also
    --------
    predict_median
    predict_percentile
    """
    individuals = _get_index(X)
    survival = self.predict_survival_function(X)[individuals]
    area = trapz(survival.values.T, survival.index)
    return pd.DataFrame(area, index=individuals)
def predict_cumulative_hazard(self, X):
    """
    Returns the cumulative hazards for the individuals.

    The result is the matrix product of ``self.cumulative_hazards_`` with
    the (transposed) covariates, indexed by ``self._index``.

    Parameters
    ----------
    X: numpy array, DataFrame or Series
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data. A DataFrame is first passed through
        ``self.regressors.transform_df`` and its ``"beta_"`` part is used.
        A Series is converted to a one-row DataFrame and the method is
        called again.

    Returns
    -------
    DataFrame
        one column per individual.
    """
    subjects = _get_index(X)

    if isinstance(X, pd.Series):
        return self.predict_cumulative_hazard(X.to_frame().T.infer_objects())
    if isinstance(X, pd.DataFrame):
        X = self.regressors.transform_df(X)["beta_"]

    X = X.astype(float)

    timeline = self._index
    return pd.DataFrame(np.dot(self.cumulative_hazards_, X.T), index=timeline, columns=subjects)
def predict_percentile(self, X, ancillary_X=None, p=0.5):
    """
    Returns the median lifetimes for the individuals, by default. If the
    survival curve of an individual does not cross ``p``, then the result
    is infinity.

    ``p`` is a level of the survival function: the returned time ``t``
    satisfies ``S(t) = p``.

    http://stats.stackexchange.com/questions/102986/percentile-loss-functions

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.
    ancillary_X: numpy array or DataFrame, optional
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the same
        order as the training data.
    p: float, optional (default=0.5)
        the percentile, must be between 0 and 1.

    Returns
    -------
    percentiles: DataFrame

    See Also
    --------
    predict_median
    """
    exp_mu_, sigma_ = self._prep_inputs_for_prediction_and_return_scores(X, ancillary_X)
    # S(t) = p means the standard-normal CDF of (log t - mu) / sigma equals
    # 1 - p, whose quantile is sqrt(2) * erfinv(2 * (1 - p) - 1). Using
    # erfinv(2 * p - 1) would instead solve S(t) = 1 - p; the two agree
    # only at p = 0.5.
    z = np.sqrt(2) * erfinv(2 * (1 - p) - 1)
    return pd.DataFrame(exp_mu_ * np.exp(sigma_ * z), index=_get_index(X))