def bch_at(df, time):
    """Return the baseline cumulative hazard of ``df`` at ``time``.

    An exact label lookup in the ``"baseline cumulative hazard"`` column is
    tried first. If ``time`` is not a label of the index, the value comes
    from ``interpolate_at_times`` instead.
    """
    try:
        return df.loc[time, "baseline cumulative hazard"]
    except KeyError:
        # The index holds floats, so the requested time may not match any
        # label exactly; fall back to interpolating at that time.
        return interpolate_at_times(df, [time])[0]
def _run_xgboost(model: xgb.Booster, data: pd.DataFrame) -> pd.DataFrame:
    """Retrieve the win probability.

    Parameters
    ----------
    model : xgb.Booster
        The fitted XGBoost model. Its ``cumulative_hazard_`` attribute is
        read to obtain the baseline cumulative hazard.
    data : pd.DataFrame
        The input dataset to be evaluated. It must contain a ``"stop"``
        column plus the ``META["static"]`` and ``META["dynamic"]`` columns.

    Returns
    -------
    pd.DataFrame
        A copy of ``data`` in which ``META["survival"]`` is set to
        ``1 - exp(-(baseline cumulative hazard * partial hazard))``.
    """
    feature_columns = META["static"] + META["dynamic"]

    # Partial hazard predicted by the booster for every row
    partial_hazard = model.predict(xgb.DMatrix(data[feature_columns]))

    # Baseline cumulative hazard evaluated at each row's stop time
    baseline = interpolate_at_times(model.cumulative_hazard_, data["stop"].values)
    cumulative_hazard = baseline * partial_hazard

    result = data.copy()
    result[META["survival"]] = 1 - np.exp(-cumulative_hazard)
    return result
def _run_lifelines(model: CoxTimeVaryingFitter, data: pd.DataFrame) -> pd.DataFrame:
    """Retrieve the win probability.

    Parameters
    ----------
    model : CoxTimeVaryingFitter
        The fitted model.
    data : pd.DataFrame
        The input dataset to be evaluated. It must contain a ``"stop"``
        column plus the ``META["static"]`` and ``META["dynamic"]`` columns.

    Returns
    -------
    pd.DataFrame
        A copy of ``data`` in which ``META["survival"]`` is set to
        ``1 - exp(-(baseline cumulative hazard * partial hazard))``.
    """
    feature_columns = META["static"] + META["dynamic"]

    # Partial hazard of every row, then the baseline cumulative hazard
    # interpolated at each row's stop time
    partial_hazard = model.predict_partial_hazard(data[feature_columns])
    baseline = interpolate_at_times(
        model.baseline_cumulative_hazard_, data["stop"].values
    )
    cumulative_hazard = baseline * partial_hazard.values

    # exp(-cumulative hazard) is subtracted from one before being stored
    result = data.copy()
    result[META["survival"]] = 1 - np.exp(-cumulative_hazard)
    return result
def func(params):
    """Fit a ``CoxTimeVaryingFitter`` with ``params`` and score it.

    The model is fitted on ``train_data`` and a ROC AUC is computed for each
    dataset in ``tune_data``. The loss is the negated average of those
    scores, so a lower loss means a higher AUC.

    Parameters
    ----------
    params : dict
        Keyword arguments for ``CoxTimeVaryingFitter``, merged with the
        enclosing scope's ``kwargs``.

    Returns
    -------
    dict
        ``{"loss": <negated mean AUC>, "status": STATUS_OK}``.
    """
    model = CoxTimeVaryingFitter(**params, **kwargs)
    fit_columns = (
        [META["id"], META["event"], "start", "stop"]
        + META["static"]
        + META["dynamic"]
    )
    model.fit(
        train_data[fit_columns],
        id_col=META["id"],
        event_col=META["event"],
        start_col="start",
        stop_col="stop",
    )

    def _auc(dataset):
        # ROC AUC of the predicted event probability on one tuning dataset
        partial_hazard = model.predict_partial_hazard(dataset)
        baseline = interpolate_at_times(
            model.baseline_cumulative_hazard_, dataset["stop"].values
        )
        return roc_auc_score(
            y_true=dataset[META["event"]],
            y_score=1 - np.exp(-(baseline * partial_hazard.values)),
        )

    scores: List[float] = [_auc(dataset) for dataset in tune_data]
    loss = -np.average(np.array(scores))

    # Record the best parameters seen so far on the enclosing object
    if loss < self.metric_:
        self.metric_ = loss
        self.best_.update(params)
        self.logger.info(
            f"New best metric value of {self.metric_} with \n\n{pformat(self.best_)}\n"
        )

    return {"loss": loss, "status": STATUS_OK}
def predict_at_time(self, x, times_to_evaluate_at):
    """Evaluate the cumulative hazard for the rows of ``x`` at given times.

    Parameters
    ----------
    x
        Covariates passed to ``self.model.predict_partial_hazard``; must
        support ``len``.
    times_to_evaluate_at
        A single time (any scalar) or a sequence of times. A scalar, or a
        sequence whose length differs from ``len(x)``, is repeated once per
        row of ``x``. A sequence with exactly ``len(x)`` entries is used
        as-is (presumably one time per row -- confirm with callers).

    Returns
    -------
    The interpolated baseline cumulative hazard (transposed) multiplied by
    the partial hazards of ``x``.
    """
    # ``np.ndim`` is 0 for every scalar (int, float, NumPy scalar, 0-d
    # array). An ``isinstance(..., float)`` check alone lets an int time
    # reach ``len()`` and fail with ``TypeError``.
    is_scalar = np.ndim(times_to_evaluate_at) == 0
    if is_scalar or len(times_to_evaluate_at) != len(x):
        times_to_evaluate_at = np.tile(times_to_evaluate_at, (len(x), 1))

    c_0 = interpolate_at_times(
        self.model.baseline_cumulative_hazard_, times_to_evaluate_at
    ).T
    v = self.model.predict_partial_hazard(x)
    return c_0 * v.values
def func(params):
    """Train an XGBoost ``survival:cox`` model with ``params`` and score it.

    The booster is trained on ``dtrain`` and a cumulative hazard is built
    with ``_generate_cumulative_hazard``. A ROC AUC is then computed for
    each dataset in ``tune_data``. The loss is the negated average of those
    scores, so a lower loss means a higher AUC.

    Parameters
    ----------
    params : dict
        Hyperparameter values. ``max_depth`` and ``min_child_weight`` are
        cast to ``int``; the other listed keys are forwarded unchanged.

    Returns
    -------
    dict
        ``{"loss": <negated mean AUC>, "status": STATUS_OK}``.
    """
    # Forwarded hyperparameters, in the order they are handed to xgboost
    param_names = (
        "learning_rate",
        "subsample",
        "max_delta_step",
        "max_depth",
        "gamma",
        "reg_alpha",
        "reg_lambda",
        "colsample_bytree",
        "colsample_bylevel",
        "colsample_bynode",
        "min_child_weight",
        "monotone_constraints",
    )
    integer_valued = ("max_depth", "min_child_weight")
    booster_params = {
        name: int(params[name]) if name in integer_valued else params[name]
        for name in param_names
    }
    booster_params["objective"] = "survival:cox"

    # Train the model
    model = xgb.train(
        booster_params,
        dtrain,
        evals=evals,
        verbose_eval=False,
        **kwargs,
    )
    cumulative_hazard_ = _generate_cumulative_hazard(
        model=model, train_data=train_data, dtrain=dtrain
    )

    def _auc(dataset):
        # Work on a copy; rows whose event flag is 0 get a negated stop
        # time before being used as the DMatrix label.
        tuning = dataset.copy()
        no_event = tuning[META["event"]] == 0
        tuning.loc[no_event, "stop"] = -tuning["stop"]
        dtune = xgb.DMatrix(
            tuning[META["static"] + META["dynamic"]], tuning["stop"]
        )
        partial_hazard = model.predict(dtune)
        # The baseline is interpolated at the original (unsigned) stop times
        baseline = interpolate_at_times(
            cumulative_hazard_, dataset["stop"].values
        )
        return roc_auc_score(
            y_true=tuning[META["event"]].values,
            y_score=1 - np.exp(-(baseline * partial_hazard)),
        )

    scores: List[float] = [_auc(dataset) for dataset in tune_data]
    loss = -np.average(np.array(scores))

    # Record the best parameters seen so far on the enclosing object
    if loss < self.metric_:
        self.metric_ = loss
        self.best_.update(params)
        self.logger.info(
            f"New best metric value of {self.metric_} with \n\n{pformat(self.best_)}\n"
        )

    return {"loss": loss, "status": STATUS_OK}