def test_survival_table_from_events_raises_value_error_if_too_early_births():
    """A third-argument value larger than the matching duration must raise ValueError."""
    n_subjects = 10
    durations = np.arange(0, n_subjects)
    observed = [True for _ in range(n_subjects)]

    # Start with the third argument equal to the durations, then push one
    # element 10 units past its own duration.
    min_obs = durations.copy()
    min_obs[1] += 10

    with pytest.raises(ValueError):
        utils.survival_table_from_events(durations, observed, min_obs)
def test_survival_table_from_events_with_non_trivial_censorship_column():
    """Event flags holding values other than 0/1 must give the same table as their boolean form."""
    n = 50
    T = np.random.exponential(5, size=n)
    # One flag per duration: without ``size`` binomial returns a single scalar,
    # so the test would never exercise an actual censorship *column*.
    malformed_C = np.random.binomial(2, p=0.8, size=n)  # set to 2 on purpose!
    proper_C = malformed_C > 0  # (proper "boolean" array)

    table1 = utils.survival_table_from_events(T, malformed_C, np.zeros_like(T))
    table2 = utils.survival_table_from_events(T, proper_C, np.zeros_like(T))

    assert_frame_equal(table1, table2)
def test_survival_table_from_events_raises_value_error_if_too_early_births():
    """Building the table must fail when one min_obs value exceeds its duration."""
    count = 10
    times = np.arange(0, count)
    all_observed = [True for _ in range(count)]

    # Copy the durations and move a single element 10 units later.
    shifted = times.copy()
    shifted[1] += 10

    with pytest.raises(ValueError):
        utils.survival_table_from_events(times, all_observed, shifted)
def preprocess_inputs(durations, event_observed, timeline, entry):
    """
    Coerce raw fitting inputs to flat arrays and build the event table.

    Parameters:
      durations: sequence of length n; reshaped to a 1-d array of length n.
      event_observed: sequence of length n or None; None becomes an all-ones
        int array, otherwise the values are copied and cast to int.
      timeline: array-like or None; None falls back to the event table's index.
      entry: sequence of length n or None; None becomes an array of zeros.

    Returns:
      (durations, event_observed, timeline as float, entry, event_table)
    """
    n = len(durations)
    flat_shape = (n,)
    durations = np.asarray(durations).reshape(flat_shape)

    # set to all observed if event_observed is none
    if event_observed is not None:
        event_observed = np.asarray(event_observed).reshape(flat_shape).copy().astype(int)
    else:
        event_observed = np.ones(n, dtype=int)

    entry = np.zeros(n) if entry is None else np.asarray(entry).reshape(flat_shape)

    event_table = survival_table_from_events(durations, event_observed, entry)

    timeline = event_table.index.values if timeline is None else np.asarray(timeline)

    return durations, event_observed, timeline.astype(float), entry, event_table
def test_survival_events_from_table_with_ties():
    """Two observed events at the same time come back as one row with weight 2."""
    T = np.array([1, 2, 3, 4, 4, 5])
    C = np.array([1, 0, 1, 1, 1, 1])

    table = utils.survival_table_from_events(T, C)
    counts = table[["censored", "observed"]]
    T_, C_, W_ = utils.survival_events_from_table(counts)

    npt.assert_array_equal([1, 2, 3, 4, 5], T_)
    npt.assert_array_equal([1, 0, 1, 1, 1], C_)
    npt.assert_array_equal([1, 1, 1, 2, 1], W_)
def test_survival_table_from_events_at_risk_column():
    """The at_risk column for the waltons data must match the reference values from R."""
    waltons = load_waltons()
    # from R
    expected = [
        163.0, 162.0, 160.0, 157.0, 154.0, 152.0, 151.0, 148.0,
        144.0, 139.0, 134.0, 133.0, 130.0, 128.0, 126.0, 119.0,
        118.0, 108.0, 107.0, 99.0, 96.0, 89.0, 87.0, 69.0,
        65.0, 49.0, 38.0, 36.0, 27.0, 24.0, 14.0, 1.0,
    ]
    table = utils.survival_table_from_events(waltons['T'], waltons['E'])
    # skip the first event as that is the birth time, 0.
    at_risk_after_birth = list(table['at_risk'][1:])
    assert at_risk_after_birth == expected
def test_survival_table_from_events_at_risk_column():
    """Check the at_risk counts of the waltons event table against values taken from R."""
    data = load_waltons()
    # from R
    expected = [
        163.0, 162.0, 160.0, 157.0, 154.0, 152.0, 151.0, 148.0,
        144.0, 139.0, 134.0, 133.0, 130.0, 128.0, 126.0, 119.0,
        118.0, 108.0, 107.0, 99.0, 96.0, 89.0, 87.0, 69.0,
        65.0, 49.0, 38.0, 36.0, 27.0, 24.0, 14.0, 1.0,
    ]
    event_table = utils.survival_table_from_events(data['T'], data['E'])
    # skip the first event as that is the birth time, 0.
    observed_counts = list(event_table['at_risk'][1:])
    assert observed_counts == expected
def test_survival_table_from_events_will_collapse_if_asked():
    """With collapse=True and no explicit intervals the index becomes two right-closed bins."""
    T = np.array([1, 3, 4, 5])
    C = np.array([True, True, True, True])

    table = utils.survival_table_from_events(T, C, collapse=True)

    expected_bins = [
        pd.Interval(0, 3.5089999999999999, closed="right"),
        pd.Interval(3.5089999999999999, 7.0179999999999998, closed="right"),
    ]
    assert table.index.tolist() == expected_bins
def test_survival_events_from_table_no_ties():
    """Round-tripping through the event table returns the inputs, each with weight 1."""
    T = np.array([1, 2, 3, 4, 4, 5])
    C = np.array([1, 0, 1, 1, 0, 1])

    table = utils.survival_table_from_events(T, C)
    counts = table[["censored", "observed"]]
    T_, C_, W_ = utils.survival_events_from_table(counts)

    npt.assert_array_equal(T, T_)
    npt.assert_array_equal(C, C_)
    npt.assert_array_equal(W_, np.ones_like(T))
def test_survival_table_to_events_casts_to_float():
    """Boolean event flags produce censored/removed columns equal to float-valued expectations."""
    T = np.array([1, 2, 3, 4, 4, 5])
    C = np.array([True, False, True, True, True, True])

    table = utils.survival_table_from_events(T, C, np.zeros_like(T))

    expected_censored = np.array([0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
    expected_removed = np.array([0.0, 1.0, 1.0, 1.0, 2.0, 1.0])
    npt.assert_array_equal(table["censored"].values, expected_censored)
    npt.assert_array_equal(table["removed"].values, expected_removed)
def test_survival_table_from_events_with_non_negative_T_and_no_lagged_births():
    """With every min_obs value at 0, the first row's entrance is n and the index spans min..max of T."""
    n = 10
    durations = np.arange(n)
    observed = [True for _ in range(n)]
    min_obs = [0 for _ in range(n)]

    table = utils.survival_table_from_events(durations, observed, min_obs)

    first_row = table.iloc[0]
    assert first_row["entrance"] == n
    assert table.index[0] == durations.min()
    assert table.index[-1] == durations.max()
def test_survival_table_from_events_with_negative_T_and_lagged_births():
    """With staggered min_obs values and negative durations, the first row's entrance is 1."""
    n = 10
    half = n / 2
    durations = np.arange(-half, half)
    observed = [True for _ in range(n)]
    min_obs = np.linspace(-half, 2, n)

    table = utils.survival_table_from_events(durations, observed, min_obs)

    first_row = table.iloc[0]
    assert first_row["entrance"] == 1
    assert table.index[0] == durations.min()
    assert table.index[-1] == durations.max()
def test_survival_table_from_events_with_negative_T_and_lagged_births():
    """Negative durations with staggered min_obs: the first row's entrance is 1 and the index spans T."""
    n = 10
    lower = -n / 2
    times = np.arange(lower, n / 2)
    all_observed = [True for _ in range(n)]
    min_obs = np.linspace(lower, 2, n)

    event_table = utils.survival_table_from_events(times, all_observed, min_obs)

    opening_row = event_table.iloc[0]
    assert opening_row['entrance'] == 1
    assert event_table.index[0] == times.min()
    assert event_table.index[-1] == times.max()
def test_survival_table_from_events_with_non_negative_T_and_no_lagged_births():
    """All min_obs values are 0: the first row's entrance equals n and the index runs from min(T) to max(T)."""
    n = 10
    times = np.arange(n)
    all_observed = [True for _ in range(n)]
    min_obs = [0 for _ in range(n)]

    event_table = utils.survival_table_from_events(times, all_observed, min_obs)

    opening_row = event_table.iloc[0]
    assert opening_row['entrance'] == n
    assert event_table.index[0] == times.min()
    assert event_table.index[-1] == times.max()
def test_survival_table_from_events_will_collapse_to_desired_bins():
    """With collapse=True and explicit intervals the index uses exactly those right-closed bins."""
    T = np.array([1, 3, 4, 5])
    C = np.array([True, True, True, True])
    edges = [0, 4, 8]

    table = utils.survival_table_from_events(T, C, collapse=True, intervals=edges)

    expected_bins = [
        pd.Interval(0, 4, closed="right"),
        pd.Interval(4, 8, closed="right"),
    ]
    assert table.index.tolist() == expected_bins
def _compute_baseline_hazard(self, data, durations, event_observed, name):
    """
    Compute the baseline hazard at each event time.

    Parameters:
      data: passed to ``self.predict_partial_hazard``; the result is expected
        to be a DataFrame whose column 0 holds the partial hazards.
      durations: array or pd.Series of event times, in the same row order as ``data``.
      event_observed: passed through to ``survival_table_from_events``.
      name: column label for the returned DataFrame.

    Returns:
      DataFrame indexed like the event table with one column ``name`` holding
      observed / (summed partial hazards of rows with duration >= t), NaN filled with 0.
    """
    # http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
    ind_hazards = self.predict_partial_hazard(data)
    # Assign positionally. Assigning a Series aligns on index labels, which
    # misaligns (or raises on duplicated labels) when ``durations`` and the
    # predicted hazards do not share an identical index.
    ind_hazards['event_at'] = getattr(durations, 'values', durations)

    # Descending sort + cumsum gives, for each time t, the total over all rows
    # whose duration is >= t.
    ind_hazards_summed_over_durations = (
        ind_hazards.groupby('event_at')[0].sum().sort_index(ascending=False).cumsum()
    )
    ind_hazards_summed_over_durations.name = 'hazards'

    event_table = survival_table_from_events(durations, event_observed)
    event_table = event_table.join(ind_hazards_summed_over_durations)

    baseline_hazard = pd.DataFrame(
        event_table['observed'] / event_table['hazards'], columns=[name]
    ).fillna(0)
    return baseline_hazard
def _compute_baseline_hazard(self, data, durations, event_observed, name):
    """
    Compute the baseline hazard at each event time.

    Parameters:
      data: passed to ``self.predict_partial_hazard``; the result is expected
        to be a DataFrame whose column 0 holds the partial hazards.
      durations: array or pd.Series of event times, in the same row order as ``data``.
      event_observed: passed through to ``survival_table_from_events``.
      name: column label for the returned DataFrame.

    Returns:
      DataFrame indexed like the event table with one column ``name`` holding
      observed / (summed partial hazards of rows with duration >= t), NaN filled with 0.
    """
    # http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
    ind_hazards = self.predict_partial_hazard(data)
    # Positional assignment: a pd.Series on the right-hand side would be
    # aligned on index labels, which breaks when the two indexes differ or
    # contain duplicates.
    ind_hazards['event_at'] = getattr(durations, 'values', durations)

    # Reverse-time cumulative sum: the total at t covers every row with
    # duration >= t.
    ind_hazards_summed_over_durations = (
        ind_hazards.groupby('event_at')[0].sum().sort_index(ascending=False).cumsum()
    )
    ind_hazards_summed_over_durations.name = 'hazards'

    event_table = survival_table_from_events(durations, event_observed)
    event_table = event_table.join(ind_hazards_summed_over_durations)

    baseline_hazard = pd.DataFrame(
        event_table['observed'] / event_table['hazards'], columns=[name]
    ).fillna(0)
    return baseline_hazard
def _compute_baseline_hazard(self, data, durations, event_observed, name):
    """
    Compute the baseline hazard at each event time.

    Parameters:
      data: passed to ``self.predict_partial_hazard``.
      durations: object exposing ``.values`` (e.g. a pd.Series) of event times.
      event_observed: passed through to ``survival_table_from_events``.
      name: column label for the returned DataFrame.

    Returns:
      DataFrame with one column ``name``: observed counts divided by the
      summed partial hazards of rows with duration >= t, NaN filled with 0.
    """
    # https://stats.stackexchange.com/questions/46532/cox-baseline-hazard
    partial_hazards = self.predict_partial_hazard(data)
    partial_hazards['event_at'] = durations.values

    # Total per distinct time, then accumulate from the latest time backwards.
    totals_by_time = partial_hazards.groupby('event_at')[0].sum()
    risk_set_totals = totals_by_time.sort_index(ascending=False).cumsum()
    risk_set_totals.name = 'hazards'

    event_table = survival_table_from_events(durations, event_observed)
    event_table = event_table.join(risk_set_totals)

    ratio = event_table['observed'] / event_table['hazards']
    return pd.DataFrame(ratio, columns=[name]).fillna(0)
def fit(self, event_times, censorship=None, timeline=None, label='NA-estimate', alpha=None, insert_0=True):
    """
    Fit the estimator and store the cumulative hazard on ``self``.

    Parameters:
      event_times: an array, or pd.Series, of length n of times that the death event occurred at
      timeline: return the best estimate at the values in timeline (positively increasing)
      censorship: an array, or pd.Series, of length n -- True if the death was observed, False if the event
         was lost (right-censored). Defaults to all True if censorship is None
      label: a string to name the column of the estimate.
      alpha: the alpha value in the confidence intervals. Overrides the initializing
         alpha for this call to fit only.
      insert_0: add a leading 0 (if not present) in the timeline. Only applied
         when no timeline is given.

    Returns:
      self, with new properties like 'cumulative_hazard_'.
    """
    if censorship is not None:
        self.censorship = np.array(censorship).copy().astype(bool)
    else:
        self.censorship = np.ones(len(event_times), dtype=bool)

    self.event_times = survival_table_from_events(event_times, self.censorship)

    if alpha is None:
        alpha = self.alpha

    if timeline is not None:
        self.timeline = np.array(timeline).copy().astype(float)
    else:
        self.timeline = self.event_times.index.values.copy().astype(float)
        if insert_0 and self.timeline[0] > 0:
            self.timeline = np.insert(self.timeline, 0, 0.)

    cumulative_hazard_, cumulative_sq_ = _additive_estimate(
        self.event_times, self.timeline, self._additive_f, self._variance_f
    )

    # estimates
    self.cumulative_hazard_ = pd.DataFrame(cumulative_hazard_, columns=[label])
    self.confidence_interval_ = self._bounds(cumulative_sq_[:, None], alpha)
    self._cumulative_sq = cumulative_sq_

    # estimation functions
    self.predict = _predict(self, "cumulative_hazard_", label)
    self.subtract = _subtract(self, "cumulative_hazard_")
    self.divide = _divide(self, "cumulative_hazard_")

    # plotting
    cumulative_plotter = plot_dataframes(self, "cumulative_hazard_")
    self.plot = cumulative_plotter
    self.plot_cumulative_hazard = cumulative_plotter
    self.plot_hazard = plot_dataframes(self, 'hazard_')

    return self
def _compute_slopes(self):
    """
    Regress the at-risk-weighted cumulative hazards on time, without intercept.

    Returns:
      (slopes, standard errors) as two pd.Series indexed by the columns of
      the weighted cumulative hazards.
    """

    def _univariate_linear_regression_without_intercept(X, Y, weights):
        # normally (weights * X).dot(Y) / X.dot(weights * X), but we have a slightly different form here.
        weighted_xx = X.dot(weights * X)
        beta = X.dot(Y) / weighted_xx
        residuals = Y.values - np.outer(X, beta)
        var = (residuals ** 2).sum(0) / (Y.shape[0] - 2) / weighted_xx
        return beta, np.sqrt(var)

    event_table = survival_table_from_events(self.durations, self.event_observed)
    at_risk = event_table.loc[self._index, "at_risk"].values
    weighted_cumulative = (at_risk[:, None] * self.hazards_).cumsum()
    times = self._index.values

    betas, se = _univariate_linear_regression_without_intercept(times, weighted_cumulative, at_risk)
    columns = weighted_cumulative.columns
    return pd.Series(betas, index=columns), pd.Series(se, index=columns)
def _compute_slopes(self):
    """
    Fit a no-intercept line through the at-risk-weighted cumulative hazards.

    Returns:
      A pair of pd.Series (slopes, standard errors), both indexed by the
      columns of the weighted cumulative hazards.
    """

    def _univariate_linear_regression_without_intercept(X, Y, weights):
        # normally (weights * X).dot(Y) / X.dot(weights * X), but we have a slightly different form here.
        denominator = X.dot(weights * X)
        beta = X.dot(Y) / denominator
        errors = Y.values - np.outer(X, beta)
        squared_error_sum = (errors ** 2).sum(0)
        var = squared_error_sum / (Y.shape[0] - 2) / denominator
        return beta, np.sqrt(var)

    table = survival_table_from_events(self.durations, self.event_observed)
    weights = table.loc[self._index, "at_risk"].values
    y = (weights[:, None] * self.hazards_).cumsum()

    betas, se = _univariate_linear_regression_without_intercept(self._index.values, y, weights)
    slope_series = pd.Series(betas, index=y.columns)
    se_series = pd.Series(se, index=y.columns)
    return slope_series, se_series
def _compute_baseline_hazard(self):
    """
    Compute the baseline hazard at every time in the event table.

    Reads ``self.data``, ``self.hazards_``, ``self.durations`` and
    ``self.event_observed``.

    Returns:
      DataFrame indexed like the event table with a single column
      'baseline hazard': observed events at t divided by the summed
      individual hazards of the rows still at risk at t (duration >= t).
    """
    # http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
    ind_hazards = exp(np.dot(self.data, self.hazards_.T))

    event_table = survival_table_from_events(self.durations.values,
                                             self.event_observed.values,
                                             np.zeros_like(self.durations))

    baseline_hazard_ = pd.DataFrame(np.zeros((event_table.shape[0], 1)),
                                    index=event_table.index,
                                    columns=['baseline hazard'])

    for t, s in event_table.iterrows():
        # The denominator is the risk set at t: rows whose duration is >= t.
        # (Summing over durations <= t takes the rows that have already left.)
        at_risk = np.asarray(self.durations >= t)
        denominator = ind_hazards[at_risk].sum()
        # Guard the division so an empty/zero risk set yields 0 rather than NaN or inf.
        value = s['observed'] / denominator if denominator != 0 else 0
        # .loc replaces .ix, which has been removed from pandas.
        baseline_hazard_.loc[t] = value

    return baseline_hazard_
def _compute_baseline_hazard(self):
    """
    Compute the baseline hazard at every time in the event table.

    Reads ``self.data``, ``self.hazards_``, ``self.durations`` and
    ``self.event_observed``.

    Returns:
      DataFrame indexed like the event table with a single column
      'baseline hazard': observed events at t divided by the summed
      individual hazards of the rows still at risk at t (duration >= t).
    """
    # http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
    ind_hazards = exp(np.dot(self.data, self.hazards_.T))

    event_table = survival_table_from_events(self.durations.values,
                                             self.event_observed.values,
                                             np.zeros_like(self.durations))

    baseline_hazard_ = pd.DataFrame(np.zeros((event_table.shape[0], 1)),
                                    index=event_table.index,
                                    columns=['baseline hazard'])

    for t, s in event_table.iterrows():
        # Risk set at t is every row with duration >= t; using <= t would sum
        # the rows that are no longer under observation.
        at_risk = np.asarray(self.durations >= t)
        denominator = ind_hazards[at_risk].sum()
        # Avoid dividing by zero when no hazard mass remains at risk.
        value = s['observed'] / denominator if denominator != 0 else 0
        # .ix is gone from current pandas; .loc is the label-based equivalent.
        baseline_hazard_.loc[t] = value

    return baseline_hazard_
def _summarize_survival(df, time_col, event_col, evaluate_at=None):
    """
    Summarise survival as the fraction of the initial cohort still at risk.

    Parameters:
      df: DataFrame containing ``time_col`` and ``event_col``.
      time_col: name of the duration column.
      event_col: name of the event-indicator column.
      evaluate_at: optional list-like of times; when given, only these times
        are returned, each carrying the most recent earlier value forward.

    Returns:
      DataFrame with columns ``time_col`` and 'survival'.
    """
    ## prepare survival table
    table = survival_table_from_events(df[time_col], df[event_col])
    table.reset_index(inplace=True)

    ## normalize survival as fraction of initial_n
    # Take the at_risk value of the row at time 0 by position; a label lookup
    # with [0] only works when that row happens to carry index label 0.
    table['initial_n'] = table.loc[table['event_at'] == 0.0, 'at_risk'].iloc[0]
    table['survival'] = table['at_risk'] / table['initial_n']

    ## handle timepoints if given
    if evaluate_at is not None:
        evaluate_times = pd.DataFrame({'event_at': evaluate_at})
        table = pd.merge(evaluate_times, table, on='event_at', how='outer')
        # .ffill() replaces the deprecated fillna(method='ffill').
        table = table.sort_values('event_at').ffill()
        # isin compares against the *values* of evaluate_at even when it is a
        # pd.Series (``x in series`` would test the index instead).
        table['keep'] = table['event_at'].isin(evaluate_at)
    else:
        table['keep'] = True

    table = table.loc[table['keep'], ['event_at', 'survival']]
    return table.rename(columns={'event_at': time_col})
def fit(self, event_times, censorship=None, timeline=None, columns=['KM-estimate'], alpha=None, insert_0=True):
    """
    Fit the estimator and store the survival function on ``self``.

    Parameters:
      event_times: an (n,1) array (or any sequence of length n) of times that the death event occurred at
      timeline: return the best estimate at the values in timeline (positively increasing)
      censorship: an (n,1) array of booleans -- True if the death was observed, False if the event
         was lost (right-censored). Defaults to all True if censorship is None
      columns: a length 1 array to name the column of the estimate. The default
         list is only read, never mutated.
      alpha: the alpha value in the confidence intervals. Overrides the initializing
         alpha for this call to fit only.
      insert_0: add a leading 0 (if not present) in the timeline. Only applied
         when no timeline is given.

    Returns:
      self, with new properties 'survival_function_', 'confidence_interval_' and 'median_'.
    """
    # set to all observed if censorship is none
    if censorship is None:
        # len() accepts lists and pd.Series as well as arrays.
        self.censorship = np.ones(len(event_times), dtype=bool)
    else:
        self.censorship = np.array(censorship).copy()

    # Only fall back to the instance alpha when none was passed; a truthiness
    # test would silently override an explicit alpha of 0.
    if alpha is None:
        alpha = self.alpha

    self.event_times = survival_table_from_events(event_times, self.censorship)

    if timeline is None:
        self.timeline = self.event_times.index.values.copy().astype(float)
        if insert_0 and self.timeline[0] > 0:
            self.timeline = np.insert(self.timeline, 0, 0.)
    else:
        # np.asarray lets callers pass a plain list or Series as the timeline.
        self.timeline = np.asarray(timeline).astype(float)

    log_survival_function, cumulative_sq_ = _additive_estimate(
        self.event_times, self.timeline, self._additive_f, self._additive_var
    )

    self.survival_function_ = pd.DataFrame(np.exp(log_survival_function), columns=columns)
    self.confidence_interval_ = self._bounds(cumulative_sq_[:, None], alpha)
    self.plot = plot_dataframes(self, "survival_function_")
    self.plot_survival_function = self.plot
    self.median_ = median_survival_times(self.survival_function_)
    return self
def _summarize_survival(df, time_col, event_col, evaluate_at=None):
    """
    Summarise survival as the fraction of the largest at-risk count.

    Parameters:
      df: DataFrame containing ``time_col`` and ``event_col``.
      time_col: name of the duration column.
      event_col: name of the event-indicator column.
      evaluate_at: optional list-like of times; when given, only these times
        are returned, each carrying the most recent earlier value forward.

    Returns:
      DataFrame with columns ``time_col`` and 'survival'.
    """
    ## prepare survival table
    table = survival_table_from_events(df[time_col], df[event_col])
    table.reset_index(inplace=True)

    ## normalize survival as fraction of initial_n
    table['initial_n'] = table['at_risk'].max()
    table['survival'] = table['at_risk'] / table['initial_n']

    ## handle timepoints if given
    if evaluate_at is not None:
        evaluate_times = pd.DataFrame({'event_at': evaluate_at})
        table = pd.merge(evaluate_times, table, on='event_at', how='outer')
        # .ffill() replaces the deprecated fillna(method='ffill').
        table = table.sort_values('event_at').ffill()
        # isin compares against the *values* of evaluate_at even when it is a
        # pd.Series (``x in series`` would test the index instead).
        table['keep'] = table['event_at'].isin(evaluate_at)
    else:
        table['keep'] = True

    table = table.loc[table['keep'], ['event_at', 'survival']]
    return table.rename(columns={'event_at': time_col})
def _compute_baseline_hazard(self, data, durations, event_observed, weights, name):
    """
    Compute the weighted baseline hazard at each event time.

    Parameters:
      data: passed to ``self.predict_partial_hazard``.
      durations: object exposing ``.values`` (e.g. a pd.Series) of event times.
      event_observed: passed through to ``survival_table_from_events``.
      weights: 1-d array of per-row weights; multiplies the partial hazards
        and is forwarded to ``survival_table_from_events``.
      name: column label for the returned DataFrame.

    Returns:
      DataFrame with one column ``name``: observed counts divided by the
      summed weighted partial hazards of rows with duration >= t, NaN filled with 0.
    """
    # https://stats.stackexchange.com/questions/46532/cox-baseline-hazard
    weighted_hazards = self.predict_partial_hazard(data) * weights[:, None]
    weighted_hazards["event_at"] = durations.values

    # Total per distinct time, then accumulate from the latest time backwards.
    totals_by_time = weighted_hazards.groupby("event_at")[0].sum()
    risk_set_totals = totals_by_time.sort_index(ascending=False).cumsum()
    risk_set_totals.name = "hazards"

    event_table = survival_table_from_events(durations, event_observed, weights=weights)
    event_table = event_table.join(risk_set_totals)

    ratio = event_table["observed"] / event_table["hazards"]
    return pd.DataFrame(ratio, columns=[name]).fillna(0)
def fit(self, event_times, censorship=None, timeline=None, columns=['NA-estimate'], alpha=None, insert_0=True):
    """
    Fit the estimator and store the cumulative hazard on ``self``.

    Parameters:
      event_times: an (n,1) array (or any sequence of length n) of times that the death event occurred at
      censorship: a length n sequence -- cast to bool; defaults to all True
         if censorship is None
      timeline: return the best estimate at the values in timeline (positively increasing)
      columns: a length 1 array to name the column of the estimate. The default
         list is only read, never mutated.
      alpha: the alpha value in the confidence intervals. Overrides the initializing
         alpha for this call to fit only.
      insert_0: add a leading 0 (if not present) in the timeline. Only applied
         when no timeline is given.

    Returns:
      self, with new properties 'cumulative_hazard_' and 'confidence_interval_'.
    """
    if censorship is None:
        # len() accepts lists and pd.Series as well as arrays.
        self.censorship = np.ones(len(event_times), dtype=bool)
    else:
        self.censorship = np.array(censorship).copy().astype(bool)

    self.event_times = survival_table_from_events(event_times, self.censorship)

    if alpha is None:
        alpha = self.alpha

    if timeline is None:
        self.timeline = self.event_times.index.values.copy().astype(float)
        if insert_0 and self.timeline[0] > 0:
            self.timeline = np.insert(self.timeline, 0, 0.)
    else:
        # np.asarray lets callers pass a plain list or Series as the timeline.
        self.timeline = np.asarray(timeline).astype(float)

    cumulative_hazard_, cumulative_sq_ = _additive_estimate(
        self.event_times, self.timeline, self._additive_f, self._variance_f
    )

    self.cumulative_hazard_ = pd.DataFrame(cumulative_hazard_, columns=columns)
    self.confidence_interval_ = self._bounds(cumulative_sq_[:, None], alpha)
    self.plot = plot_dataframes(self, "cumulative_hazard_")
    self.plot_cumulative_hazard = self.plot
    self.plot_hazard = plot_dataframes(self, 'hazard_')
    self._cumulative_sq = cumulative_sq_
    return self
def _compute_baseline_hazard(self):
    """
    Compute the baseline hazard at every time in the event table.

    Reads ``self.data``, ``self.durations`` and ``self.event_observed`` and
    calls ``self.predict_partial_hazard``.

    Returns:
      DataFrame indexed like the event table with a single column
      'baseline hazard': observed events at t divided by the summed partial
      hazards of rows with duration >= t, or 0 when that sum is 0.
    """
    # http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
    ind_hazards = self.predict_partial_hazard(self.data).values

    event_table = survival_table_from_events(self.durations.values,
                                             self.event_observed.values)

    baseline_hazard_ = pd.DataFrame(np.zeros((event_table.shape[0], 1)),
                                    index=event_table.index,
                                    columns=['baseline hazard'])

    for t, s in event_table.iterrows():
        # Rows still at risk at t (duration >= t).
        at_risk = np.array(self.durations >= t)
        risk_total = ind_hazards[at_risk].sum()
        v = 0 if risk_total == 0 else s['observed'] / risk_total
        # .loc replaces .ix, which has been removed from pandas.
        baseline_hazard_.loc[t] = v

    return baseline_hazard_
def _compute_baseline_hazard(self):
    """
    Compute the baseline hazard at every time in the event table.

    Reads ``self.data``, ``self.durations`` and ``self.event_observed`` and
    calls ``self.predict_partial_hazard``.

    Returns:
      DataFrame indexed like the event table with a single column
      'baseline hazard': observed events at t divided by the summed partial
      hazards of rows with duration >= t, or 0 when that sum is 0.
    """
    # http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
    ind_hazards = self.predict_partial_hazard(self.data).values

    event_table = survival_table_from_events(self.durations.values,
                                             self.event_observed.values)

    baseline_hazard_ = pd.DataFrame(np.zeros((event_table.shape[0], 1)),
                                    index=event_table.index,
                                    columns=['baseline hazard'])

    for t, s in event_table.iterrows():
        # Mask of rows whose duration is >= t, i.e. still at risk at t.
        at_risk = np.array(self.durations >= t)
        # Sum once and reuse for both the zero check and the division.
        risk_total = ind_hazards[at_risk].sum()
        v = 0 if risk_total == 0 else s['observed'] / risk_total
        # .ix no longer exists in pandas; .loc is the label-based equivalent.
        baseline_hazard_.loc[t] = v

    return baseline_hazard_
def preprocess_inputs(durations, event_observed, timeline, entry):
    """
    Normalise the fitting inputs into 1-d arrays and derive the event table.

    Parameters:
      durations: sequence of length n; reshaped to shape (n,).
      event_observed: sequence of length n or None; None means "all observed"
        (an int array of ones), otherwise a copy cast to int.
      timeline: array-like or None; None uses the event table's index values.
      entry: sequence of length n or None; None means an array of zeros.

    Returns:
      (durations, event_observed, timeline as float, entry, event_table)
    """
    n = len(durations)
    target_shape = (n,)
    durations = np.asarray(durations).reshape(target_shape)

    # set to all observed if event_observed is none
    if event_observed is not None:
        event_observed = np.asarray(event_observed).reshape(target_shape).copy().astype(int)
    else:
        event_observed = np.ones(n, dtype=int)

    if entry is not None:
        entry = np.asarray(entry).reshape(target_shape)
    else:
        entry = np.zeros(n)

    event_table = survival_table_from_events(durations, event_observed, entry)

    if timeline is not None:
        timeline = np.asarray(timeline)
    else:
        timeline = event_table.index.values

    timeline_as_float = timeline.astype(float)
    return durations, event_observed, timeline_as_float, entry, event_table
# Restrict the data to rows with Site == "glottic" and Ethnicity == "white".
df = df[df['Site'] == "glottic"]
df = df[df['Ethnicity'] == "white"]

from lifelines.utils import survival_table_from_events

# Build one frame per T category and swap the 0/1 coding of 'Censor'.
# The swap is done in a single assignment on an explicit copy; chained
# indexing such as df1['Censor'][mask] = 1 writes through a temporary and
# is not guaranteed to modify df1.
df1 = df[df['Tcategory'] == 'T3'].copy()
df1['Censor'] = df1['Censor'].replace({0: 1, 1: 0})

df2 = df[df['Tcategory'] == 'T4'].copy()
df2['Censor'] = df2['Censor'].replace({0: 1, 1: 0})

T = df1['OS']
E = df1['Censor']

table = survival_table_from_events(df1['OS'], df1['Censor'])
table2 = survival_table_from_events(df2['OS'], df2['Censor'])
# print() with a single argument behaves the same on Python 2 and 3.
print(table.head())

from lifelines import KaplanMeierFitter

kmf = KaplanMeierFitter()
kmf.fit(T, event_observed=E, label="T3")  # more succinctly, kmf.fit(T, E)
print(kmf.survival_function_)
ax = kmf.plot()

# Refit the same fitter on the T4 rows and draw onto the same axes.
T = df2['OS']
E = df2['Censor']
kmf.fit(T, event_observed=E, label="T4")  # more succinctly, kmf.fit(T, E)
kmf.plot(ax=ax)
def test_survival_table_from_events_binned_with_empty_bin():
    """Binning the miR-137 group into fixed intervals must not leave any null cell."""
    waltons = load_waltons()
    group_rows = waltons.loc[waltons["group"] == "miR-137"]
    bin_edges = [0, 10, 20, 30, 40, 50]

    event_table = utils.survival_table_from_events(
        group_rows["T"], group_rows["E"], intervals=bin_edges
    )

    has_missing = pd.isnull(event_table).any().any()
    assert not has_missing
def test_survival_table_from_events_will_collapse_if_asked():
    """collapse=True without intervals yields two right-closed bins as the index."""
    durations = np.array([1, 3, 4, 5])
    observed = np.array([True, True, True, True])

    collapsed = utils.survival_table_from_events(durations, observed, collapse=True)

    first_bin = pd.Interval(0, 3.5089999999999999, closed='right')
    second_bin = pd.Interval(3.5089999999999999, 7.0179999999999998, closed='right')
    assert collapsed.index.tolist() == [first_bin, second_bin]
def test_survival_table_from_events_will_collapse_to_desired_bins():
    """collapse=True with intervals=[0, 4, 8] yields exactly the bins (0, 4] and (4, 8]."""
    durations = np.array([1, 3, 4, 5])
    observed = np.array([True, True, True, True])

    collapsed = utils.survival_table_from_events(
        durations, observed, collapse=True, intervals=[0, 4, 8]
    )

    first_bin = pd.Interval(0, 4, closed='right')
    second_bin = pd.Interval(4, 8, closed='right')
    assert collapsed.index.tolist() == [first_bin, second_bin]
"""
          removed  observed  censored  entrance  at_risk
event_at
0               0         0         0       163      163
6               1         1         0         0      163
7               2         1         1         0      162
9               3         3         0         0      160
13              3         3         0         0      157
"""

# start_times is a vector of datetime objects
# end_times is a vector of (possibly missing) datetime objects.
import numpy as np

# Traditional way to generate survival data:
# draw xS as the survival time and xC as the censoring time for each subject;
# the event is observed when xS < xC, otherwise the subject is censored.
n_samples = 10
xS = np.random.exponential(1, n_samples)
xC = np.random.exponential(1, n_samples)
obsE = xS < xC

# Observed time is xS where the event happened and xC where it was censored
# (vectorised form of looping over the indices and appending one by one).
obsT = np.where(obsE, xS, xC)

table = survival_table_from_events(obsT, obsE)
print(table.head())
def test_survival_table_to_events():
    """Converting events to a table and back must return the original T and C."""
    T = np.array([1, 2, 3, 4, 4, 5])
    C = np.array([1, 0, 1, 1, 1, 1])

    table = utils.survival_table_from_events(T, C, np.zeros_like(T))
    counts = table[['censored', 'observed']]
    T_, C_ = utils.survival_events_from_table(counts)

    npt.assert_array_equal(T, T_)
    npt.assert_array_equal(C, C_)
xlabel='Days of follow-up', legend_labels=['Male', 'Female'], ylabel='Survival probability', title='Overall survival in lung cancer patients')  # NOTE(review): closes a call opened above this excerpt

# Cox PH regression
from lifelines import CoxPHFitter
cph = CoxPHFitter()
# age_grp is 1 where age >= 65, else 0.
lungdata['age_grp'] = np.where(lungdata['age'] >= 65, 1, 0)
# sex stays 1 where it is already 1; every other value becomes 2.
lungdata['sex'] = np.where(lungdata['sex'] == 1, 1, 2)
lung_cph = lungdata[['time', 'status_km', 'sex', 'age', 'wt.loss']]  # only keep vars needed for model
lung_cph = lung_cph[ lung_cph['wt.loss'].notna()]  # drop nan values so the model can converge
cph.fit(lung_cph, 'time', event_col='status_km')
cph.print_summary()
# extract stats for the multivariate table & save as .csv format
# NOTE(review): plain string concatenation -- results_folder presumably ends with a path separator; confirm.
cph.summary.to_csv(results_folder + 'multivariate_table.csv')
# miscellaneous ...
# look at the survival table
from lifelines.utils import survival_table_from_events
# T and E are not defined in this excerpt -- presumably set earlier in the script; verify.
table = survival_table_from_events(T, E)
print(table.head())
def test_survival_table_to_events():
    """Events -> table -> events must reproduce the input durations and flags."""
    durations = np.array([1, 2, 3, 4, 4, 5])
    flags = np.array([1, 0, 1, 1, 1, 1])

    event_table = utils.survival_table_from_events(durations, flags, np.zeros_like(durations))
    recovered = utils.survival_events_from_table(event_table[['censored', 'observed']])
    durations_back, flags_back = recovered

    npt.assert_array_equal(durations, durations_back)
    npt.assert_array_equal(flags, flags_back)
def test_survival_table_to_events_casts_to_float():
    """With boolean flags, the censored and removed columns equal the float-valued expectations."""
    durations = np.array([1, 2, 3, 4, 4, 5])
    flags = np.array([True, False, True, True, True, True])

    event_table = utils.survival_table_from_events(durations, flags, np.zeros_like(durations))

    censored_expected = np.array([0., 0., 1., 0., 0., 0.])
    removed_expected = np.array([0., 1., 1., 1., 2., 1.])
    npt.assert_array_equal(event_table['censored'].values, censored_expected)
    npt.assert_array_equal(event_table['removed'].values, removed_expected)