Example #1
def test_survival_table_from_events_raises_value_error_if_too_early_births():
    n = 10
    T = np.arange(0, n)
    C = [True] * n
    min_obs = T.copy()
    min_obs[1] = min_obs[1] + 10
    with pytest.raises(ValueError):
        utils.survival_table_from_events(T, C, min_obs)
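These tests exercise lifelines.utils.survival_table_from_events. As a quick orientation, a minimal sketch (assuming numpy and lifelines are installed) of a well-formed call and the columns it returns:

import numpy as np
from lifelines.utils import survival_table_from_events

T = np.array([1, 2, 3, 4, 5])  # durations until event or censoring
E = np.array([1, 0, 1, 1, 1])  # 1 = event observed, 0 = right-censored
table = survival_table_from_events(T, E)
print(table.columns.tolist())  # ['removed', 'observed', 'censored', 'entrance', 'at_risk']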
Example #2
def test_survival_table_from_events_with_non_trivial_censorship_column():
    T = np.random.exponential(5, size=50)
    malformed_C = np.random.binomial(2, p=0.8, size=T.shape[0])  # set to 2 on purpose!
    proper_C = malformed_C > 0  # (proper "boolean" array)
    table1 = utils.survival_table_from_events(T, malformed_C, np.zeros_like(T))
    table2 = utils.survival_table_from_events(T, proper_C, np.zeros_like(T))

    assert_frame_equal(table1, table2)
Example #3
def test_survival_table_from_events_raises_value_error_if_too_early_births():
    n = 10
    T = np.arange(0, n)
    C = [True] * n
    min_obs = T.copy()
    min_obs[1] = min_obs[1] + 10
    with pytest.raises(ValueError):
        utils.survival_table_from_events(T, C, min_obs)
Example #4
def preprocess_inputs(durations, event_observed, timeline, entry):

    n = len(durations)
    durations = np.asarray(durations).reshape((n, ))

    # set to all observed if event_observed is none
    if event_observed is None:
        event_observed = np.ones(n, dtype=int)
    else:
        event_observed = np.asarray(event_observed).reshape((n,)).copy().astype(int)

    if entry is None:
        entry = np.zeros(n)
    else:
        entry = np.asarray(entry).reshape((n, ))

    event_table = survival_table_from_events(durations, event_observed, entry)

    if timeline is None:
        timeline = event_table.index.values
    else:
        timeline = np.asarray(timeline)

    return durations, event_observed, timeline.astype(float), entry, event_table
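A hedged usage sketch of preprocess_inputs as defined above (assuming numpy and survival_table_from_events are in scope, as in the surrounding module):

d, e, tl, entry, table = preprocess_inputs([5, 6, 7], None, None, None)
# d -> array([5, 6, 7]); e -> array([1, 1, 1]) since event_observed=None means all observed
# entry -> array([0., 0., 0.]); tl is taken from the event table's index, cast to float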
Example #5
def test_survival_events_from_table_with_ties():
    T, C = np.array([1, 2, 3, 4, 4, 5]), np.array([1, 0, 1, 1, 1, 1])
    d = utils.survival_table_from_events(T, C)
    T_, C_, W_ = utils.survival_events_from_table(d[["censored", "observed"]])
    npt.assert_array_equal([1, 2, 3, 4, 5], T_)
    npt.assert_array_equal([1, 0, 1, 1, 1], C_)
    npt.assert_array_equal([1, 1, 1, 2, 1], W_)
Example #6
def test_survival_table_from_events_at_risk_column():
    df = load_waltons()
    # from R
    expected = [163.0, 162.0, 160.0, 157.0, 154.0, 152.0, 151.0, 148.0, 144.0, 139.0, 134.0, 133.0, 130.0, 128.0, 126.0, 119.0, 118.0, 
                108.0, 107.0, 99.0, 96.0, 89.0, 87.0, 69.0, 65.0, 49.0, 38.0, 36.0, 27.0, 24.0, 14.0, 1.0]
    df = utils.survival_table_from_events(df['T'], df['E'])
    assert list(df['at_risk'][1:]) == expected # skip the first event as that is the birth time, 0.
Example #7
def test_survival_table_from_events_at_risk_column():
    df = load_waltons()
    # from R
    expected = [163.0, 162.0, 160.0, 157.0, 154.0, 152.0, 151.0, 148.0, 144.0, 139.0, 134.0, 133.0, 130.0, 128.0, 126.0, 119.0, 118.0,
                108.0, 107.0, 99.0, 96.0, 89.0, 87.0, 69.0, 65.0, 49.0, 38.0, 36.0, 27.0, 24.0, 14.0, 1.0]
    df = utils.survival_table_from_events(df['T'], df['E'])
    assert list(df['at_risk'][1:]) == expected  # skip the first event as that is the birth time, 0.
Example #8
def test_survival_table_from_events_will_collapse_if_asked():
    T, C = np.array([1, 3, 4, 5]), np.array([True, True, True, True])
    table = utils.survival_table_from_events(T, C, collapse=True)
    assert table.index.tolist() == [
        pd.Interval(0, 3.5089999999999999, closed="right"),
        pd.Interval(3.5089999999999999, 7.0179999999999998, closed="right"),
    ]
Example #9
def test_survival_events_from_table_no_ties():
    T, C = np.array([1, 2, 3, 4, 4, 5]), np.array([1, 0, 1, 1, 0, 1])
    d = utils.survival_table_from_events(T, C)
    T_, C_, W_ = utils.survival_events_from_table(d[["censored", "observed"]])
    npt.assert_array_equal(T, T_)
    npt.assert_array_equal(C, C_)
    npt.assert_array_equal(W_, np.ones_like(T))
Example #10
def test_survival_table_to_events_casts_to_float():
    T, C = np.array([1, 2, 3, 4, 4, 5]), np.array([True, False, True, True, True, True])
    d = utils.survival_table_from_events(T, C, np.zeros_like(T))
    npt.assert_array_equal(d["censored"].values,
                           np.array([0.0, 0.0, 1.0, 0.0, 0.0, 0.0]))
    npt.assert_array_equal(d["removed"].values,
                           np.array([0.0, 1.0, 1.0, 1.0, 2.0, 1.0]))
Example #11
def test_survival_table_from_events_with_non_negative_T_and_no_lagged_births():
    n = 10
    T = np.arange(n)
    C = [True] * n
    min_obs = [0] * n
    df = utils.survival_table_from_events(T, C, min_obs)
    assert df.iloc[0]["entrance"] == n
    assert df.index[0] == T.min()
    assert df.index[-1] == T.max()
Example #12
def test_survival_table_from_events_with_negative_T_and_lagged_births():
    n = 10
    T = np.arange(-n / 2, n / 2)
    C = [True] * n
    min_obs = np.linspace(-n / 2, 2, n)
    df = utils.survival_table_from_events(T, C, min_obs)
    assert df.iloc[0]["entrance"] == 1
    assert df.index[0] == T.min()
    assert df.index[-1] == T.max()
Example #13
def test_survival_table_from_events_with_negative_T_and_lagged_births():
    n = 10
    T = np.arange(-n / 2, n / 2)
    C = [True] * n
    min_obs = np.linspace(-n / 2, 2, n)
    df = utils.survival_table_from_events(T, C, min_obs)
    assert df.iloc[0]['entrance'] == 1
    assert df.index[0] == T.min()
    assert df.index[-1] == T.max()
Example #14
def test_survival_table_from_events_with_non_negative_T_and_no_lagged_births():
    n = 10
    T = np.arange(n)
    C = [True] * n
    min_obs = [0] * n
    df = utils.survival_table_from_events(T, C, min_obs)
    assert df.iloc[0]['entrance'] == n
    assert df.index[0] == T.min()
    assert df.index[-1] == T.max()
Example #15
def test_survival_table_from_events_will_collapse_to_desired_bins():
    T, C = np.array([1, 3, 4, 5]), np.array([True, True, True, True])
    table = utils.survival_table_from_events(T,
                                             C,
                                             collapse=True,
                                             intervals=[0, 4, 8])
    assert table.index.tolist() == [
        pd.Interval(0, 4, closed="right"),
        pd.Interval(4, 8, closed="right")
    ]
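The two collapse tests above condense into one runnable sketch (same lifelines API; the IntervalIndex printed mirrors the assertions):

import numpy as np
from lifelines.utils import survival_table_from_events

T = np.array([1, 3, 4, 5])
E = np.ones_like(T, dtype=bool)
binned = survival_table_from_events(T, E, collapse=True, intervals=[0, 4, 8])
print(binned.index.tolist())  # [Interval(0, 4, closed='right'), Interval(4, 8, closed='right')]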
Example #16
    def _compute_baseline_hazard(self, data, durations, event_observed, name):
        # http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
        ind_hazards = self.predict_partial_hazard(data)
        ind_hazards['event_at'] = durations
        ind_hazards_summed_over_durations = ind_hazards.groupby('event_at')[0].sum().sort_index(ascending=False).cumsum()
        ind_hazards_summed_over_durations.name = 'hazards'

        event_table = survival_table_from_events(durations, event_observed)
        event_table = event_table.join(ind_hazards_summed_over_durations)
        baseline_hazard = pd.DataFrame(event_table['observed'] / event_table['hazards'], columns=[name]).fillna(0)
        return baseline_hazard
Example #17
    def _compute_baseline_hazard(self, data, durations, event_observed, name):
        # http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
        ind_hazards = self.predict_partial_hazard(data)
        ind_hazards['event_at'] = durations
        ind_hazards_summed_over_durations = ind_hazards.groupby('event_at')[0].sum().sort_index(ascending=False).cumsum()
        ind_hazards_summed_over_durations.name = 'hazards'

        event_table = survival_table_from_events(durations, event_observed)
        event_table = event_table.join(ind_hazards_summed_over_durations)
        baseline_hazard = pd.DataFrame(event_table['observed'] / event_table['hazards'], columns=[name]).fillna(0)
        return baseline_hazard
Example #18
    def _compute_baseline_hazard(self, data, durations, event_observed, name):
        # https://stats.stackexchange.com/questions/46532/cox-baseline-hazard
        ind_hazards = self.predict_partial_hazard(data)
        ind_hazards['event_at'] = durations.values
        ind_hazards_summed_over_durations = ind_hazards.groupby('event_at')[0].sum().sort_index(ascending=False).cumsum()
        ind_hazards_summed_over_durations.name = 'hazards'

        event_table = survival_table_from_events(durations, event_observed)
        event_table = event_table.join(ind_hazards_summed_over_durations)
        baseline_hazard = pd.DataFrame(event_table['observed'] / event_table['hazards'], columns=[name]).fillna(0)
        return baseline_hazard
Example #19
    def fit(self, event_times, censorship=None, timeline=None, label='NA-estimate', alpha=None, insert_0=True):
        """
        Parameters:
          event_times: an array, or pd.Series, of length n of times that the death event occurred at
          timeline: return the best estimate at the values in timelines (positively increasing)
          censorship: an array, or pd.Series, of length n -- True if the death was observed, False if the event
             was lost (right-censored). Defaults to all True if censorship is None
          label: a string to name the column of the estimate.
          alpha: the alpha value in the confidence intervals. Overrides the initializing
             alpha for this call to fit only.
          insert_0: add a leading 0 (if not present) in the timeline.

        Returns:
          self, with new properties like 'survival_function_'.

        """

        if censorship is None:
            self.censorship = np.ones(len(event_times), dtype=bool)
        else:
            self.censorship = np.array(censorship).copy().astype(bool)

        self.event_times = survival_table_from_events(event_times, self.censorship)

        if alpha is None:
            alpha = self.alpha

        if timeline is None:
            self.timeline = self.event_times.index.values.copy().astype(float)
            if insert_0 and self.timeline[0] > 0:
                self.timeline = np.insert(self.timeline, 0, 0.)
        else:
            self.timeline = np.array(timeline).copy().astype(float)

        cumulative_hazard_, cumulative_sq_ = _additive_estimate(self.event_times, self.timeline,
                                                                self._additive_f, self._variance_f)

        # estimates
        self.cumulative_hazard_ = pd.DataFrame(cumulative_hazard_, columns=[label])
        self.confidence_interval_ = self._bounds(cumulative_sq_[:, None], alpha)
        self._cumulative_sq = cumulative_sq_

        # estimation functions
        self.predict = _predict(self, "cumulative_hazard_", label)
        self.subtract = _subtract(self, "cumulative_hazard_")
        self.divide = _divide(self, "cumulative_hazard_")

        # plotting
        self.plot = plot_dataframes(self, "cumulative_hazard_")
        self.plot_cumulative_hazard = self.plot
        self.plot_hazard = plot_dataframes(self, 'hazard_')

        return self
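For contrast with this internal fit method, a hedged sketch of the public API (parameter names per current lifelines docs; the older code above calls the same argument censorship):

import numpy as np
from lifelines import NelsonAalenFitter

T = np.array([1, 2, 3, 4, 5])
E = np.array([1, 0, 1, 1, 1])
naf = NelsonAalenFitter()
naf.fit(T, event_observed=E)
print(naf.cumulative_hazard_.head())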
Example #20
    def _compute_slopes(self):
        def _univariate_linear_regression_without_intercept(X, Y, weights):
            # normally (weights * X).dot(Y) / X.dot(weights * X), but we have a slightly different form here.
            beta = X.dot(Y) / X.dot(weights * X)
            errors = Y.values - np.outer(X, beta)
            var = (errors ** 2).sum(0) / (Y.shape[0] - 2) / X.dot(weights * X)
            return beta, np.sqrt(var)

        weights = survival_table_from_events(self.durations, self.event_observed).loc[self._index, "at_risk"].values
        y = (weights[:, None] * self.hazards_).cumsum()
        X = self._index.values
        betas, se = _univariate_linear_regression_without_intercept(X, y, weights)
        return pd.Series(betas, index=y.columns), pd.Series(se, index=y.columns)
Example #21
    def _compute_slopes(self):
        def _univariate_linear_regression_without_intercept(X, Y, weights):
            # normally (weights * X).dot(Y) / X.dot(weights * X), but we have a slightly different form here.
            beta = X.dot(Y) / X.dot(weights * X)
            errors = Y.values - np.outer(X, beta)
            var = (errors ** 2).sum(0) / (Y.shape[0] - 2) / X.dot(weights * X)
            return beta, np.sqrt(var)

        weights = survival_table_from_events(self.durations, self.event_observed).loc[self._index, "at_risk"].values
        y = (weights[:, None] * self.hazards_).cumsum()
        X = self._index.values
        betas, se = _univariate_linear_regression_without_intercept(X, y, weights)
        return pd.Series(betas, index=y.columns), pd.Series(se, index=y.columns)
Example #22
    def _compute_baseline_hazard(self):
        # http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
        ind_hazards = np.exp(np.dot(self.data, self.hazards_.T))

        event_table = survival_table_from_events(self.durations.values,
                                                 self.event_observed.values,
                                                 np.zeros_like(self.durations))
        n, d = event_table.shape

        baseline_hazard_ = pd.DataFrame(np.zeros((n, 1)),
                                        index=event_table.index,
                                        columns=['baseline hazard'])
        for t, s in event_table.iterrows():
            baseline_hazard_.loc[t] = (s['observed'] /
                                       ind_hazards[self.durations >= t].sum())

        return baseline_hazard_
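In words, this loop computes a Breslow-type baseline hazard estimate: at each event time t, the baseline hazard is the number of observed deaths at t divided by the sum of the partial hazards of subjects still at risk, i.e. those with durations >= t (hence the comparison above).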
Example #23
    def _compute_baseline_hazard(self):
        # http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
        ind_hazards = np.exp(np.dot(self.data, self.hazards_.T))

        event_table = survival_table_from_events(self.durations.values,
                                                 self.event_observed.values,
                                                 np.zeros_like(self.durations))
        n, d = event_table.shape

        baseline_hazard_ = pd.DataFrame(np.zeros((n, 1)),
                                        index=event_table.index,
                                        columns=['baseline hazard'])
        for t, s in event_table.iterrows():
            baseline_hazard_.loc[t] = (s['observed'] /
                                       ind_hazards[self.durations >= t].sum())

        return baseline_hazard_
Example #24
def _summarize_survival(df, time_col, event_col, evaluate_at=None):
    ## prepare survival table
    table = survival_table_from_events(df[time_col], df[event_col])
    table.reset_index(inplace=True)
    ## normalize survival as fraction of initial_n
    table['initial_n'] = table.loc[table['event_at'] == 0.0, 'at_risk'].iloc[0]
    table['survival'] = table.apply(lambda row: row['at_risk']/row['initial_n'], axis=1)
    ## handle timepoints if given
    if evaluate_at is not None:
        evaluate_times = pd.DataFrame({'event_at': evaluate_at})
        table = pd.merge(evaluate_times, table, on='event_at', how='outer')
        table = table.sort_values('event_at').fillna(method='ffill')
        table['keep'] = table['event_at'].apply(lambda x: x in evaluate_at)
    else:
        table['keep'] = True
    table = table.loc[table['keep'] == True, ['event_at', 'survival']]
    table.rename(columns={'event_at': time_col}, inplace=True)
    return table
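A hedged usage sketch for _summarize_survival (assuming pandas, lifelines, and the definition above are in scope; the toy column names are illustrative):

import pandas as pd

toy = pd.DataFrame({'time': [1, 2, 3, 4], 'event': [1, 0, 1, 1]})
print(_summarize_survival(toy, 'time', 'event', evaluate_at=[1, 3]))
# returns a two-column frame: 'time' and the survival fraction at those times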
Example #25
    def fit(self, event_times, censorship=None, timeline=None, columns=['KM-estimate'], alpha=None, insert_0=True):
        """
        Parameters:
          event_times: an (n,1) array of times that the death event occurred at
          timeline: return the best estimate at the values in timelines (positively increasing)
          censorship: an (n,1) array of booleans -- True if the death was observed, False if the event
             was lost (right-censored). Defaults to all True if censorship is None
          columns: a length-1 array to name the column of the estimate.
          alpha: the alpha value in the confidence intervals. Overrides the initializing
             alpha for this call to fit only.
          insert_0: add a leading 0 (if not present) in the timeline.

        Returns:
          DataFrame with index either event_times or timelines (if not None), with
          values under column_name with the KaplanMeier estimate
        """
        # set to all observed if censorship is None
        if censorship is None:
            self.censorship = np.ones(event_times.shape[0], dtype=bool)
        else:
            self.censorship = np.array(censorship).copy()

        if not alpha:
            alpha = self.alpha

        self.event_times = survival_table_from_events(event_times, self.censorship)

        if timeline is None:
            self.timeline = self.event_times.index.values.copy().astype(float)
            if insert_0 and self.timeline[0] > 0:
                self.timeline = np.insert(self.timeline, 0, 0.)
        else:
            self.timeline = timeline.astype(float)

        log_survival_function, cumulative_sq_ = _additive_estimate(self.event_times, self.timeline,
                                                                   self._additive_f, self._additive_var)

        self.survival_function_ = pd.DataFrame(np.exp(log_survival_function), columns=columns)
        self.confidence_interval_ = self._bounds(cumulative_sq_[:, None], alpha)
        self.plot = plot_dataframes(self, "survival_function_")
        self.plot_survival_function = self.plot
        self.median_ = median_survival_times(self.survival_function_)
        return self
Example #26
def _summarize_survival(df, time_col, event_col, evaluate_at=None):
    ## prepare survival table
    table = survival_table_from_events(df[time_col], df[event_col])
    table.reset_index(inplace=True)
    ## normalize survival as fraction of initial_n
    table['initial_n'] = max(table.at_risk)
    table['survival'] = table.apply(
        lambda row: row['at_risk'] / row['initial_n'], axis=1)
    ## handle timepoints if given
    if evaluate_at is not None:
        evaluate_times = pd.DataFrame({'event_at': evaluate_at})
        table = pd.merge(evaluate_times, table, on='event_at', how='outer')
        table = table.sort_values('event_at').fillna(method='ffill')
        table['keep'] = table['event_at'].apply(lambda x: x in evaluate_at)
    else:
        table['keep'] = True
    table = table.loc[table['keep'] == True, ['event_at', 'survival']]
    table.rename(columns={'event_at': time_col}, inplace=True)
    return table
Example #27
    def _compute_baseline_hazard(self, data, durations, event_observed,
                                 weights, name):
        # https://stats.stackexchange.com/questions/46532/cox-baseline-hazard
        ind_hazards = self.predict_partial_hazard(data) * weights[:, None]
        ind_hazards["event_at"] = durations.values
        ind_hazards_summed_over_durations = (
            ind_hazards.groupby("event_at")[0].sum().sort_index(
                ascending=False).cumsum())
        ind_hazards_summed_over_durations.name = "hazards"

        event_table = survival_table_from_events(durations,
                                                 event_observed,
                                                 weights=weights)
        event_table = event_table.join(ind_hazards_summed_over_durations)
        baseline_hazard = pd.DataFrame(event_table["observed"] /
                                       event_table["hazards"],
                                       columns=[name]).fillna(0)

        return baseline_hazard
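Note the weights argument passed to survival_table_from_events here; a minimal sketch of its effect, assuming a lifelines version that supports weights (each weight counts its row as that many identical subjects):

import numpy as np
from lifelines.utils import survival_table_from_events

T = np.array([3., 5., 7.])
E = np.array([1, 1, 0])
w = np.array([2, 1, 4])  # row i stands for w[i] subjects
table = survival_table_from_events(T, E, weights=w)
print(table['at_risk'])  # the total weight (7) enters at time 0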
Example #28
    def fit(self, event_times, censorship=None, timeline=None, columns=['NA-estimate'], alpha=None, insert_0=True):
        """
        Parameters:
          event_times: an (n,1) array of times that the death event occurred at
          timeline: return the best estimate at the values in timelines (positively increasing)
          columns: a length-1 array to name the column of the estimate.
          alpha: the alpha value in the confidence intervals. Overrides the initializing
             alpha for this call to fit only.
          insert_0: add a leading 0 (if not present) in the timeline.

        Returns:
          DataFrame with index either event_times or timelines (if not None), with
          values as the NelsonAalen estimate
        """
        # set to all observed if censorship is None
        if censorship is None:
            self.censorship = np.ones(event_times.shape[0], dtype=bool)
        else:
            self.censorship = np.array(censorship).copy().astype(bool)
        self.event_times = survival_table_from_events(event_times, self.censorship)

        if alpha is None:
            alpha = self.alpha

        if timeline is None:
            self.timeline = self.event_times.index.values.copy().astype(float)
            if insert_0 and self.timeline[0] > 0:
                self.timeline = np.insert(self.timeline, 0, 0.)
        else:
            self.timeline = timeline.astype(float)

        cumulative_hazard_, cumulative_sq_ = _additive_estimate(self.event_times, self.timeline,
                                                                self._additive_f, self._variance_f)
        self.cumulative_hazard_ = pd.DataFrame(cumulative_hazard_, columns=columns)
        self.confidence_interval_ = self._bounds(cumulative_sq_[:, None], alpha)
        self.plot = plot_dataframes(self, "cumulative_hazard_")
        self.plot_cumulative_hazard = self.plot
        self.plot_hazard = plot_dataframes(self, 'hazard_')
        self._cumulative_sq = cumulative_sq_

        return self
Example #29
    def _compute_baseline_hazard(self):
        # http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
        ind_hazards = self.predict_partial_hazard(self.data).values

        event_table = survival_table_from_events(self.durations.values,
                                                 self.event_observed.values)

        baseline_hazard_ = pd.DataFrame(np.zeros((event_table.shape[0], 1)),
                                        index=event_table.index,
                                        columns=['baseline hazard'])

        for t, s in event_table.iterrows():
            less = np.array(self.durations >= t)
            if ind_hazards[less].sum() == 0:
                v = 0
            else:
                v = (s['observed'] / ind_hazards[less].sum())
            baseline_hazard_.loc[t] = v

        return baseline_hazard_
Example #30
    def _compute_baseline_hazard(self):
        # http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
        ind_hazards = self.predict_partial_hazard(self.data).values

        event_table = survival_table_from_events(self.durations.values,
                                                 self.event_observed.values)

        baseline_hazard_ = pd.DataFrame(np.zeros((event_table.shape[0], 1)),
                                        index=event_table.index,
                                        columns=['baseline hazard'])

        for t, s in event_table.iterrows():
            less = np.array(self.durations >= t)
            if ind_hazards[less].sum() == 0:
                v = 0
            else:
                v = (s['observed'] / ind_hazards[less].sum())
            baseline_hazard_.loc[t] = v

        return baseline_hazard_
Example #31
def preprocess_inputs(durations, event_observed, timeline, entry):

    n = len(durations)
    durations = np.asarray(durations).reshape((n,))

    # set to all observed if event_observed is none
    if event_observed is None:
        event_observed = np.ones(n, dtype=int)
    else:
        event_observed = np.asarray(event_observed).reshape((n,)).copy().astype(int)

    if entry is None:
        entry = np.zeros(n)
    else:
        entry = np.asarray(entry).reshape((n,))

    event_table = survival_table_from_events(durations, event_observed, entry)

    if timeline is None:
        timeline = event_table.index.values
    else:
        timeline = np.asarray(timeline)

    return durations, event_observed, timeline.astype(float), entry, event_table
Example #32
df = df[df['Site'] == "glottic"]
df = df[df['Ethnicity'] == "white"]

from lifelines.utils import survival_table_from_events

df1 = df[df['Tcategory'] == 'T3'].copy()
df1['Censor'] = 1 - df1['Censor']  # flip the event coding: 0 -> 1 (event), 1 -> 0 (censored)

df2 = df[df['Tcategory'] == 'T4'].copy()
df2['Censor'] = 1 - df2['Censor']

T = df1['OS']
E = df1['Censor']
table = survival_table_from_events(df1['OS'], df1['Censor'])
table2 = survival_table_from_events(df2['OS'], df2['Censor'])
print(table.head())

from lifelines import KaplanMeierFitter
kmf = KaplanMeierFitter()
kmf.fit(T, event_observed=E, label="T3")  # more succiently, kmf.fit(T,E)
print kmf.survival_function_

ax = kmf.plot()

T = df2['OS']
E = df2['Censor']
kmf.fit(T, event_observed=E, label="T4")  # more succiently, kmf.fit(T,E)
kmf.plot(ax=ax)
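A natural follow-up not in the original script (sketch): compare the two groups with a log-rank test.

from lifelines.statistics import logrank_test

results = logrank_test(df1['OS'], df2['OS'],
                       event_observed_A=df1['Censor'], event_observed_B=df2['Censor'])
print(results.p_value)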
Example #33
File: test_utils.py Project: vck/lifelines
def test_survival_table_from_events_binned_with_empty_bin():
    df = load_waltons()
    ix = df["group"] == "miR-137"
    event_table = utils.survival_table_from_events(
        df.loc[ix]["T"], df.loc[ix]["E"], intervals=[0, 10, 20, 30, 40, 50])
    assert not pd.isnull(event_table).any().any()
Example #34
def test_survival_table_from_events_will_collapse_if_asked():
    T, C = np.array([1, 3, 4, 5]), np.array([True, True, True, True])
    table = utils.survival_table_from_events(T, C, collapse=True)
    assert table.index.tolist() == [pd.Interval(0, 3.5089999999999999, closed='right'), pd.Interval(3.5089999999999999, 7.0179999999999998, closed='right')]
Example #35
def test_survival_table_from_events_will_collapse_to_desired_bins():
    T, C = np.array([1, 3, 4, 5]), np.array([True, True, True, True])
    table = utils.survival_table_from_events(T, C, collapse=True, intervals=[0, 4, 8])
    assert table.index.tolist() == [pd.Interval(0, 4, closed='right'), pd.Interval(4, 8, closed='right')]
Example #36
"""
          removed  observed  censored  entrance  at_risk
event_at
0               0         0         0       163      163
6               1         1         0         0      163
7               2         1         1         0      162
9               3         3         0         0      160
13              3         3         0         0      157
"""

import numpy as np
from lifelines.utils import survival_table_from_events

# traditional way to generate survival data:
# draw xS as the survival time and xC as the censoring time;
# if xS < xC the event is observed, otherwise it is censored
xS = np.random.exponential(1, 10)
xC = np.random.exponential(1, 10)
obsE = xS < xC

obsT = []
for i in range(10):
    if obsE[i]:
        obsT.append(xS[i])
    else:
        obsT.append(xC[i])

table = survival_table_from_events(obsT, obsE)
print(table.head())
Example #37
def test_survival_table_to_events():
    T, C = np.array([1, 2, 3, 4, 4, 5]), np.array([1, 0, 1, 1, 1, 1])
    d = utils.survival_table_from_events(T, C, np.zeros_like(T))
    T_, C_ = utils.survival_events_from_table(d[['censored', 'observed']])
    npt.assert_array_equal(T, T_)
    npt.assert_array_equal(C, C_)
Example #38
               xlabel='Days of follow-up',
               legend_labels=['Male', 'Female'],
               ylabel='Survival probability',
               title='Overall survival in lung cancer patients')

# Cox PH regression
from lifelines import CoxPHFitter

cph = CoxPHFitter()

lungdata['age_grp'] = np.where(lungdata['age'] >= 65, 1, 0)
lungdata['sex'] = np.where(lungdata['sex'] == 1, 1, 2)

lung_cph = lungdata[['time', 'status_km', 'sex', 'age',
                     'wt.loss']]  # only keep vars needed for model
lung_cph = lung_cph[
    lung_cph['wt.loss'].notna()]  # drop nan values so the model can converge

cph.fit(lung_cph, 'time', event_col='status_km')
cph.print_summary()

# extract stats for the multivariate table & save as .csv format
cph.summary.to_csv(results_folder + 'multivariate_table.csv')

# miscellaneous ...
# look at the survival table
from lifelines.utils import survival_table_from_events
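# Note: T and E below are assumed to be the duration and event columns built earlier
# in the script (e.g. lungdata['time'] and lungdata['status_km']); this excerpt omits
# their definition.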

table = survival_table_from_events(T, E)
print(table.head())
Example #39
def test_survival_table_to_events():
    T, C = np.array([1, 2, 3, 4, 4, 5]), np.array([1, 0, 1, 1, 1, 1])
    d = utils.survival_table_from_events(T, C, np.zeros_like(T))
    T_, C_ = utils.survival_events_from_table(d[['censored', 'observed']])
    npt.assert_array_equal(T, T_)
    npt.assert_array_equal(C, C_)
Example #40
def test_survival_table_to_events_casts_to_float():
    T, C = np.array([1, 2, 3, 4, 4, 5]), np.array([True, False, True, True, True, True])
    d = utils.survival_table_from_events(T, C, np.zeros_like(T))
    npt.assert_array_equal(d['censored'].values, np.array([0.,  0.,  1.,  0.,  0.,  0.]))
    npt.assert_array_equal(d['removed'].values, np.array([0.,  1.,  1.,  1.,  2.,  1.]))