def logrank_test(event_times_A, event_times_B, censorship_A=None, censorship_B=None, t_0=-1):
    """
    Measures and reports on whether two intensity processes are different. That is, given two
    event series, determines whether the data generating processes are statistically different.

    See Survival and Event Analysis, page 108. This implicitly uses the log-rank weights.

    Parameters:
      event_times_X: a (nx1) array of event times (deaths,...) for the population.
      censorship_X: a (nx1) array of event-observed indicators; defaults to all observed.
      t_0: the period under observation, -1 for all time.

    Returns:
      U: the test statistic, approximately standard normal under the null hypothesis.
    """
    if censorship_A is None:
        censorship_A = np.ones((event_times_A.shape[0], 1))
    if censorship_B is None:
        censorship_B = np.ones((event_times_B.shape[0], 1))

    if t_0 == -1:
        t_0 = np.max([event_times_A.max(), event_times_B.max()])

    event_times_AB = dataframe_from_events_censorship(
        np.append(event_times_A, event_times_B),
        np.append(censorship_A, censorship_B))
    event_times_A = dataframe_from_events_censorship(event_times_A, censorship_A)
    event_times_B = dataframe_from_events_censorship(event_times_B, censorship_B)

    N_dot = event_times_AB[["observed"]].cumsum()
    # at-risk counts: total removed minus cumulative removals up to each time
    Y_dot = event_times_AB["removed"].sum() - event_times_AB["removed"].cumsum()
    Y_1 = event_times_A["removed"].sum() - event_times_A["removed"].cumsum()
    Y_2 = event_times_B["removed"].sum() - event_times_B["removed"].cumsum()

    v = 0
    v_sq = 0
    # carry forward the last seen at-risk count when a time is absent from a group
    # (.iloc replaces the removed pandas .ix positional indexer)
    y_1 = Y_1.iloc[0]
    y_2 = Y_2.iloc[0]
    for t, n_t in N_dot.loc[N_dot.index <= t_0].itertuples():
        try:
            y_1 = Y_1.loc[t]
        except KeyError:
            pass
        try:
            y_2 = Y_2.loc[t]
        except KeyError:
            pass
        y_dot = Y_dot.loc[t]
        if y_dot != 0:
            v += 1. * y_1 / y_dot
            v_sq += (1. * y_2 * y_1) / (y_dot ** 2)

    E_1 = v
    N_1 = event_times_A[["observed"]].sum()[0]
    Z_1 = N_1 - E_1
    U = Z_1 / np.sqrt(v_sq)  # approx standard normal under the null
    return U
def fit(self, event_times, timeline=None, censorship=None, columns=None):
    """
    Fit the estimator to the observed event times.

    Parameters:
      event_times: an (n,1) array of times that the death event occured at
      timeline: return the best estimate at the values in timeline (postively increasing)
      censorship: an (n,1) array of booleans -- True if the the death was observed, False if the event
         was lost (right-censored). Defaults all True if censorship==None
      columns: a length-1 list naming the estimate column; defaults to ['KM-estimate'].

    Returns:
      self, with index either event_times or timeline (if not None) on survival_function_.
    """
    if columns is None:
        columns = ['KM-estimate']  # avoid a shared mutable default argument
    # need to sort event_times
    if censorship is None:
        self.censorship = np.ones_like(event_times, dtype=bool)  # all events observed
    else:
        self.censorship = censorship.copy()
    self.event_times = dataframe_from_events_censorship(event_times, self.censorship)

    if timeline is None:
        self.timeline = self.event_times.index.values.copy()
    else:
        self.timeline = timeline

    # the additive estimate is on the log scale; exponentiate to get S(t)
    log_survival_function, cumulative_sq_ = _additive_estimate(
        self.event_times, self.timeline, self.censorship,
        self.additive_f, 0, columns)
    self.survival_function_ = np.exp(log_survival_function)
    self.confidence_interval_ = self._bounds(cumulative_sq_)
    self.plot = plot_dataframes(self, "survival_function_")
    return self
def fit(self, event_times, timeline=None, censorship=None, columns=None):
    """
    Fit the Nelson-Aalen estimator to the observed event times.

    Parameters:
      event_times: an (n,1) array of times that the death event occured at
      timeline: return the best estimate at the values in timeline (postively increasing)
      censorship: an (n,1) array of booleans -- True if the death was observed, False if the event
         was lost (right-censored). Defaults all True if censorship==None
      columns: a length-1 list naming the estimate column; defaults to ['NA-estimate'].

    Returns:
      self, with cumulative_hazard_ indexed by event_times or timeline (if not None).
    """
    if columns is None:
        columns = ['NA-estimate']  # avoid a shared mutable default argument
    if censorship is None:
        self.censorship = np.ones_like(event_times, dtype=bool)  # all events observed
    else:
        self.censorship = censorship.copy()
    self.event_times = dataframe_from_events_censorship(event_times, self.censorship)

    if timeline is None:
        self.timeline = self.event_times.index.values.copy()
    else:
        self.timeline = timeline

    self.cumulative_hazard_, cumulative_sq_ = _additive_estimate(
        self.event_times, self.timeline, self.censorship,
        self.additive_f, 0, columns,
        nelson_aalen_smoothing=self.nelson_aalen_smoothing)
    self.confidence_interval_ = self._bounds(cumulative_sq_)
    self.plot = plot_dataframes(self, "cumulative_hazard_")
    # return self (not bare return) for consistency with the other fitters'
    # fluent fit(...) convention
    return self
def fit(self, event_times, censorship=None, timeline=None, columns=None, alpha=None):
    """
    Fit the Nelson-Aalen estimator to the observed event times.

    Parameters:
      event_times: an (n,1) array of times that the death event occured at
      timeline: return the best estimate at the values in timeline (postively increasing)
      columns: a length-1 list naming the estimate column; defaults to ['NA-estimate'].
      alpha: the alpha value in the confidence intervals. Overrides the initializing
         alpha for this call to fit only.

    Returns:
      self, with cumulative_hazard_ indexed by event_times or timeline (if not None).
    """
    if columns is None:
        columns = ['NA-estimate']  # avoid a shared mutable default argument
    if censorship is None:
        self.censorship = np.ones_like(event_times, dtype=bool)  # all events observed
    else:
        self.censorship = censorship.copy().astype(bool)
    self.event_times = dataframe_from_events_censorship(event_times, self.censorship)

    if alpha is None:
        alpha = self.alpha  # fall back to the instance-level confidence setting

    if timeline is None:
        self.timeline = self.event_times.index.values.copy().astype(float)
    else:
        self.timeline = timeline

    self.cumulative_hazard_, cumulative_sq_ = _additive_estimate(
        self.event_times, self.timeline,
        self._additive_f, columns, self._variance_f)
    self.confidence_interval_ = self._bounds(cumulative_sq_, alpha)
    self.plot = plot_dataframes(self, "cumulative_hazard_")
    # return self (not bare return) for consistency with the other fitters'
    # fluent fit(...) convention
    return self
def fit(self, event_times, censorship=None, timeline=None, columns=None, alpha=None, insert_0=True):
    """
    Fit the Kaplan-Meier estimator to the observed event times.

    Parameters:
      event_times: an (n,1) array of times that the death event occured at
      timeline: return the best estimate at the values in timeline (postively increasing)
      censorship: an (n,1) array of booleans -- True if the the death was observed, False if the event
         was lost (right-censored). Defaults all True if censorship==None
      columns: a length-1 list naming the estimate column; defaults to ['KM-estimate'].
      alpha: the alpha value in the confidence intervals. Overrides the initializing
         alpha for this call to fit only.
      insert_0: add a leading 0 (if not present) in the timeline.

    Returns:
      self, with survival_function_ indexed by event_times or timeline (if not None),
      values under column_name with the KaplanMeier estimate.
    """
    if columns is None:
        columns = ['KM-estimate']  # avoid a shared mutable default argument
    # set to all observed if censorship is none
    if censorship is None:
        self.censorship = np.ones_like(event_times, dtype=bool)
    else:
        self.censorship = np.array(censorship).copy()

    # `is None` (not truthiness): a caller-supplied falsy alpha must not be
    # silently replaced by the instance default
    if alpha is None:
        alpha = self.alpha

    self.event_times = dataframe_from_events_censorship(event_times, self.censorship)

    if timeline is None:
        self.timeline = self.event_times.index.values.copy()
    else:
        self.timeline = timeline

    # the additive estimate is on the log scale; exponentiate to get S(t)
    log_survival_function, cumulative_sq_ = _additive_estimate(
        self.event_times, self.timeline,
        self._additive_f, columns, self._variance_f, insert_0)
    self.survival_function_ = np.exp(log_survival_function)
    self.median_ = median_survival_times(self.survival_function_)
    self.confidence_interval_ = self._bounds(cumulative_sq_, alpha)
    self.plot = plot_dataframes(self, "survival_function_")
    return self
def logrank_test(event_times_A, event_times_B, censorship_A=None, censorship_B=None, alpha=0.95, t_0=-1):
    """
    Measures and reports on whether two intensity processes are different. That is, given two
    event series, determines whether the data generating processes are statistically different.

    Pre lifelines 0.2: this returned a test statistic.
    Post lifelines 0.2: this returns the results of the entire test.

    See Survival and Event Analysis, page 108. This implicitly uses the log-rank weights.

    Parameters:
      event_times_X: a (nx1) array of event durations (birth to death,...) for the population.
      censorship_X: a (nx1) array of event-observed indicators; defaults to all observed.
      alpha: the significance level used in the z-test.
      t_0: the period under observation, -1 for all time.

    Returns:
      summary: a print-friendly string detailing the results of the test.
      p: the p-value
      Z: the test result: True if reject the null, (pendantically None if inconclusive)
    """
    if censorship_A is None:
        censorship_A = np.ones((event_times_A.shape[0], 1))
    if censorship_B is None:
        censorship_B = np.ones((event_times_B.shape[0], 1))

    if t_0 == -1:
        t_0 = np.max([event_times_A.max(), event_times_B.max()])

    event_times_AB = dataframe_from_events_censorship(
        np.append(event_times_A, event_times_B),
        np.append(censorship_A, censorship_B))
    event_times_A = dataframe_from_events_censorship(event_times_A, censorship_A)
    event_times_B = dataframe_from_events_censorship(event_times_B, censorship_B)

    N_dot = event_times_AB[["observed"]].cumsum()
    # at-risk counts: total removed minus cumulative removals up to each time
    Y_dot = event_times_AB["removed"].sum() - event_times_AB["removed"].cumsum()
    Y_1 = event_times_A["removed"].sum() - event_times_A["removed"].cumsum()
    Y_2 = event_times_B["removed"].sum() - event_times_B["removed"].cumsum()

    v = 0
    v_sq = 0
    y_1 = Y_1.iloc[0]
    y_2 = Y_2.iloc[0]
    # .loc replaces the removed pandas .ix indexer for label/boolean selection
    for t, n_t in N_dot.loc[N_dot.index <= t_0].itertuples():
        # sorta a nasty hack to check if the time is not in the data.
        # Could be done better. Carries forward the previous at-risk count.
        try:
            y_1 = Y_1.loc[t]
        except (KeyError, IndexError):
            pass
        try:
            y_2 = Y_2.loc[t]
        except (KeyError, IndexError):
            pass
        y_dot = Y_dot.loc[t]
        if y_dot != 0:
            v += 1. * y_1 / y_dot
            v_sq += (1. * y_2 * y_1) / (y_dot ** 2)

    E_1 = v
    N_1 = event_times_A[["observed"]].sum()[0]
    Z_1 = N_1 - E_1
    U = Z_1 / np.sqrt(v_sq)  # this is approx normal under null.

    test_result, p_value = z_test(U, alpha)
    summary = pretty_print_summary(test_result, p_value, U, t_0=t_0, test='logrank', alpha=alpha)
    return summary, p_value, test_result