Example #1
import numpy as np

# dataframe_from_events_censorship is an internal lifelines helper assumed importable.
def logrank_test(event_times_A, event_times_B, censorship_A=None, censorship_B=None, t_0=-1):
  """
  Measures and reports on whether two intensity processes are different. That is, given two 
  event series, determines whether the data generating processes are statistically different. 

  See Survival and Event History Analysis, page 108. This implicitly uses the log-rank weights.

  Parameters:
    event_times_X: an (n,1) array of event times (deaths, ...) for the population.
    t_0: the period under observation; -1 for all time.

  """

  if censorship_A is None:
    censorship_A = np.ones((event_times_A.shape[0], 1))  
  if censorship_B is None:
    censorship_B = np.ones((event_times_B.shape[0], 1))

  if t_0 == -1: 
    t_0 = np.max([event_times_A.max(), event_times_B.max()])
  
  event_times_AB = dataframe_from_events_censorship( np.append(event_times_A,event_times_B),
                                                     np.append( censorship_A, censorship_B) )

  event_times_A = dataframe_from_events_censorship( event_times_A, censorship_A)
  event_times_B = dataframe_from_events_censorship( event_times_B, censorship_B)

  N_dot = event_times_AB[["observed"]].cumsum()
  Y_dot = event_times_AB["removed"].sum() - event_times_AB["removed"].cumsum()
  Y_1 = event_times_A["removed"].sum() - event_times_A["removed"].cumsum()
  Y_2 = event_times_B["removed"].sum() - event_times_B["removed"].cumsum()

  v = 0
  v_sq = 0
  y_1 = Y_1.iloc[0]
  y_2 = Y_2.iloc[0]
  for t, n_t in N_dot.loc[N_dot.index <= t_0].itertuples():
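    # if t is absent from a group's index, the previous value carries forward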
    try:
      y_1 = Y_1.loc[t]
    except KeyError:
      pass      
    try:  
      y_2 = Y_2.loc[t]
    except KeyError:
      pass
    y_dot = Y_dot.loc[t]
    if y_dot != 0:
      v += 1.*y_1/y_dot
      v_sq += (1.*y_2*y_1)/(y_dot**2)
  E_1 = v
  N_1 = event_times_A[["observed"]].sum()[0]
  Z_1 = N_1 - E_1
  U = Z_1/np.sqrt(v_sq)
  return U
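
A minimal usage sketch (synthetic data; the names T_A and T_B are hypothetical, and the helpers above are assumed importable):

T_A = np.random.exponential(10., size=(100, 1))  # durations for population A
T_B = np.random.exponential(14., size=(100, 1))  # durations for population B
U = logrank_test(T_A, T_B)  # all events observed, entire timeline
# |U| is approximately standard normal under the null of equal intensities.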
Example #2
  def fit(self, event_times, timeline=None, censorship=None, columns=['KM-estimate']):
       """
       Parameters:
         event_times: an (n,1) array of times that the death event occurred at
         timeline: return the best estimate at the values in timeline (positively increasing)
         censorship: an (n,1) array of booleans -- True if the death was observed, False if the event
            was lost (right-censored). Defaults to all True if censorship is None.
       Returns:
         self, with a new property 'survival_function_': a DataFrame indexed by event_times
         or timeline (if not None), holding the KaplanMeier estimate
       """
       #need to sort event_times
       if censorship is None:
          self.censorship = np.ones_like(event_times, dtype=bool)  # default: every event observed
       else:
          self.censorship = censorship.copy()

       self.event_times = dataframe_from_events_censorship(event_times, self.censorship)

       if timeline is None:
          self.timeline = self.event_times.index.values.copy()
       else:
          self.timeline = timeline
       log_survival_function, cumulative_sq_ = _additive_estimate(self.event_times, self.timeline,
                                                                  self.censorship, self.additive_f,
                                                                  0, columns)
       self.survival_function_ = np.exp(log_survival_function)
       self.confidence_interval_ = self._bounds(cumulative_sq_)
       self.plot = plot_dataframes(self, "survival_function_")
       return self
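
A usage sketch, assuming this fit lives on an early lifelines KaplanMeierFitter (the class name and surrounding API are assumptions):

kmf = KaplanMeierFitter()              # assumed host class of this method
kmf.fit(T, censorship=observed)        # T: (n,1) durations; observed: (n,1) booleans
kmf.survival_function_                 # DataFrame with column 'KM-estimate'
kmf.plot()                             # plotting handle attached inside fit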
Example #3
    def fit(self, event_times, timeline=None, censorship=None, columns=['NA-estimate']):
        """
        Parameters:
          event_times: an (n,1) array of times that the death event occurred at
          timeline: return the best estimate at the values in timeline (positively increasing)

        Returns:
          self, with a new property 'cumulative_hazard_': a DataFrame indexed by event_times
          or timeline (if not None), holding the NelsonAalen estimate
        """
        
        if censorship is None:
           self.censorship = np.ones_like(event_times, dtype=bool)  # default: every event observed
        else:
           self.censorship = censorship.copy()

        self.event_times = dataframe_from_events_censorship(event_times, self.censorship)

        if timeline is None:
           self.timeline = self.event_times.index.values.copy()
        else:
           self.timeline = timeline
        self.cumulative_hazard_, cumulative_sq_ = _additive_estimate(self.event_times, 
                                                                     self.timeline, self.censorship, 
                                                                     self.additive_f, 0, columns,
                                                                     nelson_aalen_smoothing=self.nelson_aalen_smoothing )
        self.confidence_interval_ = self._bounds(cumulative_sq_)
        self.plot = plot_dataframes(self, "cumulative_hazard_")

        return self
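
Analogous usage for the cumulative hazard, assuming a NelsonAalenFitter hosts this method (again an assumption):

naf = NelsonAalenFitter()              # assumed host class of this method
naf.fit(T, censorship=observed)
naf.cumulative_hazard_                 # DataFrame with column 'NA-estimate'
naf.confidence_interval_               # bounds derived from the cumulative variance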
Example #4
    def fit(self, event_times, censorship=None, timeline=None, columns=['NA-estimate'], alpha=None):
        """
        Parameters:
          event_times: an (n,1) array of times that the death event occurred at
          timeline: return the best estimate at the values in timeline (positively increasing)
          columns: a length-1 array to name the column of the estimate.
          alpha: the alpha value in the confidence intervals. Overrides the initializing
             alpha for this call to fit only.

        Returns:
          self, with a new property 'cumulative_hazard_': a DataFrame indexed by event_times
          or timeline (if not None), holding the NelsonAalen estimate
        """
        
        if censorship is None:
           self.censorship = np.ones_like(event_times, dtype=bool)  # default: every event observed
        else:
           self.censorship = censorship.copy().astype(bool)
        self.event_times = dataframe_from_events_censorship(event_times, self.censorship)

        if alpha is None:
            alpha = self.alpha

        if timeline is None:
           self.timeline = self.event_times.index.values.copy().astype(float)
        else:
           self.timeline = timeline
        self.cumulative_hazard_, cumulative_sq_ = _additive_estimate(self.event_times, 
                                                                     self.timeline, self._additive_f,
                                                                      columns, self._variance_f )
        self.confidence_interval_ = self._bounds(cumulative_sq_, alpha)
        self.plot = plot_dataframes(self, "cumulative_hazard_")

        return self
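
The alpha argument overrides the fitter's default for this call only; a sketch under the same assumptions:

naf = NelsonAalenFitter(alpha=0.95)    # assumed constructor argument
naf.fit(T, alpha=0.99)                 # 99% intervals for this fit only
naf.fit(T)                             # falls back to the 0.95 set at construction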
Example #5
  def fit(self, event_times, censorship=None, timeline=None, columns=['KM-estimate'], alpha=None, insert_0=True):
       """
       Parameters:
         event_times: an (n,1) array of times that the death event occurred at
         timeline: return the best estimate at the values in timeline (positively increasing)
         censorship: an (n,1) array of booleans -- True if the death was observed, False if the event
            was lost (right-censored). Defaults to all True if censorship is None.
         columns: a length-1 array to name the column of the estimate.
         alpha: the alpha value in the confidence intervals. Overrides the initializing
            alpha for this call to fit only.
         insert_0: add a leading 0 (if not present) in the timeline.

       Returns:
         self, with a new property 'survival_function_': a DataFrame indexed by event_times
         or timeline (if not None), with the KaplanMeier estimate under the given column name
       """
       #set to all observed if censorship is none
       if censorship is None:
          self.censorship = np.ones_like(event_times, dtype=bool)  # default: every event observed
       else:
          self.censorship = np.array(censorship).copy()

       if alpha is None:
          alpha = self.alpha

       self.event_times = dataframe_from_events_censorship(event_times, self.censorship)

       if timeline is None:
          self.timeline = self.event_times.index.values.copy()
       else:
          self.timeline = timeline
       log_survival_function, cumulative_sq_ = _additive_estimate(self.event_times,
                                                                  self.timeline, self._additive_f,
                                                                  columns, self._variance_f, insert_0)
       self.survival_function_ = np.exp(log_survival_function)
       self.median_ = median_survival_times(self.survival_function_)
       self.confidence_interval_ = self._bounds(cumulative_sq_, alpha)
       self.plot = plot_dataframes(self, "survival_function_")
       return self
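
With insert_0=True the timeline gains a leading t = 0 row where the estimate is 1, and median_ is populated. A sketch under the same assumptions as before:

kmf = KaplanMeierFitter()
kmf.fit(T, censorship=observed, insert_0=True)
kmf.survival_function_.iloc[0]         # row at t = 0, survival estimate 1.0
kmf.median_                            # smallest t with estimated S(t) <= 0.5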
Example #6
import numpy as np

# dataframe_from_events_censorship, z_test, and pretty_print_summary are
# internal lifelines helpers assumed importable.
def logrank_test(event_times_A, event_times_B, censorship_A=None, censorship_B=None, alpha=0.95, t_0=-1):
  """
  Measures and reports on whether two intensity processes are different. That is, given two 
  event series, determines whether the data generating processes are statistically different. 

  Pre lifelines 0.2: this returned a test statistic. 
  Post lifelines 0.2: this returns the results of the entire test. 

  See Survival and Event History Analysis, page 108. This implicitly uses the log-rank weights.

  Parameters:
    event_times_X: an (n,1) array of event durations (birth to death, ...) for the population.
    t_0: the period under observation; -1 for all time.

  Returns:
    summary: a print-friendly string detailing the results of the test.
    p: the p-value
    Z: the test result: True if we reject the null (pedantically, None if inconclusive)
  """
  if censorship_A is None:
    censorship_A = np.ones((event_times_A.shape[0], 1))  
  if censorship_B is None:
    censorship_B = np.ones((event_times_B.shape[0], 1))

  if t_0 == -1: 
    t_0 = np.max([event_times_A.max(), event_times_B.max()])
  
  event_times_AB = dataframe_from_events_censorship( np.append(event_times_A,event_times_B),
                                                     np.append( censorship_A, censorship_B) )

  event_times_A = dataframe_from_events_censorship( event_times_A, censorship_A)
  event_times_B = dataframe_from_events_censorship( event_times_B, censorship_B)

  N_dot = event_times_AB[["observed"]].cumsum()
  Y_dot = event_times_AB["removed"].sum() - event_times_AB["removed"].cumsum()
  Y_1 = event_times_A["removed"].sum() - event_times_A["removed"].cumsum()
  Y_2 = event_times_B["removed"].sum() - event_times_B["removed"].cumsum()
  v = 0
  v_sq = 0
  y_1 = Y_1.iloc[0]
  y_2 = Y_2.iloc[0]
  for t, n_t in N_dot.loc[N_dot.index <= t_0].itertuples():
    try:
      # somewhat of a hack to check whether the time is present in the
      # group's data; could be done better.
      y_1 = Y_1.loc[t]
    except KeyError:
      pass  
    except IndexError:
      pass
    try:  
      y_2 = Y_2.loc[t]
    except KeyError:
      pass
    except IndexError:
      pass
    y_dot = Y_dot.loc[t]
    if y_dot != 0:
      v += 1.*y_1/y_dot
      v_sq += (1.*y_2*y_1)/(y_dot**2)

  E_1 = v
  N_1 = event_times_A[["observed"]].sum()[0]
  Z_1 = N_1 - E_1
  U = Z_1/np.sqrt(v_sq) #this is approx normal under null.
  
  test_result, p_value = z_test(U,alpha)
  summary = pretty_print_summary(test_result, p_value, U, t_0=t_0, test='logrank', alpha=alpha)
  return summary, p_value, test_result
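
Post-0.2 the function returns the full results; a sketch of unpacking them (names hypothetical):

summary, p_value, result = logrank_test(T_A, T_B, alpha=0.95)
print(summary)                         # print-friendly report of the test
if result:                             # True when the null of equal intensity is rejected
    print("reject null at alpha = 0.95, p = %.4f" % p_value)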