Code example #1
File: estimation.py Project: fdeheeger/lifelines
  def fit(self, event_times, X, timeline=None, censorship=None, columns=None):
    """currently X is a static (n,d) array

    event_times: (1,n) array of event times
    X: (n,d) the design matrix 
    timeline: (1,t) timepoints in ascending order
    censorship: (1,n) boolean array of censorships: True if observed, False if right-censored.
                If None, all events are assumed to be observed.

    Fits: self.cumulative_hazards_: a (t,d+1) dataframe of cumulative hazard coefficients
          self.hazards_: a (t,d+1) dataframe of hazard coefficients

    """

    n,d = X.shape
    X_ = X.copy()
    ix = event_times.argsort(1)[0,:]
    X_ = X_[ix,:].copy() if not self.fit_intercept else np.c_[ X_[ix,:].copy(), np.ones((n,1)) ]
    sorted_event_times = event_times[0,ix].copy()

    if columns is None:
      columns = list(range(d)) + ["baseline"]
    else:
      columns = [c for c in columns] + ["baseline"]

    if censorship is None:
        observed = np.ones(n, dtype=bool)
    else:
        observed = censorship.reshape(n)[ix]  # align with sorted_event_times

    if timeline is None:
        timeline = sorted_event_times
    
    zeros = np.zeros((timeline.shape[0],d+self.fit_intercept))
    
    self.cumulative_hazards_ = pd.DataFrame(zeros.copy() , index=timeline, columns = columns)
    self.hazards_ = pd.DataFrame(np.zeros((event_times.shape[1],d+self.fit_intercept)), index=sorted_event_times, columns = columns)
    self._variance = pd.DataFrame(zeros.copy(), index=timeline, columns = columns)
    
    penalizer = self.penalizer*np.eye(d + self.fit_intercept)
    
    t_0 = sorted_event_times[0]
    cum_v = np.zeros((d+self.fit_intercept,1))
    v = cum_v.copy()
    for i,time in enumerate(sorted_event_times):
        relevant_times = (t_0<timeline)*(timeline<=time)
        if observed[i] == 0:
          X_[i,:] = 0
        try:
          V = dot(inv(dot(X_.T,X_) + penalizer), X_.T)
        except LinAlgError:
          self.cumulative_hazards_.ix[relevant_times] = cum_v.T
          self.hazards_.iloc[i] = v.T
          self._variance.ix[relevant_times] = dot( V[:,i][:,None], V[:,i][None,:] ).diagonal()
          X_[i,:] = 0
          t_0 = time
          continue
        v = dot(V, basis(n,i))
        cum_v = cum_v + v
        self.cumulative_hazards_.ix[relevant_times] = self.cumulative_hazards_.ix[relevant_times].values + cum_v.T
        self.hazards_.iloc[i] = self.hazards_.iloc[i].values + v.T
        self._variance.ix[relevant_times] = self._variance.ix[relevant_times].values + dot( V[:,i][:,None], V[:,i][None,:] ).diagonal()
        t_0 = time
        X_[i,:] = 0

    relevant_times = (timeline>time)
    self.cumulative_hazards_.ix[relevant_times] = cum_v.T
    self.hazards_.iloc[i] = v.T
    self._variance.ix[relevant_times] = dot( V[:,i][:,None], V[:,i][None,:] ).diagonal()
    self.timeline = timeline
    self.censorship = censorship
    self._compute_confidence_intervals()
    return self
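The class that owns this fit method is not shown in the listing, but the file and project names (estimation.py in lifelines) suggest an Aalen-style additive hazards fitter whose constructor sets the fit_intercept and penalizer attributes read above. Below is a minimal, hypothetical usage sketch following the docstring shapes of code example #1 ((1,n) event times, (n,d) design matrix); AdditiveHazardsFitter is a placeholder name, not an actual lifelines class.

import numpy as np

# Hypothetical setup: AdditiveHazardsFitter stands in for the unshown class
# that defines fit(); it is assumed to expose fit_intercept and penalizer.
n, d = 100, 3
X = np.random.randn(n, d)                              # (n, d) design matrix
event_times = np.random.exponential(1., size=(1, n))   # (1, n) event times
censorship = np.random.rand(1, n) < 0.8                # True = observed

fitter = AdditiveHazardsFitter(fit_intercept=True, penalizer=0.5)  # hypothetical constructor
fitter.fit(event_times, X, censorship=censorship, columns=["x0", "x1", "x2"])

# fit() populates (t, d+1) DataFrames of coefficients:
print(fitter.cumulative_hazards_.head())
print(fitter.hazards_.head())
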
Code example #2
    def fit(self, event_times, X, timeline=None, censorship=None, columns=None):
        """currently X is a static (n,d) array

        event_times: (n,1) array of event times
        X: (n,d) the design matrix, either a numpy matrix or DataFrame.
        timeline: (t,1) timepoints in ascending order
        censorship: (n,1) boolean array of censorships: True if observed, False if right-censored.
                    If None, all events are assumed to be observed.

        Fits: self.cumulative_hazards_: a (t,d+1) dataframe of cumulative hazard coefficients
              self.hazards_: a (t,d+1) dataframe of hazard coefficients

        """
        # deal with the covariate matrix. Check if it is a dataframe or numpy array
        n, d = X.shape
        if isinstance(X, pd.DataFrame):
            X_ = X.values.copy()
            if columns is None:
                columns = X.columns
        else:
            X_ = X.copy()

        # append a column of ones for the baseline hazard
        ix = event_times.argsort(0)[:, 0].copy()
        X_ = X_[ix,:].copy() if not self.fit_intercept else np.c_[ X_[ix,:].copy(), np.ones((n, 1)) ]
        sorted_event_times = event_times[ix, 0].copy()

        # set the column names of the result DataFrames.
        if columns is None:
            columns = list(range(d))
        else:
            columns = [c for c in columns]

        if self.fit_intercept:
            columns += ['baseline']

        # set the censorship events. 1 if the death was observed.
        if censorship is None:
            observed = np.ones(n, dtype=bool)
        else:
            observed = censorship[ix].reshape(n)

        # set the timeline -- this is used as DataFrame index in the results
        if timeline is None:
            timeline = sorted_event_times.copy()

        timeline = np.unique(timeline.astype(float))
        if timeline[0] > 0:
            timeline = np.insert(timeline, 0, 0.)

        unique_times = np.unique(timeline)
        zeros = np.zeros((timeline.shape[0], d + self.fit_intercept))
        self.cumulative_hazards_ = pd.DataFrame(zeros.copy(), index=unique_times, columns=columns)
        self.hazards_ = pd.DataFrame(
            np.zeros((event_times.shape[0], d + self.fit_intercept)), index=event_times[:, 0], columns=columns)
        self._variance = pd.DataFrame(zeros.copy(), index=unique_times, columns=columns)

        # create the penalizer matrix for L2 regression
        penalizer = self.penalizer * np.eye(d + self.fit_intercept)

        t_0 = sorted_event_times[0]
        cum_v = np.zeros((d + self.fit_intercept, 1))
        v = cum_v.copy()
        for i, time in enumerate(sorted_event_times):
            relevant_times = (t_0 < timeline) * (timeline <= time)
            if observed[i] == 0:
                X_[i,:] = 0
            try:
                V = dot(inv(dot(X_.T, X_) + penalizer), X_.T)
            except LinAlgError:
                # if penalizer > 0, this should not occur. But sometimes it does...
                V = dot(pinv(dot(X_.T, X_) + penalizer), X_.T)

            v = dot(V, basis(n, i))
            cum_v = cum_v + v
            self.cumulative_hazards_.ix[relevant_times] = self.cumulative_hazards_.ix[relevant_times].values + cum_v.T
            self.hazards_.iloc[i] = self.hazards_.iloc[i].values + v.T
            self._variance.ix[relevant_times] = self._variance.ix[relevant_times].values + dot( V[:, i][:, None], V[:, i][None,:] ).diagonal()
            t_0 = time
            X_[i,:] = 0

        # clean up last iteration
        relevant_times = (timeline > time)
        self.hazards_.iloc[i] = v.T
        try:
            self.cumulative_hazards_.ix[relevant_times] = cum_v.T
            self._variance.ix[relevant_times] = dot( V[:, i][:, None], V[:, i][None,:] ).diagonal()
        except:
            pass
        self.timeline = timeline
        self.X = X
        self.censorship = censorship
        self.event_times = event_times
        self._compute_confidence_intervals()
        self.plot = plot_regressions(self)
        return self
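After fit returns, the estimates live on the fitter as pandas DataFrames indexed by the timeline. A short sketch of reading them back, reusing the hypothetical fitter from the sketch after code example #1 (note that this version expects (n,1)-shaped event_times rather than (1,n)); only attributes that the code above actually assigns are touched.

# DataFrames created inside fit(), indexed by the (unique) timeline:
cum_haz = fitter.cumulative_hazards_   # (t, d+1) cumulative hazard coefficients
haz = fitter.hazards_                  # per-event-time hazard increments
var = fitter._variance                 # diagonal variance terms behind the confidence intervals

# Final cumulative coefficient for each covariate plus the 'baseline' column:
print(cum_haz.iloc[-1])

# This version also stores the inputs back on the object:
print(fitter.timeline[:5], fitter.event_times.shape)
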
Code example #3
File: estimation.py Project: Basqiat/lifelines
  def fit(self, event_times, X, timeline=None, censorship=None, columns=None):
    """currently X is a static (n,d) array

    event_times: (1,n) array of event times
    X: (n,d) the design matrix, either a numpy matrix or DataFrame.  
    timeline: (t,1) timepoints in ascending order
    censorship: (n,1) boolean array of censorships: True if observed, False if right-censored.
                If None, all events are assumed to be observed.

    Fits: self.cumulative_hazards_: a (t,d+1) dataframe of cumulative hazard coefficients
          self.hazards_: a (t,d+1) dataframe of hazard coefficients

    """
    #deal with the covariate matrix. Check if it is a dataframe or numpy array
    n,d = X.shape
    if isinstance(X, pd.DataFrame):
      X_ = X.values.copy()
      if columns is None:
        columns = X.columns
    else:
      X_ = X.copy()

    # append a column of ones for the baseline hazard
    ix = event_times.argsort()[0,:]
    X_ = X_[ix,:].copy() if not self.fit_intercept else np.c_[ X_[ix,:].copy(), np.ones((n,1)) ]
    sorted_event_times = event_times[0,ix].copy()

    #set the column names of the result DataFrames.
    if columns is None:
      columns = list(range(d)) + ["baseline"]
    else:
      columns = [c for c in columns] + ["baseline"]

    #set the censorship events. 1 if the death was observed.
    if censorship is None:
        observed = np.ones(n, dtype=bool)
    else:
        observed = censorship.reshape(n)[ix]  # align with sorted_event_times

    #set the timeline -- this is used as DataFrame index in the results
    if timeline is None:
        timeline = sorted_event_times

    timeline = timeline.astype(float)
    if timeline[0] > 0:
       timeline = np.insert(timeline,0,0.)
    
    zeros = np.zeros((timeline.shape[0],d+self.fit_intercept))
    self.cumulative_hazards_ = pd.DataFrame(zeros.copy() , index=timeline, columns = columns)
    self.hazards_ = pd.DataFrame(np.zeros((event_times.shape[1],d+self.fit_intercept)), index=sorted_event_times, columns = columns)
    self._variance = pd.DataFrame(zeros.copy(), index=timeline, columns = columns)
    
    #create the penalizer matrix for L2 regression
    penalizer = self.penalizer*np.eye(d + self.fit_intercept)
    
    t_0 = sorted_event_times[0]
    cum_v = np.zeros((d+self.fit_intercept,1))
    v = cum_v.copy()
    for i,time in enumerate(sorted_event_times):
        relevant_times = (t_0<timeline)*(timeline<=time)
        if observed[i] == 0:
          X_[i,:] = 0
        try:
          V = dot(inv(dot(X_.T,X_) + penalizer), X_.T)
        except LinAlgError:
          #if penalizer > 0, this should not occur.
          self.cumulative_hazards_.ix[relevant_times] = cum_v.T
          self.hazards_.iloc[i] = v.T
          self._variance.ix[relevant_times] = dot( V[:,i][:,None], V[:,i][None,:] ).diagonal()
          X_[i,:] = 0
          t_0 = time
          continue

        v = dot(V, basis(n,i))
        cum_v = cum_v + v
        self.cumulative_hazards_.ix[relevant_times] = self.cumulative_hazards_.ix[relevant_times].values + cum_v.T
        self.hazards_.iloc[i] = self.hazards_.iloc[i].values + v.T
        self._variance.ix[relevant_times] = self._variance.ix[relevant_times].values + dot( V[:,i][:,None], V[:,i][None,:] ).diagonal()
        t_0 = time
        X_[i,:] = 0

    #clean up last iteration
    relevant_times = (timeline>time)
    self.cumulative_hazards_.ix[relevant_times] = cum_v.T
    self.hazards_.iloc[i] = v.T
    self._variance.ix[relevant_times] = dot( V[:,i][:,None], V[:,i][None,:] ).diagonal()
    self.timeline = timeline
    self.X = X
    self.censorship = censorship
    self._compute_confidence_intervals()
    return self
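Example #3, like #2 and #4, casts the timeline to float and prepends a 0 when the first timepoint is positive (examples #2 and #4 additionally de-duplicate it with np.unique). A standalone sketch of that normalization step, assuming a 1-D timeline array:

import numpy as np

timeline = np.asarray([2.0, 5.0, 5.0, 9.0])
timeline = np.unique(timeline.astype(float))   # sort and drop duplicate timepoints
if timeline[0] > 0:
    timeline = np.insert(timeline, 0, 0.)      # anchor the hazard curves at t = 0
print(timeline)                                # [0. 2. 5. 9.]
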
Code example #4
File: estimation.py Project: Gild/lifelines
    def fit(self, event_times, X, timeline=None, censorship=None, columns=None, verbose=True, debug=False):
        """currently X is a static (n,d) array

        event_times: (n,1) array of event times
        X: (n,d) the design matrix, either a numpy matrix or DataFrame.
        timeline: (t,1) timepoints in ascending order
        censorship: (n,1) boolean array of censorships: True if observed, False if right-censored.
                    If None, all events are assumed to be observed.

        Fits: self.cumulative_hazards_: a (t,d+1) dataframe of cumulative hazard coefficients
              self.hazards_: a (t,d+1) dataframe of hazard coefficients

        """
        # deal with the covariate matrix. Check if it is a dataframe or numpy
        # array
        n, d = X.shape

        # append a column of ones for the baseline hazard
        ix = event_times.argsort(0)[:, 0]
        baseline = np.ones((n, 1))
        X = np.hstack([X[ix,:], baseline]) if self.fit_intercept else X[ix,:]
        sorted_event_times = event_times[ix, 0]

        # set the column names of the result DataFrames.
        if columns is None:
            columns = list(range(d))
        else:
            columns = [c for c in columns]

        if self.fit_intercept:
            columns += ['baseline']

        # set the censorship events. 1 if the death was observed.
        if censorship is None:
            observed = np.ones(n, dtype=bool)
        else:
            observed = censorship[ix].reshape(n)

        # set the timeline -- this is used as DataFrame index in the results
        if timeline is None:
            timeline = sorted_event_times

        timeline = np.unique(timeline.astype(float))
        if timeline[0] > 0:
            timeline = np.insert(timeline, 0, 0.)

        unique_times = np.unique(timeline)
        zeros = np.zeros((timeline.shape[0], d + self.fit_intercept))
        self.cumulative_hazards_ = pd.DataFrame(
            zeros.copy(), index=unique_times, columns=columns)
        self.hazards_ = pd.DataFrame(
            np.zeros((event_times.shape[0], d + self.fit_intercept)), index=event_times[:, 0], columns=columns)
        self._variance = pd.DataFrame(
            zeros.copy(), index=unique_times, columns=columns)

        # create the penalizer matrix for L2 regression
        penalizer = (self.penalizer * np.eye(d + self.fit_intercept)).astype(
            np.float32, copy=False)

        t_0 = sorted_event_times[0]
        cum_v = np.zeros((d + self.fit_intercept, 1))
        v = cum_v.copy()
        n_iters = len(sorted_event_times)
        for i, time in enumerate(sorted_event_times):
            if debug:
                pdb.set_trace()
            relevant_times = (t_0 < timeline) * (timeline <= time)
            if observed[i] == 0:
                X[i,:] = 0
            try:
                V = dot(inv(dot(X.T, X) + penalizer), X.T)
            except LinAlgError:
                # if penalizer > 0, this should not occur. But sometimes it does...
                V = dot(pinv(dot(X.T, X) + penalizer), X.T)

            v = dot(V, basis(n, i))
            cum_v = cum_v + v
            self.cumulative_hazards_.ix[relevant_times] = self.cumulative_hazards_.ix[
                relevant_times].values + cum_v.T
            self.hazards_.iloc[i] = self.hazards_.iloc[i].values + v.T
            self._variance.ix[relevant_times] = self._variance.ix[
                relevant_times].values + dot( V[:, i][:, None], V[:, i][None,:] ).diagonal()
            t_0 = time
            X[i,:] = 0

            if verbose:
                sys.stdout.write("\r iteration %i of %i completed" % (i + 1, n_iters))
                sys.stdout.flush()

        # clean up last iteration
        relevant_times = (timeline > time)
        self.hazards_.iloc[i] = v.T
        try:
            self.cumulative_hazards_.ix[relevant_times] = cum_v.T
            self._variance.ix[relevant_times] =  dot( V[:, i][:, None], V[:, i][None,:] ).diagonal()
        except:
            pass
        self.timeline = timeline
        self.X = X
        self.censorship = censorship
        self.event_times = event_times
        self._compute_confidence_intervals()
        self.plot = plot_regressions(self)
        return self
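All four examples call a helper basis(n, i) that is not part of this listing. From its use as dot(V, basis(n, i)), where V has n columns, it must be the i-th standard basis vector as an (n, 1) column, so that the product selects the i-th column of V. A minimal sketch consistent with that usage (the actual lifelines helper may differ in detail):

import numpy as np

def basis(n, i):
    # (n, 1) column vector with a single 1 in position i;
    # dot(V, basis(n, i)) then picks out the i-th column of V.
    e = np.zeros((n, 1))
    e[i] = 1.0
    return e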