def _get_log_lik(self, X_pt, events_pt, start_pt, stop_pt, weights_pt, net):
    """
    Calculate the pytorch-compatible log-likelihood.
    -------
    X_pt: (m, d) tensor of covariates
    events_pt: (m,) tensor of event indicators
    start_pt: (m,) tensor of start times
    stop_pt: (m,) tensor of stop times
    weights_pt: (m,) tensor of weights
    net: the current state of the nonlinear link function, h(t|x) = h_0(t) * exp(net(x))
    """
    log_lik = torch.zeros(1)
    events_pt = events_pt.to(torch.bool)
    unique_death_times = np.unique(stop_pt[events_pt])

    for t in reversed(unique_death_times):
        # Risk set at t: subjects whose interval (start, stop] contains t.
        ix = (start_pt < t) & (t <= stop_pt)

        X_at_t_pt = X_pt[ix]
        weights_at_t_pt = weights_pt[ix][:, None]
        stops_events_at_t_pt = stop_pt[ix]
        events_at_t_pt = events_pt[ix]

        phi_i = weights_at_t_pt * torch.exp(net(X_at_t_pt))

        # Calculate sums of the risk set.
        risk_phi = torch.sum(phi_i, dim=0)

        # Calculate sums of the tie set.
        deaths_pt = events_at_t_pt & (stops_events_at_t_pt == t)
        deaths = deaths_pt.detach().numpy()
        ties_counts = array_sum_to_scalar(deaths)

        phi_death_sum = torch.sum(phi_i[deaths], dim=0)
        weight_count = torch.sum(weights_at_t_pt[deaths], dim=0)
        weighted_average = weight_count / ties_counts

        # Efron correction for ties; with ties_counts == 1 this reduces to the
        # standard partial-likelihood denominator (no tie).
        for l in range(ties_counts):
            if ties_counts > 1:
                increasing_proportion = l / ties_counts
                denom = risk_phi - increasing_proportion * phi_death_sum
            else:
                denom = risk_phi
            log_lik -= weighted_average * torch.log(denom)

        # Linear term of the partial log-likelihood: sum of w_i * net(x_i) over the
        # deaths at t, mirroring dot(x_death_sum, beta) in the numpy version below.
        # (The original added phi_death_sum, i.e. sum of w_i * exp(net(x_i)), which
        # is not the log of phi_i.)
        log_lik += torch.sum(weights_at_t_pt[deaths] * net(X_at_t_pt[deaths]))

    return log_lik
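
# --- Usage sketch (not part of the fitter) ---------------------------------
# A minimal shape-check for _get_log_lik, under stated assumptions: the toy
# data below is invented here, `ctv` is a hypothetical fitter instance that
# exposes the method, and the module-level helper array_sum_to_scalar (an
# np.sum wrapper) used above is in scope. Any torch module mapping a (k, d)
# batch to (k, 1) scores can serve as `net`.
#
#     import torch
#
#     m, d = 6, 3
#     X = torch.randn(m, d)
#     start = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 2.0])
#     stop = torch.tensor([1.0, 2.0, 2.0, 3.0, 3.0, 4.0])
#     events = torch.tensor([1, 0, 1, 1, 0, 1])
#     weights = torch.ones(m)
#
#     # net(x) plays the role of the linear predictor x.beta in the classic model.
#     net = torch.nn.Sequential(torch.nn.Linear(d, 8), torch.nn.ReLU(), torch.nn.Linear(8, 1))
#
#     log_lik = ctv._get_log_lik(X, events, start, stop, weights, net)  # `ctv` is hypothetical
#     log_lik.backward()  # autograd yields d(log_lik)/d(theta) for every parameter of `net`
#
# Design note: because the likelihood is built from differentiable torch ops,
# no hand-derived gradient is needed here, unlike _get_gradients below.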
def _get_gradients(self, X, events, start, stop, weights, beta):  # pylint: disable=too-many-locals
    """
    Calculates the first and second order vector differentials, with respect to beta.

    Returns
    -------
    hessian: (d, d) numpy array,
    gradient: (d,) numpy array
    log_likelihood: float
    """
    _, d = X.shape
    hessian = np.zeros((d, d))
    gradient = np.zeros(d)
    log_lik = 0
    # weights = weights[:, None]
    unique_death_times = np.unique(stop[events])

    for t in unique_death_times:
        # I feel like this can be made into some tree-like structure
        ix = (start < t) & (t <= stop)

        X_at_t = X[ix]
        weights_at_t = weights[ix]
        stops_events_at_t = stop[ix]
        events_at_t = events[ix]

        phi_i = weights_at_t * np.exp(np.dot(X_at_t, beta))
        phi_x_i = phi_i[:, None] * X_at_t
        phi_x_x_i = np.dot(X_at_t.T, phi_x_i)

        # Calculate sums of the risk set.
        risk_phi = array_sum_to_scalar(phi_i)
        risk_phi_x = matrix_axis_0_sum_to_1d_array(phi_x_i)
        risk_phi_x_x = phi_x_x_i

        # Calculate sums of the tie set.
        deaths = events_at_t & (stops_events_at_t == t)
        tied_death_counts = array_sum_to_scalar(
            deaths.astype(int)
        )  # always at least 1, since t was drawn from the unique death times.

        xi_deaths = X_at_t[deaths]
        x_death_sum = matrix_axis_0_sum_to_1d_array(weights_at_t[deaths, None] * xi_deaths)

        weight_count = array_sum_to_scalar(weights_at_t[deaths])
        weighted_average = weight_count / tied_death_counts

        #
        # This code is nearly identical to the _batch algorithm in CoxPHFitter. In fact, see _batch for comments.
        #
        if tied_death_counts > 1:
            # A good explanation of how Efron handles ties: consider three of five subjects who fail at the same time.
            # Since it is not known a priori who fails first, one-third of
            # (φ1 + φ2 + φ3) is subtracted from Σ_{j=1}^{5} φj after the first failure; similarly, two-thirds
            # of (φ1 + φ2 + φ3) is subtracted after the first two individuals fail, etc.

            # A lot of this is now in Einstein notation for performance, but see the original "expanded" code here:
            # https://github.com/CamDavidsonPilon/lifelines/blob/e7056e7817272eb5dff5983556954f56c33301b1/lifelines/fitters/cox_time_varying_fitter.py#L458-L490

            tie_phi = array_sum_to_scalar(phi_i[deaths])
            tie_phi_x = matrix_axis_0_sum_to_1d_array(phi_x_i[deaths])
            tie_phi_x_x = np.dot(xi_deaths.T, phi_i[deaths, None] * xi_deaths)

            increasing_proportion = np.arange(tied_death_counts) / tied_death_counts
            denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
            numer = risk_phi_x - np.outer(increasing_proportion, tie_phi_x)

            a1 = np.einsum("ab,i->ab", risk_phi_x_x, denom) - np.einsum(
                "ab,i->ab", tie_phi_x_x, increasing_proportion * denom
            )
        else:
            # no tensors here, but do some casting to make it easier in the converging step next.
            denom = 1.0 / np.array([risk_phi])
            numer = risk_phi_x
            a1 = risk_phi_x_x * denom

        summand = numer * denom[:, None]
        a2 = summand.T.dot(summand)

        gradient = gradient + x_death_sum - weighted_average * summand.sum(0)
        log_lik = log_lik + np.dot(x_death_sum, beta) + weighted_average * np.log(denom).sum()
        hessian = hessian + weighted_average * (a2 - a1)

    return hessian, gradient, log_lik
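
# --- Usage sketch (not part of the fitter) ---------------------------------
# A minimal Newton-Raphson driver built on the (hessian, gradient, log_lik)
# triple returned above; a sketch under stated assumptions, not the fitter's
# actual optimizer. `ctv` is a hypothetical fitter instance, and X, events,
# start, stop, weights are numpy arrays with the docstring shapes. The real
# fitter would also add step-size control and convergence diagnostics,
# which are omitted here.
#
#     import numpy as np
#
#     beta = np.zeros(X.shape[1])
#     for _ in range(50):
#         hessian, gradient, log_lik = ctv._get_gradients(X, events, start, stop, weights, beta)
#         # The log-likelihood's hessian is negative (semi-)definite near the
#         # optimum, so solving against -hessian yields an ascent direction.
#         step = np.linalg.solve(-hessian, gradient)
#         beta = beta + step
#         if np.linalg.norm(step) < 1e-7:
#             break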