    def _get_log_lik(self, X_pt, events_pt, start_pt, stop_pt, weights_pt, net):
        """
        Calculate the pytorch compatibale log-likelihood
        -------
        X: (m, d) tensor of covariates,
        events: (1, d) tensor of events
        start: (1, d) tensor of start times
        stop: (1, d) tensor of stop times
        weights: (1, d) tensor of weight times
        net: the current state of nonlinear link function h(t|x) = h_0(t|x)exp(net(x))
        """
        log_lik = None
        events_pt = events_pt.to(torch.bool)
        unique_death_times = np.unique(stop_pt[events_pt])
        for t in reversed(unique_death_times):
            ix = (start_pt < t) & (t <= stop_pt)
            X_at_t_pt = X_pt[ix]
            weights_at_t_pt = weights_pt[ix][:, None]
            stops_events_at_t_pt = stop_pt[ix]
            events_at_t_pt = events_pt[ix]

            net_at_t = net(X_at_t_pt)
            phi_i = weights_at_t_pt * torch.exp(net_at_t)
            risk_phi = torch.sum(phi_i, dim=0)  # Calculate sums of risk set.

            deaths_pt = events_at_t_pt & (stops_events_at_t_pt == t)
            deaths = deaths_pt.detach().numpy()
            ties_counts = array_sum_to_scalar(deaths)
            phi_death_sum = torch.sum(phi_i[deaths], dim=0)
            weight_count = torch.sum(weights_at_t_pt[deaths], dim=0)
            weighted_average = weight_count / ties_counts

            if log_lik is None:
                log_lik = torch.zeros(1, requires_grad=False)

            # Efron's approximation for ties: for the l-th of the m tied deaths,
            # remove a fraction l/m of the tied subjects' hazard from the risk set.
            for l in range(ties_counts):
                if ties_counts > 1:
                    increasing_proportion = l / ties_counts
                    denom = risk_phi - increasing_proportion * phi_death_sum
                else:
                    denom = risk_phi
                log_lik -= weighted_average * torch.log(denom)
            # Numerator term of the partial likelihood: the weighted sum of the deaths'
            # log partial hazards, i.e. sum_i w_i * net(x_i) (the analogue of
            # x_death_sum . beta in the numpy version below).
            log_lik += torch.sum(weights_at_t_pt[deaths] * net_at_t[deaths])
        return log_lik
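A minimal sketch of how the log-likelihood above could drive training of net. The toy tensors, the two-layer MLP, the Adam optimizer, and the fitter instance standing in for the surrounding class are illustrative assumptions, not part of the original code.

import numpy as np
import torch

# toy long-format data: m rows, d covariates (illustrative only)
m, d = 20, 3
X_pt = torch.randn(m, d)
events_pt = torch.randint(0, 2, (m,))
start_pt = torch.zeros(m)
stop_pt = torch.rand(m) * 10 + 0.1
weights_pt = torch.ones(m)

# net must map (n, d) -> (n, 1) so phi_i broadcasts correctly against the weights column
net = torch.nn.Sequential(torch.nn.Linear(d, 8), torch.nn.Tanh(), torch.nn.Linear(8, 1))
optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)

for _ in range(200):
    optimizer.zero_grad()
    # maximize the partial log-likelihood by minimizing its negative
    loss = -fitter._get_log_lik(X_pt, events_pt, start_pt, stop_pt, weights_pt, net)
    loss.backward()
    optimizer.step()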
Example #2
    def _get_gradients(self, X, events, start, stop, weights, beta):  # pylint: disable=too-many-locals
        """
        Calculates the first and second order vector differentials, with respect to beta.

        Returns
        -------
        hessian: (d, d) numpy array,
        gradient: (d,) numpy array
        log_likelihood: float
        """

        _, d = X.shape
        hessian = np.zeros((d, d))
        gradient = np.zeros(d)
        log_lik = 0
        # weights = weights[:, None]
        unique_death_times = np.unique(stop[events])

        for t in unique_death_times:

            # I feel like this can be made into some tree-like structure
            ix = (start < t) & (t <= stop)

            X_at_t = X[ix]
            weights_at_t = weights[ix]
            stops_events_at_t = stop[ix]
            events_at_t = events[ix]

            phi_i = weights_at_t * np.exp(np.dot(X_at_t, beta))
            phi_x_i = phi_i[:, None] * X_at_t
            phi_x_x_i = np.dot(X_at_t.T, phi_x_i)

            # Calculate sums of Risk set
            risk_phi = array_sum_to_scalar(phi_i)
            risk_phi_x = matrix_axis_0_sum_to_1d_array(phi_x_i)
            risk_phi_x_x = phi_x_x_i

            # Calculate the sums of Tie set
            deaths = events_at_t & (stops_events_at_t == t)

            # always at least 1, since t was drawn from the unique event (death) times
            tied_death_counts = array_sum_to_scalar(deaths.astype(int))

            xi_deaths = X_at_t[deaths]

            x_death_sum = matrix_axis_0_sum_to_1d_array(
                weights_at_t[deaths, None] * xi_deaths)

            weight_count = array_sum_to_scalar(weights_at_t[deaths])
            weighted_average = weight_count / tied_death_counts

            #
            # This code is nearly identical to the _batch algorithm in CoxPHFitter; see _batch there for comments.
            #

            if tied_death_counts > 1:

                # A good explanation of how Efron handles ties: consider three of five subjects who fail at the same time.
                # Since it is not known a priori who fails first, one-third of
                # (φ1 + φ2 + φ3) is subtracted from sum_{j=1}^{5} φj after the first failure. Similarly, two-thirds
                # of (φ1 + φ2 + φ3) are subtracted after the first two individuals fail, etc.

                # a lot of this is now in Einstein (einsum) notation for performance, but see the original "expanded" code here:
                # https://github.com/CamDavidsonPilon/lifelines/blob/e7056e7817272eb5dff5983556954f56c33301b1/lifelines/fitters/cox_time_varying_fitter.py#L458-L490
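                # Written out, the Efron contribution of a death time with m = tied_death_counts
                # tied deaths D and risk set R is
                #     sum_{i in D} w_i * x_i.beta
                #       - weighted_average * sum_{l=0}^{m-1} log( sum_{j in R} phi_j - (l/m) * sum_{i in D} phi_i )
                # where phi_j = w_j * exp(x_j.beta); the gradient and hessian accumulated further
                # below are the first and second derivatives of this expression w.r.t. beta.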

                tie_phi = array_sum_to_scalar(phi_i[deaths])
                tie_phi_x = matrix_axis_0_sum_to_1d_array(phi_x_i[deaths])
                tie_phi_x_x = np.dot(xi_deaths.T,
                                     phi_i[deaths, None] * xi_deaths)

                increasing_proportion = np.arange(
                    tied_death_counts) / tied_death_counts
                denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
                numer = risk_phi_x - np.outer(increasing_proportion, tie_phi_x)

                a1 = np.einsum("ab, i->ab", risk_phi_x_x, denom) - np.einsum(
                    "ab, i->ab", tie_phi_x_x, increasing_proportion * denom)
            else:
                # no tensors here, but do some casting to make it easier in the converging step next.
                denom = 1.0 / np.array([risk_phi])
                numer = risk_phi_x
                a1 = risk_phi_x_x * denom

            summand = numer * denom[:, None]
            a2 = summand.T.dot(summand)

            gradient = gradient + x_death_sum - weighted_average * summand.sum(0)
            log_lik = log_lik + np.dot(x_death_sum, beta) + weighted_average * np.log(denom).sum()
            hessian = hessian + weighted_average * (a2 - a1)

        return hessian, gradient, log_lik
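A minimal sketch of one way these outputs could be consumed in a Newton-Raphson loop. The fitter instance, the data arrays (X, events, start, stop, weights), the fixed step size, and the convergence tolerance are assumptions for illustration, not lifelines' actual fitting routine.

import numpy as np

beta = np.zeros(X.shape[1])
step_size = 0.95

for _ in range(50):
    hessian, gradient, log_lik = fitter._get_gradients(X, events, start, stop, weights, beta)
    # the log-likelihood's hessian is negative (semi-)definite, so solve against -hessian
    delta = np.linalg.solve(-hessian, step_size * gradient)
    beta = beta + delta
    if np.linalg.norm(delta) < 1e-7:
        break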