def _newton_rhaphson(
    self,
    df,
    events,
    start,
    stop,
    weights,
    show_progress=False,
    step_size=None,
    precision=10e-6,
    max_steps=50,
    initial_point=None,
):  # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements
    """
    Newton Rhaphson algorithm for fitting CPH model.

    Parameters
    ----------
    df: DataFrame
        covariate matrix, one row per (start, stop] interval.
    events: Series
        event-occurred indicator per row.
    start, stop: Series
        interval bounds per row.
    weights: Series
        per-observation weights.
    show_progress: boolean, optional (default: False)
        to show verbose output of convergence.
    step_size: float > 0
        to determine a starting step size in NR algorithm.
    precision: float
        the convergence halts if the norm of delta between
        successive positions is less than epsilon.
    max_steps: int
        maximum number of Newton-Rhaphson iterations.
    initial_point: (d,) numpy array, optional
        starting position; defaults to the zero vector.

    Returns
    -------
    beta: (1,d) numpy array.

    Raises
    ------
    ConvergenceError
        if the hessian/gradient contain nan/inf, the hessian is singular,
        or the step contains nan values.
    """
    assert precision <= 1.0, "precision must be less than or equal to 1."
    _, d = df.shape

    # make sure betas are correct size.
    if initial_point is not None:
        beta = initial_point
    else:
        beta = np.zeros((d,))

    i = 0
    converging = True
    ll, previous_ll = 0, 0
    start_time = time.time()

    step_sizer = StepSizer(step_size)
    step_size = step_sizer.next()

    while converging:
        i += 1

        if self.strata is None:
            h, g, ll = self._get_gradients(
                df.values, events.values, start.values, stop.values, weights.values, beta
            )
        else:
            # accumulate gradient, hessian and log-likelihood over the strata.
            g = np.zeros_like(beta)
            h = np.zeros((d, d))
            ll = 0
            for _h, _g, _ll in self._partition_by_strata_and_apply(
                df, events, start, stop, weights, self._get_gradients, beta
            ):
                g += _g
                h += _h
                ll += _ll

        if i == 1 and np.all(beta == 0):
            # this is a neat optimization, the null partial likelihood
            # is the same as the full partial but evaluated at zero.
            # if the user supplied a non-trivial initial point, we need to delay this.
            self._log_likelihood_null = ll

        if self.penalizer > 0:
            # add the gradient and hessian of the l2 term
            g -= self.penalizer * beta
            h.flat[:: d + 1] -= self.penalizer

        try:
            # reusing a piece to make g * inv(h) * g.T faster later
            inv_h_dot_g_T = spsolve(-h, g, sym_pos=True)
        except ValueError as e:
            if "infs or NaNs" in str(e):
                raise ConvergenceError(
                    """hessian or gradient contains nan or inf value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""",
                    e,
                )
            else:
                # something else?
                raise e
        except LinAlgError as e:
            raise ConvergenceError(
                """Convergence halted due to matrix inversion problems. Suspicion is high colinearity. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""",
                e,
            )

        delta = step_size * inv_h_dot_g_T

        if np.any(np.isnan(delta)):
            raise ConvergenceError(
                """delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
"""
            )

        # Save these as pending result
        hessian, gradient = h, g
        norm_delta = norm(delta)
        newton_decrement = g.dot(inv_h_dot_g_T) / 2

        if show_progress:
            print(
                "\rIteration %d: norm_delta = %.5f, step_size = %.5f, ll = %.5f, newton_decrement = %.5f, seconds_since_start = %.1f"
                % (i, norm_delta, step_size, ll, newton_decrement, time.time() - start_time),
                end="",
            )

        # convergence criteria
        if norm_delta < precision:
            converging, completed = False, True
        elif previous_ll != 0 and abs(ll - previous_ll) / (-previous_ll) < 1e-09:
            # this is what R uses by default.
            # BUGFIX: the guard was `previous_ll > 0`, but a partial log-likelihood
            # is <= 0, so this criterion could never fire.
            converging, completed = False, True
        elif newton_decrement < 10e-8:
            converging, completed = False, True
        elif i >= max_steps:
            # 50 iterations steps with N-R is a lot.
            # Expected convergence is less than 10 steps
            converging, completed = False, False
        elif step_size <= 0.0001:
            converging, completed = False, False
        elif abs(ll) < 0.0001 and norm_delta > 1.0:
            warnings.warn(
                "The log-likelihood is getting suspiciously close to 0 and the delta is still large. There may be complete separation in the dataset. This may result in incorrect inference of coefficients. \
See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression",
                ConvergenceWarning,
            )
            converging, completed = False, False

        step_size = step_sizer.update(norm_delta).next()

        # BUGFIX: previous_ll was never updated inside the loop, leaving the
        # relative log-likelihood convergence criterion above unreachable.
        previous_ll = ll
        beta += delta

    self._hessian_ = hessian
    self._score_ = gradient
    self._log_likelihood = ll

    if show_progress and completed:
        print("Convergence completed after %d iterations." % (i))
    elif show_progress and not completed:
        print("Convergence failed. See any warning messages.")

    # report to the user problems that we detect.
    if completed and norm_delta > 0.1:
        warnings.warn(
            "Newton-Rhapson convergence completed but norm(delta) is still high, %.3f. This may imply non-unique solutions to the maximum likelihood. Perhaps there is colinearity or complete separation in the dataset?"
            % norm_delta,
            ConvergenceWarning,
        )
    elif not completed:
        warnings.warn("Newton-Rhapson failed to converge sufficiently in %d steps." % max_steps, ConvergenceWarning)

    return beta
def _newton_rhaphson(self, X, T, E, weights=None, initial_beta=None, step_size=None,
                     precision=10e-6, show_progress=True, max_steps=50):
    """
    Newton Rhaphson algorithm for fitting CPH model.

    Note that data is assumed to be sorted on T!

    Parameters:
        X: (n,d) Pandas DataFrame of observations.
        T: (n) Pandas Series representing observed durations.
        E: (n) Pandas Series representing death events.
        weights: (n) an iterable representing weights per observation.
            NOTE(review): the default is None but ``weights.values`` is used
            unconditionally below — callers appear to always pass a Series;
            confirm before relying on the default.
        initial_beta: (1,d) numpy array of initial starting point for
                      NR algorithm. Default 0.
        step_size: float > 0.001 to determine a starting step size in NR algorithm.
        precision: the convergence halts if the norm of delta between
                 successive positions is less than epsilon.
        show_progress: since the fitter is iterative, show convergence
                 diagnostics.
        max_steps: the maximum number of interations of the Newton-Rhaphson algorithm.

    Returns:
        beta: (1,d) numpy array.
    """
    # keep the path of betas visited, useful for debugging convergence.
    self.path = []
    assert precision <= 1., "precision must be less than or equal to 1."
    _, d = X.shape  # (n was unused; only the dimension is needed)

    # make sure betas are correct size.
    if initial_beta is not None:
        assert initial_beta.shape == (d, 1)
        beta = initial_beta
    else:
        beta = np.zeros((d, 1))

    step_sizer = StepSizer(step_size)
    step_size = step_sizer.next()

    # Method of choice is just efron right now
    if self.tie_method == 'Efron':
        get_gradients = self._get_efron_values
    else:
        raise NotImplementedError("Only Efron is available.")

    i = 0
    converging = True
    ll, previous_ll = 0, 0
    start = time.time()

    while converging:
        self.path.append(beta.copy())
        i += 1
        if self.strata is None:
            h, g, ll = get_gradients(X.values, beta, T.values, E.values, weights.values)
        else:
            # accumulate gradient, hessian and log-likelihood over the strata
            # (the DataFrame is indexed by strata).
            g = np.zeros_like(beta).T
            h = np.zeros((beta.shape[0], beta.shape[0]))
            ll = 0
            for strata in np.unique(X.index):
                stratified_X, stratified_T, stratified_E, stratified_W = X.loc[
                    [strata]], T.loc[[strata]], E.loc[[strata]], weights.loc[[strata]]
                _h, _g, _ll = get_gradients(stratified_X.values, beta,
                                            stratified_T.values,
                                            stratified_E.values,
                                            stratified_W.values)
                g += _g
                h += _h
                ll += _ll

        if self.penalizer > 0:
            # add the gradient and hessian of the l2 term
            g -= self.penalizer * beta.T
            h.flat[::d + 1] -= self.penalizer

        delta = solve(-h, step_size * g.T)
        if np.any(np.isnan(delta)):
            raise ValueError(
                """delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")

        # Save these as pending result
        hessian, gradient = h, g

        if show_progress:
            print(
                "Iteration %d: norm_delta = %.5f, step_size = %.5f, ll = %.5f, seconds_since_start = %.1f"
                % (i, norm(delta), step_size, ll, time.time() - start))

        # convergence criteria
        if norm(delta) < precision:
            converging, completed = False, True
        elif abs(ll - previous_ll) < precision:
            converging, completed = False, True
        elif i >= max_steps:
            # 50 iterations steps with N-R is a lot.
            # Expected convergence is ~10 steps
            converging, completed = False, False
        elif step_size <= 0.00001:
            converging, completed = False, False
        elif abs(ll) < 0.0001 and norm(delta) > 1.0:
            warnings.warn(
                "The log-likelihood is getting suspciously close to 0 and the delta is still large. There may be complete separation in the dataset. This may result in incorrect inference of coefficients. \
See https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faqwhat-is-complete-or-quasi-complete-separation-in-logisticprobit-regression-and-how-do-we-deal-with-them/ ",
                ConvergenceWarning)
            converging, completed = False, False

        step_size = step_sizer.update(norm(delta)).next()

        beta += delta
        previous_ll = ll

    self._hessian_ = hessian
    self._score_ = gradient
    self._log_likelihood = ll

    if show_progress and completed:
        print("Convergence completed after %d iterations." % (i))
    if not completed:
        warnings.warn(
            "Newton-Rhapson failed to converge sufficiently in %d steps." % max_steps,
            ConvergenceWarning)

    return beta
def _newton_rhaphson(
    self,
    df,
    events,
    start,
    stop,
    weights,
    show_progress=False,
    step_size=None,
    precision=10e-6,
    max_steps=50,
    initial_point=None,
):  # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements
    """
    Newton Rhaphson algorithm for fitting CPH model.

    Parameters
    ----------
    df: DataFrame
        covariate matrix, one row per (start, stop] interval.
    events: Series
        event-occurred indicator per row.
    start, stop: Series
        interval bounds per row.
    weights: Series
        per-observation weights.
    show_progress: boolean, optional (default: False)
        to show verbose output of convergence.
    step_size: float > 0
        to determine a starting step size in NR algorithm.
    precision: float
        the convergence halts if the norm of delta between
        successive positions is less than epsilon.
    max_steps: int
        maximum number of Newton-Rhaphson iterations.
    initial_point: (d,) numpy array, optional
        starting position; defaults to the zero vector.

    Returns
    -------
    beta: (1,d) numpy array.

    Raises
    ------
    ConvergenceError
        if the hessian/gradient contain nan/inf, the hessian is singular,
        or the step contains nan values.
    """
    assert precision <= 1.0, "precision must be less than or equal to 1."
    _, d = df.shape

    # make sure betas are correct size.
    if initial_point is not None:
        beta = initial_point
    else:
        beta = np.zeros((d,))

    i = 0
    converging = True
    ll, previous_ll = 0, 0
    start_time = time.time()

    step_sizer = StepSizer(step_size)
    step_size = step_sizer.next()

    while converging:
        i += 1

        if self.strata is None:
            h, g, ll = self._get_gradients(
                df.values, events.values, start.values, stop.values, weights.values, beta
            )
        else:
            # accumulate gradient, hessian and log-likelihood over the strata.
            g = np.zeros_like(beta)
            h = np.zeros((d, d))
            ll = 0
            for _h, _g, _ll in self._partition_by_strata_and_apply(
                df, events, start, stop, weights, self._get_gradients, beta
            ):
                g += _g
                h += _h
                ll += _ll

        if i == 1 and np.all(beta == 0):
            # this is a neat optimization, the null partial likelihood
            # is the same as the full partial but evaluated at zero.
            # if the user supplied a non-trivial initial point, we need to delay this.
            self._log_likelihood_null = ll

        if self.penalizer > 0:
            # add the gradient and hessian of the l2 term
            g -= self.penalizer * beta
            h.flat[:: d + 1] -= self.penalizer

        try:
            # reusing a piece to make g * inv(h) * g.T faster later
            inv_h_dot_g_T = spsolve(-h, g, sym_pos=True)
        except ValueError as e:
            if "infs or NaNs" in str(e):
                raise ConvergenceError(
                    """hessian or gradient contains nan or inf value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""",
                    e,
                )
            else:
                # something else?
                raise e
        except LinAlgError as e:
            raise ConvergenceError(
                """Convergence halted due to matrix inversion problems. Suspicion is high colinearity. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""",
                e,
            )

        delta = step_size * inv_h_dot_g_T

        if np.any(np.isnan(delta)):
            raise ConvergenceError(
                """delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
"""
            )

        # Save these as pending result
        hessian, gradient = h, g
        norm_delta = norm(delta)
        newton_decrement = g.dot(inv_h_dot_g_T) / 2

        if show_progress:
            print(
                "\rIteration %d: norm_delta = %.5f, step_size = %.5f, ll = %.5f, newton_decrement = %.5f, seconds_since_start = %.1f"
                % (i, norm_delta, step_size, ll, newton_decrement, time.time() - start_time),
                end=""
            )

        # convergence criteria
        if norm_delta < precision:
            converging, completed = False, True
        elif previous_ll != 0 and abs(ll - previous_ll) / (-previous_ll) < 1e-09:
            # this is what R uses by default.
            # BUGFIX: the guard was `previous_ll > 0`, but a partial log-likelihood
            # is <= 0, so this criterion could never fire.
            converging, completed = False, True
        elif newton_decrement < 10e-8:
            converging, completed = False, True
        elif i >= max_steps:
            # 50 iterations steps with N-R is a lot.
            # Expected convergence is less than 10 steps
            converging, completed = False, False
        elif step_size <= 0.0001:
            converging, completed = False, False
        elif abs(ll) < 0.0001 and norm_delta > 1.0:
            warnings.warn(
                "The log-likelihood is getting suspiciously close to 0 and the delta is still large. There may be complete separation in the dataset. This may result in incorrect inference of coefficients. \
See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression",
                ConvergenceWarning,
            )
            converging, completed = False, False

        step_size = step_sizer.update(norm_delta).next()

        # BUGFIX: previous_ll was never updated inside the loop, leaving the
        # relative log-likelihood convergence criterion above unreachable.
        previous_ll = ll
        beta += delta

    self._hessian_ = hessian
    self._score_ = gradient
    self._log_likelihood = ll

    if show_progress and completed:
        print("Convergence completed after %d iterations." % (i))
    elif show_progress and not completed:
        print("Convergence failed. See any warning messages.")

    # report to the user problems that we detect.
    if completed and norm_delta > 0.1:
        warnings.warn(
            "Newton-Rhapson convergence completed but norm(delta) is still high, %.3f. This may imply non-unique solutions to the maximum likelihood. Perhaps there is colinearity or complete separation in the dataset?"
            % norm_delta,
            ConvergenceWarning,
        )
    elif not completed:
        warnings.warn("Newton-Rhapson failed to converge sufficiently in %d steps." % max_steps, ConvergenceWarning)

    return beta
def _newton_rhaphson(self, df, stop_times_events, show_progress=False, step_size=None,
                     precision=10e-6, max_steps=50):
    """
    Newton Rhaphson algorithm for fitting CPH model.

    Note that data is assumed to be sorted on T!

    Parameters:
        df: (n, d) Pandas DataFrame of observations
        stop_times_events: (n, d) Pandas DataFrame of meta information about
            the subjects history
        show_progress: True to show verbous output of convergence
        step_size: float > 0 to determine a starting step size in NR algorithm.
        precision: the convergence halts if the norm of delta between
            successive positions is less than epsilon.
        max_steps: the maximum number of Newton-Rhaphson iterations.

    Returns:
        beta: (1,d) numpy array.

    Raises:
        ValueError: if the computed step contains nan values.
    """
    assert precision <= 1., "precision must be less than or equal to 1."
    _, d = df.shape  # (n was unused; only the dimension is needed)

    # make sure betas are correct size.
    beta = np.zeros((d, 1))

    i = 0
    converging = True
    ll, previous_ll = 0, 0
    start = time.time()

    step_sizer = StepSizer(step_size)
    step_size = step_sizer.next()

    while converging:
        i += 1

        h, g, ll = self._get_gradients(df, stop_times_events, beta)

        if self.penalizer > 0:
            # add the gradient and hessian of the l2 term
            g -= self.penalizer * beta.T
            h.flat[::d + 1] -= self.penalizer

        delta = solve(-h, step_size * g.T)
        if np.any(np.isnan(delta)):
            raise ValueError("""delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")

        # Save these as pending result
        hessian, gradient = h, g
        norm_delta = norm(delta)

        if show_progress:
            print("Iteration %d: norm_delta = %.6f, step_size = %.3f, ll = %.6f, seconds_since_start = %.1f"
                  % (i, norm_delta, step_size, ll, time.time() - start))

        # convergence criteria
        if norm_delta < precision:
            converging, completed = False, True
        elif abs(ll - previous_ll) < precision:
            converging, completed = False, True
        elif i >= max_steps:
            # 50 iterations steps with N-R is a lot.
            # Expected convergence is ~10 steps
            converging, completed = False, False
        elif step_size <= 0.0001:
            converging, completed = False, False
        elif abs(ll) < 0.0001 and norm_delta > 1.0:
            warnings.warn("The log-likelihood is getting suspciously close to 0 and the delta is still large. There may be complete separation in the dataset. This may result in incorrect inference of coefficients. \
See https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faqwhat-is-complete-or-quasi-complete-separation-in-logisticprobit-regression-and-how-do-we-deal-with-them/ ",
                          ConvergenceWarning)
            converging, completed = False, False

        step_size = step_sizer.update(norm_delta).next()

        # BUGFIX: previous_ll was never updated, so the log-likelihood
        # convergence criterion above was effectively `abs(ll) < precision`
        # on every iteration instead of comparing successive iterations.
        previous_ll = ll
        beta += delta

    self._hessian_ = hessian
    self._score_ = gradient
    self._log_likelihood = ll

    if show_progress and completed:
        print("Convergence completed after %d iterations." % (i))
    if not completed:
        warnings.warn("Newton-Rhapson failed to converge sufficiently in %d steps." % max_steps, ConvergenceWarning)

    return beta
def _newton_rhaphson(self, df, stop_times_events, weights, show_progress=False, step_size=None,
                     precision=10e-6, max_steps=50):
    # pylint: disable=too-many-arguments,too-many-locals,too-many-branches
    """
    Newton Rhaphson algorithm for fitting CPH model.

    Parameters
    ----------
    df: DataFrame
    stop_times_events: DataFrame
        meta information about the subjects history
    weights: Series
        per-observation weights.
    show_progress: boolean, optional (default: True)
        to show verbous output of convergence
    step_size: float > 0
        to determine a starting step size in NR algorithm.
    precision: float
        the convergence halts if the norm of delta between
        successive positions is less than epsilon.
    max_steps: int
        maximum number of Newton-Rhaphson iterations.

    Returns
    -------
    beta: (1,d) numpy array.

    Raises
    ------
    ConvergenceError
        if the hessian/gradient contain nan/inf, or the step contains
        nan values.
    """
    assert precision <= 1.0, "precision must be less than or equal to 1."
    _, d = df.shape

    # make sure betas are correct size.
    beta = np.zeros((d, 1))

    i = 0
    converging = True
    ll, previous_ll = 0, 0
    start = time.time()

    step_sizer = StepSizer(step_size)
    step_size = step_sizer.next()

    while converging:
        i += 1

        if self.strata is None:
            h, g, ll = self._get_gradients(df, stop_times_events, weights, beta)
        else:
            # accumulate gradient, hessian and log-likelihood over the strata.
            g = np.zeros_like(beta).T
            h = np.zeros((beta.shape[0], beta.shape[0]))
            ll = 0
            for _h, _g, _ll in self._partition_by_strata_and_apply(
                    df, stop_times_events, weights, self._get_gradients, beta):
                g += _g
                h += _h
                ll += _ll

        if self.penalizer > 0:
            # add the gradient and hessian of the l2 term
            g -= self.penalizer * beta.T
            h.flat[::d + 1] -= self.penalizer

        try:
            # reusing a piece to make g * inv(h) * g.T faster later
            inv_h_dot_g_T = spsolve(-h, g.T, sym_pos=True)
        except ValueError as e:
            if "infs or NaNs" in str(e):
                raise ConvergenceError(
                    """hessian or gradient contains nan or inf value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")
            else:
                # something else?
                raise e

        delta = step_size * inv_h_dot_g_T

        if np.any(np.isnan(delta)):
            raise ConvergenceError(
                """delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")

        # Save these as pending result
        hessian, gradient = h, g
        norm_delta = norm(delta)
        newton_decrement = g.dot(inv_h_dot_g_T) / 2

        if show_progress:
            print(
                "Iteration %d: norm_delta = %.5f, step_size = %.5f, ll = %.5f, newton_decrement = %.5f, seconds_since_start = %.1f"
                % (i, norm_delta, step_size, ll, newton_decrement, time.time() - start))

        # convergence criteria
        if norm_delta < precision:
            converging, completed = False, True
        elif previous_ll != 0 and abs(ll - previous_ll) / (-previous_ll) < 1e-09:
            # this is what R uses by default.
            # BUGFIX: the guard was `previous_ll > 0`, but a partial log-likelihood
            # is <= 0, so this criterion could never fire.
            converging, completed = False, True
        elif newton_decrement < 10e-8:
            converging, completed = False, True
        elif i >= max_steps:
            # 50 iterations steps with N-R is a lot.
            # Expected convergence is less than 10 steps
            converging, completed = False, False
        elif step_size <= 0.0001:
            converging, completed = False, False
        elif abs(ll) < 0.0001 and norm_delta > 1.0:
            warnings.warn(
                "The log-likelihood is getting suspciously close to 0 and the delta is still large. There may be complete separation in the dataset. This may result in incorrect inference of coefficients. \
See https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faqwhat-is-complete-or-quasi-complete-separation-in-logisticprobit-regression-and-how-do-we-deal-with-them/ ",
                ConvergenceWarning,
            )
            converging, completed = False, False

        step_size = step_sizer.update(norm_delta).next()

        # BUGFIX: previous_ll was never updated inside the loop, leaving the
        # relative log-likelihood convergence criterion above unreachable.
        previous_ll = ll
        beta += delta

    self._hessian_ = hessian
    self._score_ = gradient
    self._log_likelihood = ll

    if show_progress and completed:
        print("Convergence completed after %d iterations." % (i))
    if not completed:
        warnings.warn(
            "Newton-Rhapson failed to converge sufficiently in %d steps."
            % max_steps, ConvergenceWarning)

    return beta