Example #1
    def _newton_rhaphson(
        self,
        df,
        events,
        start,
        stop,
        weights,
        show_progress=False,
        step_size=None,
        precision=10e-6,
        max_steps=50,
        initial_point=None,
    ):  # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements
        """
        Newton Rhaphson algorithm for fitting CPH model.

        Parameters
        ----------
        df: DataFrame
        stop_times_events: DataFrame
             meta information about the subjects history
        show_progress: boolean, optional (default: True)
            to show verbose output of convergence
        step_size: float
            > 0 to determine a starting step size in NR algorithm.
        precision: float
            the convergence halts if the norm of delta between
                     successive positions is less than epsilon.

        Returns
        --------
        beta: (1,d) numpy array.
        """
        assert precision <= 1.0, "precision must be less than or equal to 1."

        _, d = df.shape

        # make sure betas are correct size.
        if initial_point is not None:
            beta = initial_point
        else:
            beta = np.zeros((d, ))

        i = 0
        converging = True
        ll, previous_ll = 0, 0
        start_time = time.time()

        step_sizer = StepSizer(step_size)
        step_size = step_sizer.next()

        while converging:
            i += 1

            if self.strata is None:
                h, g, ll = self._get_gradients(df.values, events.values,
                                               start.values, stop.values,
                                               weights.values, beta)
            else:
                g = np.zeros_like(beta)
                h = np.zeros((d, d))
                ll = 0
                for _h, _g, _ll in self._partition_by_strata_and_apply(
                        df, events, start, stop, weights, self._get_gradients,
                        beta):
                    g += _g
                    h += _h
                    ll += _ll

            if i == 1 and np.all(beta == 0):
                # this is a neat optimization, the null partial likelihood
                # is the same as the full partial but evaluated at zero.
                # if the user supplied a non-trivial initial point, we need to delay this.
                self._log_likelihood_null = ll

            if self.penalizer > 0:
                # add the gradient and hessian of the l2 term
                g -= self.penalizer * beta
                h.flat[::d + 1] -= self.penalizer

            try:
                # reusing a piece to make g * inv(h) * g.T faster later
                inv_h_dot_g_T = spsolve(-h, g, sym_pos=True)
            except ValueError as e:
                if "infs or NaNs" in str(e):
                    raise ConvergenceError(
                        """hessian or gradient contains nan or inf value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""",
                        e,
                    )
                else:
                    # something else?
                    raise e
            except LinAlgError as e:
                raise ConvergenceError(
                    """Convergence halted due to matrix inversion problems. Suspicion is high colinearity. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""",
                    e,
                )

            delta = step_size * inv_h_dot_g_T

            if np.any(np.isnan(delta)):
                raise ConvergenceError(
                    """delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")
            # Save these as pending result
            hessian, gradient = h, g
            norm_delta = norm(delta)
            newton_decrement = g.dot(inv_h_dot_g_T) / 2

            if show_progress:
                print(
                    "\rIteration %d: norm_delta = %.5f, step_size = %.5f, ll = %.5f, newton_decrement = %.5f, seconds_since_start = %.1f"
                    % (i, norm_delta, step_size, ll, newton_decrement,
                       time.time() - start_time),
                    end="",
                )

            # convergence criteria
            if norm_delta < precision:
                converging, completed = False, True
            elif previous_ll != 0 and abs(ll - previous_ll) / (-previous_ll) < 1e-09:
                # this is what R uses by default
                converging, completed = False, True
            elif newton_decrement < 10e-8:
                converging, completed = False, True
            elif i >= max_steps:
                # 50 iterations with N-R is a lot.
                # Expected convergence is less than 10 steps
                converging, completed = False, False
            elif step_size <= 0.0001:
                converging, completed = False, False
            elif abs(ll) < 0.0001 and norm_delta > 1.0:
                warnings.warn(
                    "The log-likelihood is getting suspiciously close to 0 and the delta is still large. There may be complete separation in the dataset. This may result in incorrect inference of coefficients. \
See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression",
                    ConvergenceWarning,
                )
                converging, completed = False, False

            step_size = step_sizer.update(norm_delta).next()

            beta += delta
            previous_ll = ll

        self._hessian_ = hessian
        self._score_ = gradient
        self._log_likelihood = ll

        if show_progress and completed:
            print("Convergence completed after %d iterations." % (i))
        elif show_progress and not completed:
            print("Convergence failed. See any warning messages.")

        # report to the user problems that we detect.
        if completed and norm_delta > 0.1:
            warnings.warn(
                "Newton-Rhapson convergence completed but norm(delta) is still high, %.3f. This may imply non-unique solutions to the maximum likelihood. Perhaps there is colinearity or complete separation in the dataset?"
                % norm_delta,
                ConvergenceWarning,
            )
        elif not completed:
            warnings.warn(
                "Newton-Rhapson failed to converge sufficiently in %d steps." %
                max_steps, ConvergenceWarning)

        return beta
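All of the examples in this listing implement the same damped Newton-Raphson update: solve -H * delta = g for the ascent direction, scale it by a step size, and stop once the step norm (or, in the newer variants, the Newton decrement g * H^-1 * g / 2) falls below a threshold. Below is a minimal, self-contained sketch of that loop on a toy logistic log-likelihood. Note that toy_gradients is a hypothetical stand-in for lifelines' _get_gradients (which returns the Hessian, gradient, and log-likelihood); nothing in this sketch is lifelines API.

import numpy as np
from numpy.linalg import norm, solve

def toy_gradients(beta, X, y):
    # Hessian, gradient, and log-likelihood of a logistic model: a
    # hypothetical stand-in for lifelines' _get_gradients return values.
    p = 1.0 / (1.0 + np.exp(-X.dot(beta)))
    ll = np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))
    g = X.T.dot(y - p)                   # score vector, shape (d,)
    h = -(X.T * (p * (1 - p))).dot(X)    # negative-definite Hessian, (d, d)
    return h, g, ll

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
y = (X.dot([0.5, -1.0, 2.0]) + rng.normal(size=200) > 0).astype(float)

beta, step_size = np.zeros(3), 0.95
for i in range(1, 51):                   # max_steps = 50, as above
    h, g, ll = toy_gradients(beta, X, y)
    inv_h_dot_g = solve(-h, g)           # Newton direction, as in spsolve(-h, g)
    delta = step_size * inv_h_dot_g
    newton_decrement = g.dot(inv_h_dot_g) / 2
    beta += delta
    if norm(delta) < 1e-5 or newton_decrement < 1e-7:
        break
print(i, beta)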
Example #2
    def _newton_rhaphson(self,
                         X,
                         T,
                         E,
                         weights=None,
                         initial_beta=None,
                         step_size=None,
                         precision=10e-6,
                         show_progress=True,
                         max_steps=50):
        """
        Newton-Raphson algorithm for fitting the CPH model.

        Note that data is assumed to be sorted on T!

        Parameters:
            X: (n,d) Pandas DataFrame of observations.
            T: (n) Pandas Series representing observed durations.
            E: (n) Pandas Series representing death events.
            weights: (n) an iterable representing weights per observation.
            initial_beta: (d,1) numpy array of initial starting point for
                          NR algorithm. Default 0.
            step_size: float > 0.001 to determine a starting step size in NR algorithm.
            precision: the convergence halts if the norm of delta between
                     successive positions is less than this value.
            show_progress: since the fitter is iterative, show convergence
                     diagnostics.
            max_steps: the maximum number of iterations of the Newton-Raphson algorithm.

        Returns:
            beta: (d,1) numpy array.
        """
        self.path = []
        assert precision <= 1., "precision must be less than or equal to 1."
        n, d = X.shape

        # make sure betas are correct size.
        if initial_beta is not None:
            assert initial_beta.shape == (d, 1)
            beta = initial_beta
        else:
            beta = np.zeros((d, 1))

        step_sizer = StepSizer(step_size)
        step_size = step_sizer.next()

        # Method of choice is just efron right now
        if self.tie_method == 'Efron':
            get_gradients = self._get_efron_values
        else:
            raise NotImplementedError("Only Efron is available.")

        i = 0
        converging = True
        warn_ll = True
        ll, previous_ll = 0, 0
        start = time.time()

        while converging:
            self.path.append(beta.copy())
            i += 1
            if self.strata is None:
                h, g, ll = get_gradients(X.values, beta, T.values, E.values,
                                         weights.values)
            else:
                g = np.zeros_like(beta).T
                h = np.zeros((beta.shape[0], beta.shape[0]))
                ll = 0
                for strata in np.unique(X.index):
                    stratified_X = X.loc[[strata]]
                    stratified_T = T.loc[[strata]]
                    stratified_E = E.loc[[strata]]
                    stratified_W = weights.loc[[strata]]
                    _h, _g, _ll = get_gradients(stratified_X.values, beta,
                                                stratified_T.values,
                                                stratified_E.values,
                                                stratified_W.values)
                    g += _g
                    h += _h
                    ll += _ll

            if self.penalizer > 0:
                # add the gradient and hessian of the l2 term
                g -= self.penalizer * beta.T
                h.flat[::d + 1] -= self.penalizer

            delta = solve(-h, step_size * g.T)
            if np.any(np.isnan(delta)):
                raise ValueError(
                    """delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")

            # Save these as pending result
            hessian, gradient = h, g

            if show_progress:
                print(
                    "Iteration %d: norm_delta = %.5f, step_size = %.5f, ll = %.5f, seconds_since_start = %.1f"
                    % (i, norm(delta), step_size, ll, time.time() - start))
            # convergence criteria
            if norm(delta) < precision:
                converging, completed = False, True
            elif abs(ll - previous_ll) < precision:
                converging, completed = False, True
            elif i >= max_steps:
                # 50 iterations with N-R is a lot.
                # Expected convergence is ~10 steps
                converging, completed = False, False
            elif step_size <= 0.00001:
                converging, completed = False, False
            elif abs(ll) < 0.0001 and norm(delta) > 1.0:
                warnings.warn(
                    "The log-likelihood is getting suspciously close to 0 and the delta is still large. There may be complete separation in the dataset. This may result in incorrect inference of coefficients. \
See https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faqwhat-is-complete-or-quasi-complete-separation-in-logisticprobit-regression-and-how-do-we-deal-with-them/ ",
                    ConvergenceWarning)
                converging, completed = False, False

            step_size = step_sizer.update(norm(delta)).next()

            beta += delta
            previous_ll = ll

        self._hessian_ = hessian
        self._score_ = gradient
        self._log_likelihood = ll

        if show_progress and completed:
            print("Convergence completed after %d iterations." % (i))
        if not completed:
            warnings.warn(
                "Newton-Rhapson failed to converge sufficiently in %d steps." %
                max_steps, ConvergenceWarning)

        return beta
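A detail shared by every variant above: the L2 penalizer is folded directly into the score and Hessian. The gradient of the ridge term -(penalizer / 2) * ||beta||^2 is -penalizer * beta, and its Hessian is -penalizer * I, which the code applies with the strided write h.flat[::d + 1] -= penalizer. Because .flat views a row-major (d, d) array, indices 0, d+1, 2*(d+1), ... are exactly the diagonal, so the write is equivalent to subtracting penalizer * np.eye(d) without allocating an identity matrix. A quick check of the equivalence:

import numpy as np

d, penalizer = 4, 0.1
h = np.arange(d * d, dtype=float).reshape(d, d)

h_strided = h.copy()
h_strided.flat[::d + 1] -= penalizer     # touches diagonal entries only

h_eye = h - penalizer * np.eye(d)        # same result, extra allocation
assert np.allclose(h_strided, h_eye)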
Example #4
    def _newton_rhaphson(self, df, stop_times_events, show_progress=False, step_size=None, precision=10e-6,
                         max_steps=50):
        """
        Newton-Raphson algorithm for fitting the CPH model.

        Note that data is assumed to be sorted on T!

        Parameters:
            df: (n, d) Pandas DataFrame of observations
            stop_times_events: (n, d) Pandas DataFrame of meta information about the subjects' history
            show_progress: True to show verbose output of convergence
            step_size: float > 0 to determine a starting step size in NR algorithm.
            precision: the convergence halts if the norm of delta between
                     successive positions is less than this value.

        Returns:
            beta: (d,1) numpy array.
        """
        assert precision <= 1., "precision must be less than or equal to 1."

        n, d = df.shape

        # make sure betas are correct size.
        beta = np.zeros((d, 1))

        i = 0
        converging = True
        ll, previous_ll = 0, 0
        start = time.time()

        step_sizer = StepSizer(step_size)
        step_size = step_sizer.next()

        while converging:
            i += 1
            h, g, ll = self._get_gradients(df, stop_times_events, beta)

            if self.penalizer > 0:
                # add the gradient and hessian of the l2 term
                g -= self.penalizer * beta.T
                h.flat[::d + 1] -= self.penalizer

            delta = solve(-h, step_size * g.T)
            if np.any(np.isnan(delta)):
                raise ValueError("""delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")
            # Save these as pending result
            hessian, gradient = h, g
            norm_delta = norm(delta)

            if show_progress:
                print("Iteration %d: norm_delta = %.6f, step_size = %.3f, ll = %.6f, seconds_since_start = %.1f" % (i, norm_delta, step_size, ll, time.time() - start))

            # convergence criteria
            if norm_delta < precision:
                converging, completed = False, True
            elif abs(ll - previous_ll) < precision:
                converging, completed = False, True
            elif i >= max_steps:
                # 50 iterations with N-R is a lot.
                # Expected convergence is ~10 steps
                converging, completed = False, False
            elif step_size <= 0.0001:
                converging, completed = False, False
            elif abs(ll) < 0.0001 and norm_delta > 1.0:
                warnings.warn("The log-likelihood is getting suspciously close to 0 and the delta is still large. There may be complete separation in the dataset. This may result in incorrect inference of coefficients. \
See https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faqwhat-is-complete-or-quasi-complete-separation-in-logisticprobit-regression-and-how-do-we-deal-with-them/ ", ConvergenceWarning)
                converging, completed = False, False

            step_size = step_sizer.update(norm_delta).next()

            beta += delta
            previous_ll = ll

        self._hessian_ = hessian
        self._score_ = gradient
        self._log_likelihood = ll

        if show_progress and completed:
            print("Convergence completed after %d iterations." % (i))
        if not completed:
            warnings.warn("Newton-Rhapson failed to converge sufficiently in %d steps." % max_steps, ConvergenceWarning)

        return beta
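StepSizer is not shown in this listing, so the contract the loops rely on has to be inferred: next() returns the current step size, and update(norm_delta) adapts it, shrinking after large steps so that the step_size <= 0.0001 escape hatch can eventually fire when the optimizer thrashes. A hypothetical, minimal stand-in (not lifelines' actual implementation) that satisfies that contract:

class ToyStepSizer:
    # Hypothetical stand-in for lifelines' StepSizer; only the
    # next()/update() protocol used by the fitters above is reproduced.
    def __init__(self, initial_step_size=None):
        self.step_size = 0.95 if initial_step_size is None else initial_step_size

    def update(self, norm_delta):
        # Shrink after large (likely overshooting) steps; recover slowly.
        if norm_delta > 10.0:
            self.step_size *= 0.25
        elif norm_delta > 1.0:
            self.step_size *= 0.75
        else:
            self.step_size = min(0.95, self.step_size * 1.1)
        return self

    def next(self):
        return self.step_size

step_sizer = ToyStepSizer()
step_size = step_sizer.next()                 # initial step size
step_size = step_sizer.update(12.0).next()    # shrinks after a wild step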
Example #5
    def _newton_rhaphson(self,
                         df,
                         stop_times_events,
                         weights,
                         show_progress=False,
                         step_size=None,
                         precision=10e-6,
                         max_steps=50):  # pylint: disable=too-many-arguments,too-many-locals,too-many-branches
        """
        Newton Rhaphson algorithm for fitting CPH model.

        Parameters
        ----------
        df: DataFrame 
        stop_times_events: DataFrame
             meta information about the subjects history
        show_progress: boolean, optional (default: True) 
            to show verbous output of convergence
        step_size: float 
            > 0 to determine a starting step size in NR algorithm.
        precision: float
            the convergence halts if the norm of delta between
                     successive positions is less than epsilon.

        Returns
        --------
        beta: (1,d) numpy array.
        """
        assert precision <= 1.0, "precision must be less than or equal to 1."

        _, d = df.shape

        # make sure betas are correct size.
        beta = np.zeros((d, 1))

        i = 0
        converging = True
        ll, previous_ll = 0, 0
        start = time.time()

        step_sizer = StepSizer(step_size)
        step_size = step_sizer.next()

        while converging:
            i += 1

            if self.strata is None:
                h, g, ll = self._get_gradients(df, stop_times_events, weights,
                                               beta)
            else:
                g = np.zeros_like(beta).T
                h = np.zeros((beta.shape[0], beta.shape[0]))
                ll = 0
                for _h, _g, _ll in self._partition_by_strata_and_apply(
                        df, stop_times_events, weights, self._get_gradients,
                        beta):
                    g += _g
                    h += _h
                    ll += _ll

            if self.penalizer > 0:
                # add the gradient and hessian of the l2 term
                g -= self.penalizer * beta.T
                h.flat[::d + 1] -= self.penalizer

            try:
                # reusing a piece to make g * inv(h) * g.T faster later
                inv_h_dot_g_T = spsolve(-h, g.T, sym_pos=True)
            except ValueError as e:
                if "infs or NaNs" in str(e):
                    raise ConvergenceError(
                        """hessian or gradient contains nan or inf value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")
                else:
                    # something else?
                    raise e

            delta = step_size * inv_h_dot_g_T

            if np.any(np.isnan(delta)):
                raise ConvergenceError(
                    """delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")
            # Save these as pending result
            hessian, gradient = h, g
            norm_delta = norm(delta)
            newton_decrement = g.dot(inv_h_dot_g_T) / 2

            if show_progress:
                print(
                    "Iteration %d: norm_delta = %.5f, step_size = %.5f, ll = %.5f, newton_decrement = %.5f, seconds_since_start = %.1f"
                    % (i, norm_delta, step_size, ll, newton_decrement,
                       time.time() - start))

            # convergence criteria
            if norm_delta < precision:
                converging, completed = False, True
            elif previous_ll != 0 and abs(ll - previous_ll) / (-previous_ll) < 1e-09:
                # this is what R uses by default
                converging, completed = False, True
            elif newton_decrement < 10e-8:
                converging, completed = False, True
            elif i >= max_steps:
                # 50 iterations with N-R is a lot.
                # Expected convergence is less than 10 steps
                converging, completed = False, False
            elif step_size <= 0.0001:
                converging, completed = False, False
            elif abs(ll) < 0.0001 and norm_delta > 1.0:
                warnings.warn(
                    "The log-likelihood is getting suspciously close to 0 and the delta is still large. There may be complete separation in the dataset. This may result in incorrect inference of coefficients. \
See https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faqwhat-is-complete-or-quasi-complete-separation-in-logisticprobit-regression-and-how-do-we-deal-with-them/ ",
                    ConvergenceWarning,
                )
                converging, completed = False, False

            step_size = step_sizer.update(norm_delta).next()

            beta += delta
            previous_ll = ll

        self._hessian_ = hessian
        self._score_ = gradient
        self._log_likelihood = ll

        if show_progress and completed:
            print("Convergence completed after %d iterations." % (i))
        if not completed:
            warnings.warn(
                "Newton-Rhapson failed to converge sufficiently in %d steps." %
                max_steps, ConvergenceWarning)

        return beta
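One last shared trick: the comment "reusing a piece to make g * inv(h) * g.T faster later" refers to computing the Newton step and the Newton decrement from a single linear solve. Assuming spsolve in these examples is scipy.linalg.solve imported under another name (an inference; the imports are not shown), the pattern looks like the sketch below. Note that sym_pos=True has since been replaced by assume_a="pos" in modern SciPy.

import numpy as np
from scipy.linalg import solve

rng = np.random.default_rng(1)
A = rng.normal(size=(5, 5))
h = -(A.dot(A.T) + 5.0 * np.eye(5))   # a negative-definite stand-in Hessian
g = rng.normal(size=5)                # a stand-in score vector

# One solve yields the Newton direction; -h is symmetric positive definite.
inv_h_dot_g_T = solve(-h, g, assume_a="pos")

# Reuse it for the decrement g * inv(-h) * g.T / 2 -- no explicit inverse.
newton_decrement = g.dot(inv_h_dot_g_T) / 2

assert np.isclose(newton_decrement, g.dot(np.linalg.inv(-h)).dot(g) / 2)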