Example 1
    def _fit_model(self, T, E, entry, show_progress=True):

        non_zero_entries = entry[entry > 0]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            results = minimize(
                value_and_grad(self._negative_log_likelihood),  # pylint: disable=no-value-for-parameter
                self._initial_values,
                jac=True,
                method="L-BFGS-B",
                args=(T, E, non_zero_entries),
                bounds=self._bounds,
                options={"disp": show_progress},
            )

            if results.success:
                # pylint: disable=no-value-for-parameter
                hessian_ = hessian(self._negative_log_likelihood)(results.x, T, E, non_zero_entries)
                return results.x, -results.fun * T.shape[0], T.shape[0] * hessian_
            print(results)
            if self._KNOWN_MODEL:
                raise ConvergenceError(
                    dedent(
                        """\
                    Fitting did not converge. This is mostly a lifelines problem, but a few things you can check:
                    1. Are there any extreme values in the durations column?
                      - Try scaling your durations to more reasonable values closer to 1 (multiplying or dividing by some 10^n).
                      - Try dropping them to see if the model converges.
                """
                    )
                )

            else:
                raise ConvergenceError(
                    dedent(
                        """\
                    Fitting did not converge.

                    1. Are two parameters in the model collinear / exchangeable? (Change model)
                    2. Is the cumulative hazard always non-negative and always non-decreasing? (Assumption error)
                    3. Are there inputs to the cumulative hazard that could produce nans or infs? (Check your _bounds)

                    This could be a problem with your data:
                    1. Are there any extreme values in the durations column?
                        - Try scaling your durations to a more reasonable value closer to 1 (multiplying or dividing by a large constant).
                        - Try dropping them to see if the model converges.
                    """
                    )
                )
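A minimal, self-contained sketch of the optimization pattern shared by these `_fit_model` variants: `value_and_grad` (presumably autograd's) wraps the negative log-likelihood so `scipy.optimize.minimize` receives the value and gradient from a single call via `jac=True`. The toy exponential likelihood below is illustrative only, not lifelines' own.

    # Sketch (assumption: a toy exponential survival model, not lifelines code).
    import autograd.numpy as np
    from autograd import value_and_grad
    from scipy.optimize import minimize

    def toy_negative_log_likelihood(params, T, E):
        # rate parameter lambda_; censored rows (E == 0) contribute only -lambda_ * T
        lambda_ = params[0]
        return -(E * np.log(lambda_) - lambda_ * T).mean()

    T = np.array([1.0, 3.0, 5.0, 2.5])
    E = np.array([1.0, 1.0, 0.0, 1.0])

    results = minimize(
        value_and_grad(toy_negative_log_likelihood),
        np.array([1.0]),        # initial value for lambda_
        jac=True,               # the objective returns (value, gradient)
        method="L-BFGS-B",
        args=(T, E),
        bounds=[(1e-9, None)],  # keep the rate strictly positive
    )
    print(results.x, results.fun)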
Example 2
    def _fit_model(self, T, E, *Xs, **kwargs):
        # TODO: move this to function kwarg when I remove py2
        show_progress = kwargs.pop("show_progress", False)
        n_params = sum([X.shape[1] for X in Xs])
        init_values = np.zeros((n_params, ))

        results = minimize(
            value_and_grad(self._negative_log_likelihood),
            init_values,
            method=None if self.l1_ratio <= 0.0 else "L-BFGS-B",
            jac=True,
            args=(T, E, Xs[0], Xs[1]),  # TODO: remove py2, (T, E, *Xs)
            options={"disp": show_progress},
        )
        if show_progress:
            print(results)

        if results.success:
            # pylint: disable=no-value-for-parameter
            hessian_ = hessian(self._negative_log_likelihood)(results.x, T, E,
                                                              *Xs)
            return results.x, -self._n_examples * results.fun, self._n_examples * hessian_
        print(results)
        raise ConvergenceError(
            dedent("""\
            Fitting did not converge. This could be a problem with your data:
            1. Are there any extreme values? (Try modelling them or dropping them to see if it helps convergence)
        """))
Example 3
    def _fit_model(self, T, E, entry, show_progress=True):

        non_zero_entries = entry[entry > 0]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            results = minimize(
                value_and_grad(self._negative_log_likelihood),  # pylint: disable=no-value-for-parameter
                self._initial_values,
                jac=True,
                method="L-BFGS-B",
                args=(T, E, non_zero_entries),
                bounds=self._bounds,
                options={"disp": show_progress},
            )

            if results.success:
                # pylint: disable=no-value-for-parameter
                hessian_ = hessian(self._negative_log_likelihood)(
                    results.x, T, E, non_zero_entries)
                return results.x, -results.fun, hessian_ * T.shape[0]
            print(results)
            raise ConvergenceError(
                dedent("""\
                Fitting did not converge. 

                1. Are two parameters in the model collinear / exchangeable? (Change model)
                2. Is the cumulative hazard always non-negative and always non-decreasing? (Assumption error)
                3. Are there inputs to the cumulative hazard that could produce nans or infs? (Check your _bounds)

                This could be a problem with your data:
                1. Are there any extreme values? (Try modelling them or dropping them to see if it helps convergence)
            """))
Example 4
    def _newton_rhaphson(self, T, E, precision=1e-5, show_progress=False):
        from lifelines.utils import _smart_search

        def hessian_function(parameters, T, E):
            return np.array([
                [_d_lambda_d_lambda_(parameters, T, E), _d_rho_d_lambda_(parameters, T, E)],
                [_d_rho_d_lambda_(parameters, T, E), _d_rho_d_rho(parameters, T, E)],
            ])

        def gradient_function(parameters, T, E):
            return np.array([
                _lambda_gradient(parameters, T, E),
                _rho_gradient(parameters, T, E)
            ])

        # initialize the parameters. This shows dramatic improvements.
        parameters = _smart_search(_negative_log_likelihood, 2, T, E)
        i = 1
        step_size = 0.9
        max_steps = 50
        converging, completed = True, False
        start = time.time()

        while converging and i < max_steps:
            # Do not override hessian and gradient in case of garbage
            h = hessian_function(parameters, T, E)
            g = gradient_function(parameters, T, E)

            delta = solve(h, -step_size * g.T)
            if np.any(np.isnan(delta)):
                raise ConvergenceError(
                    "delta contains nan value(s). Convergence halted.")

            parameters += delta

            # Save these as pending result
            hessian = h

            if show_progress:
                print(
                    "Iteration %d: norm_delta = %.5f, seconds_since_start = %.1f"
                    % (i, norm(delta), time.time() - start))

            if norm(delta) < precision:
                converging = False
                completed = True
            i += 1

        if show_progress and completed:
            print("Convergence completed after %d iterations." % (i))
        if not completed:
            warnings.warn(
                "Newton-Rhapson failed to converge sufficiently in %d steps." %
                max_steps, ConvergenceWarning)

        return parameters, hessian
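The loop above is a damped Newton-Raphson iteration: solve h * delta = -step_size * g and stop once the step norm drops below `precision`. A standalone sketch of the same update on a made-up two-parameter objective (the gradient and Hessian functions below are stand-ins, not the `_lambda_gradient` / `_d_rho_d_rho` helpers):

    # Sketch on the stand-in objective f(p) = (p0 - 1)^2 + 2 * (p1 + 3)^2.
    import numpy as np
    from numpy.linalg import solve, norm

    def gradient_fn(p):
        return np.array([2.0 * (p[0] - 1.0), 4.0 * (p[1] + 3.0)])

    def hessian_fn(p):
        return np.array([[2.0, 0.0], [0.0, 4.0]])

    parameters = np.zeros(2)
    step_size, precision, max_steps = 0.9, 1e-5, 50
    for i in range(1, max_steps + 1):
        h, g = hessian_fn(parameters), gradient_fn(parameters)
        delta = solve(h, -step_size * g)   # same damped Newton step as above
        parameters += delta
        if norm(delta) < precision:
            break
    print(parameters)                      # approaches the minimum at (1, -3)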
Example 5
    def _fit_model(self, T, E, weights, X, show_progress=False, initial_point=None):

        if initial_point is None:
            initial_point = self._create_initial_point(T, E, X)

        results = minimize(
            # using value_and_grad is much faster (takes advantage of shared computations) than splitting.
            value_and_grad(self._negative_log_likelihood),
            initial_point,
            method=None,
            jac=True,
            args=(T, E, weights, X),
            options={"disp": show_progress},
        )
        if show_progress or not results.success:
            print(results)

        if results.success:
            sum_weights = weights.sum()
            # pylint: disable=no-value-for-parameter
            hessian_ = hessian(self._negative_log_likelihood)(results.x, T, E, weights, X)
            return results.x, -sum_weights * results.fun, sum_weights * hessian_

        raise ConvergenceError(
            dedent(
                """\
            Fitting did not converge. This could be a problem with your data:
            1. Does a column have extremely high mean or variance? Try standardizing it.
            2. Are there any extreme outliers? Try modeling them or dropping them to see if it helps convergence
            3. Try adding a small penalizer (or changing it, if already present). Example: `%s(penalizer=0.01).fit(...)`
        """
                % self._class_name
            )
        )
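Downstream, the returned Hessian is what a caller would invert to get a variance matrix and standard errors. A hedged sketch of that step, assuming `sum_weights * hessian_` plays the role of the observed information matrix at the MLE (how lifelines consumes it exactly is outside this snippet):

    # Sketch: hypothetical 2x2 information matrix standing in for the returned Hessian.
    import numpy as np

    information_matrix = np.array([[40.0, 5.0],
                                   [5.0, 25.0]])

    # Under standard MLE asymptotics, Var(beta_hat) ~ inverse of the observed information.
    variance_matrix = np.linalg.inv(information_matrix)
    standard_errors = np.sqrt(np.diag(variance_matrix))
    print(standard_errors)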
Example 6
    def _fit_model(self,
                   likelihood,
                   Ts,
                   Xs,
                   E,
                   weights,
                   entries,
                   show_progress=False,
                   initial_point=None):

        if initial_point is None:
            initial_point = self._create_initial_point(Xs)

        assert initial_point.shape[0] == Xs.size, "initial_point is not the correct shape."

        self._neg_likelihood_with_penalty_function = lambda *args: self._add_penalty(
            -self._wrap_ll(likelihood)(*args), *args)

        results = minimize(
            # using value_and_grad is much faster (takes advantage of shared computations) than splitting.
            value_and_grad(self._neg_likelihood_with_penalty_function),
            initial_point,
            method=None,
            jac=True,
            args=(Ts, E, weights, entries, Xs),
            options={"disp": show_progress},
        )
        if show_progress or not results.success:
            print(results)

        if results.success:
            sum_weights = weights.sum()
            # pylint: disable=no-value-for-parameter
            hessian_ = hessian(self._neg_likelihood_with_penalty_function)(
                results.x, Ts, E, weights, entries, Xs)
            return results.x, -sum_weights * results.fun, sum_weights * hessian_

        name = self._class_name
        raise ConvergenceError(
            dedent("""\
            Fitting did not converge. This could be a problem with your dataset:

            0. Are there any lifelines warnings outputted during the `fit`?
            1. Inspect your DataFrame: does everything look as expected?
            2. Is there high-collinearity in the dataset? Try using the variance inflation factor (VIF) to find redundant variables.
            3. Try adding a small penalizer (or changing it, if already present). Example: `%s(penalizer=0.01).fit(...)`.
            4. Are there any extreme outliers? Try modeling them or dropping them to see if it helps convergence.
        """ % name))
Example 7
    def _fit_model(self, T, E, initial_values=None):
        if initial_values is None:
            initial_values = np.array([log(T).mean(), log(T).std()])

        def gradient_function(parameters, log_T, E):
            return np.array([
                _mu_gradient(parameters, log_T, E),
                _sigma_gradient(parameters, log_T, E)
            ])

        results = minimize(
            _negative_log_likelihood,
            initial_values,
            args=(log(T), E),
            jac=gradient_function,
            method="BFGS",
            options={"gtol": 1e-5},
        )
        if results.success:
            return results.x, -results.fun, results.hess_inv
        print(results)
        raise ConvergenceError(
            "Did not converge. This is a lifelines problem, not yours;")
Example 8
    def _newton_rhaphson(
        self,
        df,
        events,
        start,
        stop,
        weights,
        show_progress=False,
        step_size=None,
        precision=10e-6,
        max_steps=50,
        initial_point=None,
    ):  # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements
        """
        Newton-Raphson algorithm for fitting the CPH model.

        Parameters
        ----------
        df: DataFrame
        stop_times_events: DataFrame
             meta information about the subjects' history
        show_progress: boolean, optional (default: False)
            to show verbose output of convergence
        step_size: float
            > 0 to determine a starting step size in NR algorithm.
        precision: float
            the convergence halts if the norm of delta between
                     successive positions is less than epsilon.

        Returns
        --------
        beta: (1,d) numpy array.
        """
        assert precision <= 1.0, "precision must be less than or equal to 1."

        _, d = df.shape

        # make sure betas are correct size.
        if initial_point is not None:
            beta = initial_point
        else:
            beta = np.zeros((d, ))

        i = 0
        converging = True
        ll, previous_ll = 0, 0
        start_time = time.time()

        step_sizer = StepSizer(step_size)
        step_size = step_sizer.next()

        while converging:
            i += 1

            if self.strata is None:
                h, g, ll = self._get_gradients(df.values, events.values,
                                               start.values, stop.values,
                                               weights.values, beta)
            else:
                g = np.zeros_like(beta)
                h = np.zeros((d, d))
                ll = 0
                for _h, _g, _ll in self._partition_by_strata_and_apply(
                        df, events, start, stop, weights, self._get_gradients,
                        beta):
                    g += _g
                    h += _h
                    ll += _ll

            if i == 1 and np.all(beta == 0):
                # this is a neat optimization, the null partial likelihood
                # is the same as the full partial but evaluated at zero.
                # if the user supplied a non-trivial initial point, we need to delay this.
                self._log_likelihood_null = ll

            if self.penalizer > 0:
                # add the gradient and hessian of the l2 term
                g -= self.penalizer * beta
                h.flat[::d + 1] -= self.penalizer

            try:
                # reusing a piece to make g * inv(h) * g.T faster later
                inv_h_dot_g_T = spsolve(-h, g, sym_pos=True)
            except ValueError as e:
                if "infs or NaNs" in str(e):
                    raise ConvergenceError(
                        """hessian or gradient contains nan or inf value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""",
                        e,
                    )
                else:
                    # something else?
                    raise e
            except LinAlgError as e:
                raise ConvergenceError(
                    """Convergence halted due to matrix inversion problems. Suspicion is high colinearity. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""",
                    e,
                )

            delta = step_size * inv_h_dot_g_T

            if np.any(np.isnan(delta)):
                raise ConvergenceError(
                    """delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")
            # Save these as pending result
            hessian, gradient = h, g
            norm_delta = norm(delta)
            newton_decrement = g.dot(inv_h_dot_g_T) / 2

            if show_progress:
                print(
                    "\rIteration %d: norm_delta = %.5f, step_size = %.5f, ll = %.5f, newton_decrement = %.5f, seconds_since_start = %.1f"
                    % (i, norm_delta, step_size, ll, newton_decrement,
                       time.time() - start_time),
                    end="",
                )

            # convergence criteria
            if norm_delta < precision:
                converging, completed = False, True
            elif previous_ll > 0 and abs(ll -
                                         previous_ll) / (-previous_ll) < 1e-09:
                # this is what R uses by default
                converging, completed = False, True
            elif newton_decrement < 10e-8:
                converging, completed = False, True
            elif i >= max_steps:
                # 50 iteration steps with N-R is a lot.
                # Expected convergence is less than 10 steps
                converging, completed = False, False
            elif step_size <= 0.0001:
                converging, completed = False, False
            elif abs(ll) < 0.0001 and norm_delta > 1.0:
                warnings.warn(
                    "The log-likelihood is getting suspiciously close to 0 and the delta is still large. There may be complete separation in the dataset. This may result in incorrect inference of coefficients. \
See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression",
                    ConvergenceWarning,
                )
                converging, completed = False, False

            step_size = step_sizer.update(norm_delta).next()

            beta += delta

        self._hessian_ = hessian
        self._score_ = gradient
        self._log_likelihood = ll

        if show_progress and completed:
            print("Convergence completed after %d iterations." % (i))
        elif show_progress and not completed:
            print("Convergence failed. See any warning messages.")

        # report to the user problems that we detect.
        if completed and norm_delta > 0.1:
            warnings.warn(
                "Newton-Rhapson convergence completed but norm(delta) is still high, %.3f. This may imply non-unique solutions to the maximum likelihood. Perhaps there is colinearity or complete separation in the dataset?"
                % norm_delta,
                ConvergenceWarning,
            )
        elif not completed:
            warnings.warn(
                "Newton-Rhapson failed to converge sufficiently in %d steps." %
                max_steps, ConvergenceWarning)

        return beta
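The stopping rules above compare three quantities to small thresholds: the step norm, the relative change in the log-likelihood, and the Newton decrement g.dot(solve(-h, g)) / 2, which approximates the log-likelihood gain a full Newton step would deliver. A toy illustration of computing those quantities for a single iteration (the numbers are made up, not model output):

    # Sketch: one iteration's convergence diagnostics with fabricated values.
    import numpy as np
    from numpy.linalg import norm
    from scipy.linalg import solve

    g = np.array([0.02, -0.01])                   # gradient at the current beta
    h = np.array([[-5.0, 0.5],
                  [0.5, -4.0]])                   # negative-definite Hessian

    inv_h_dot_g_T = solve(-h, g)                  # reusable piece, as in the loop
    delta = 0.95 * inv_h_dot_g_T                  # step_size * Newton direction

    norm_delta = norm(delta)                      # rule 1: step length
    newton_decrement = g.dot(inv_h_dot_g_T) / 2   # rule 3: predicted ll gain

    ll, previous_ll = -1050.0, -1050.00001
    relative_change = abs(ll - previous_ll) / abs(previous_ll)  # rule 2
    print(norm_delta, newton_decrement, relative_change)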
Example 9
    def _newton_rhaphson(self,
                         X,
                         T,
                         E,
                         weights=None,
                         initial_beta=None,
                         step_size=None,
                         precision=10e-6,
                         show_progress=True,
                         max_steps=50):
        """
        Newton-Raphson algorithm for fitting the CPH model.

        Note that data is assumed to be sorted on T!

        Parameters:
            X: (n,d) Pandas DataFrame of observations.
            T: (n) Pandas Series representing observed durations.
            E: (n) Pandas Series representing death events.
            weights: (n) an iterable representing weights per observation.
            initial_beta: (1,d) numpy array of initial starting point for
                          NR algorithm. Default 0.
            step_size: float > 0.001 to determine a starting step size in NR algorithm.
            precision: the convergence halts if the norm of delta between
                     successive positions is less than epsilon.
            show_progress: since the fitter is iterative, show convergence
                     diagnostics.
            max_steps: the maximum number of iterations of the Newton-Raphson algorithm.

        Returns:
            beta: (1,d) numpy array.
        """
        self.path = []
        assert precision <= 1., "precision must be less than or equal to 1."
        n, d = X.shape

        # make sure betas are correct size.
        if initial_beta is not None:
            assert initial_beta.shape == (d, 1)
            beta = initial_beta
        else:
            beta = np.zeros((d, 1))

        step_sizer = StepSizer(step_size)
        step_size = step_sizer.next()

        # Method of choice is just efron right now
        if self.tie_method == 'Efron':
            get_gradients = self._get_efron_values
        else:
            raise NotImplementedError("Only Efron is available.")

        i = 0
        converging = True
        ll, previous_ll = 0, 0
        start = time.time()

        while converging:
            self.path.append(beta.copy())
            i += 1
            if self.strata is None:
                h, g, ll = get_gradients(X.values, beta, T.values, E.values,
                                         weights.values)
            else:
                g = np.zeros_like(beta).T
                h = np.zeros((beta.shape[0], beta.shape[0]))
                ll = 0
                for strata in np.unique(X.index):
                    stratified_X = X.loc[[strata]]
                    stratified_T = T.loc[[strata]]
                    stratified_E = E.loc[[strata]]
                    stratified_W = weights.loc[[strata]]
                    _h, _g, _ll = get_gradients(stratified_X.values, beta,
                                                stratified_T.values,
                                                stratified_E.values,
                                                stratified_W.values)
                    g += _g
                    h += _h
                    ll += _ll

            if self.penalizer > 0:
                # add the gradient and hessian of the l2 term
                g -= self.penalizer * beta.T
                h.flat[::d + 1] -= self.penalizer

            # reusing a piece to make g * inv(h) * g.T faster later
            try:
                inv_h_dot_g_T = spsolve(-h, g.T, sym_pos=True)
            except ValueError as e:
                if 'infs or NaNs' in str(e):
                    raise ConvergenceError(
                        """hessian or gradient contains nan or inf value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")
                else:
                    # something else?
                    raise e

            delta = step_size * inv_h_dot_g_T

            if np.any(np.isnan(delta)):
                raise ConvergenceError(
                    """delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")

            # Save these as pending result
            hessian, gradient = h, g
            norm_delta = norm(delta)

            # reusing an above piece to make g * inv(h) * g.T faster.
            newton_decrement = g.dot(inv_h_dot_g_T) / 2

            if show_progress:
                print(
                    "Iteration %d: norm_delta = %.5f, step_size = %.5f, ll = %.5f, newton_decrement = %.5f, seconds_since_start = %.1f"
                    % (i, norm_delta, step_size, ll, newton_decrement,
                       time.time() - start))

            # convergence criteria
            if norm_delta < precision:
                converging, completed = False, True
            elif previous_ll != 0 and abs(ll - previous_ll) / (
                    -previous_ll) < 1e-09:
                # this is what R uses by default
                converging, completed = False, True
            elif newton_decrement < precision:
                converging, completed = False, True
            elif i >= max_steps:
                # 50 iteration steps with N-R is a lot.
                # Expected convergence is ~10 steps
                converging, completed = False, False
            elif step_size <= 0.00001:
                converging, completed = False, False
            elif abs(ll) < 0.0001 and norm_delta > 1.0:
                warnings.warn(
                    "The log-likelihood is getting suspciously close to 0 and the delta is still large. There may be complete separation in the dataset. This may result in incorrect inference of coefficients. \
See https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faqwhat-is-complete-or-quasi-complete-separation-in-logisticprobit-regression-and-how-do-we-deal-with-them/ ",
                    ConvergenceWarning)
                converging, completed = False, False

            step_size = step_sizer.update(norm_delta).next()

            beta += delta
            previous_ll = ll

        self._hessian_ = hessian
        self._score_ = gradient
        self._log_likelihood = ll

        if show_progress and completed:
            print("Convergence completed after %d iterations." % (i))
        if not completed:
            warnings.warn(
                "Newton-Rhapson failed to converge sufficiently in %d steps." %
                max_steps, ConvergenceWarning)

        return beta
Example 10
    def _newton_rhaphson(self, df, stop_times_events, weights, show_progress=False, step_size=None, precision=10e-6,
                         max_steps=50):
        """
        Newton-Raphson algorithm for fitting the CPH model.

        Note that data is assumed to be sorted on T!

        Parameters:
            df: (n, d) Pandas DataFrame of observations
            stop_times_events: (n, d) Pandas DataFrame of meta information about the subjects' history
            show_progress: True to show verbose output of convergence
            step_size: float > 0 to determine a starting step size in NR algorithm.
            precision: the convergence halts if the norm of delta between
                     successive positions is less than epsilon.

        Returns:
            beta: (1,d) numpy array.
        """
        assert precision <= 1., "precision must be less than or equal to 1."

        n, d = df.shape

        # make sure betas are correct size.
        beta = np.zeros((d, 1))

        i = 0
        converging = True
        ll, previous_ll = 0, 0
        start = time.time()

        step_sizer = StepSizer(step_size)
        step_size = step_sizer.next()

        while converging:
            i += 1
            h, g, ll = self._get_gradients(df, stop_times_events, weights, beta)

            if self.penalizer > 0:
                # add the gradient and hessian of the l2 term
                g -= self.penalizer * beta.T
                h.flat[::d + 1] -= self.penalizer

            try:
                # reusing a piece to make g * inv(h) * g.T faster later
                inv_h_dot_g_T = spsolve(-h, g.T, sym_pos=True)
            except ValueError as e:
                if 'infs or NaNs' in str(e):
                    raise ConvergenceError("""hessian or gradient contains nan or inf value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")
                else:
                    # something else?
                    raise e

            delta = step_size * inv_h_dot_g_T

            if np.any(np.isnan(delta)):
                raise ConvergenceError("""delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""")
            # Save these as pending result
            hessian, gradient = h, g
            norm_delta = norm(delta)
            newton_decrement = g.dot(inv_h_dot_g_T)/2

            if show_progress:
                print("Iteration %d: norm_delta = %.5f, step_size = %.5f, ll = %.5f, newton_decrement = %.5f, seconds_since_start = %.1f" % (i, norm_delta, step_size, ll, newton_decrement, time.time() - start))

            # convergence criteria
            if norm_delta < precision:
                converging, completed = False, True
            elif previous_ll > 0 and abs(ll - previous_ll) / (-previous_ll) < 1e-09:
                # this is what R uses by default
                converging, completed = False, True
            elif newton_decrement < 10e-8:
                converging, completed = False, True
            elif i >= max_steps:
            # 50 iteration steps with N-R is a lot.
                # Expected convergence is less than 10 steps
                converging, completed = False, False
            elif step_size <= 0.0001:
                converging, completed = False, False
            elif abs(ll) < 0.0001 and norm_delta > 1.0:
                warnings.warn("The log-likelihood is getting suspciously close to 0 and the delta is still large. There may be complete separation in the dataset. This may result in incorrect inference of coefficients. \
See https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faqwhat-is-complete-or-quasi-complete-separation-in-logisticprobit-regression-and-how-do-we-deal-with-them/ ", ConvergenceWarning)
                converging, completed = False, False

            step_size = step_sizer.update(norm_delta).next()

            beta += delta

        self._hessian_ = hessian
        self._score_ = gradient
        self._log_likelihood = ll

        if show_progress and completed:
            print("Convergence completed after %d iterations." % (i))
        if not completed:
            warnings.warn("Newton-Rhapson failed to converge sufficiently in %d steps." % max_steps, ConvergenceWarning)

        return beta