Example #1
    def _hessian_finite_difference(self,
                                   params,
                                   approx_centered=False,
                                   **kwargs):
        params = np.array(params, ndmin=1)

        warnings.warn(
            'Calculation of the Hessian using finite differences'
            ' is usually subject to substantial approximation'
            ' errors.',
            PrecisionWarning,
            stacklevel=3,
        )

        if not approx_centered:
            epsilon = _get_epsilon(params, 3, None, len(params))
        else:
            epsilon = _get_epsilon(params, 4, None, len(params)) / 2
        hessian = approx_fprime(params,
                                self._score_finite_difference,
                                epsilon=epsilon,
                                kwargs=kwargs,
                                centered=approx_centered)

        # TODO: changed this to nobs_effective, has to be changed when merging
        # with statespace mlemodel
        return hessian / (self.nobs_effective)
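A minimal sketch of the same idea outside of a model class, assuming only NumPy and that `approx_fprime` (shown in Example #2 below) is importable from `statsmodels.tools.numdiff`; the toy loglikelihood and parameter values are illustrative only.

import numpy as np
from statsmodels.tools.numdiff import approx_fprime

# Toy Gaussian loglikelihood in (mu, log_sigma) for a fixed data vector.
data = np.array([0.5, -1.2, 0.3, 2.1, -0.7])

def loglike(params):
    mu, log_sigma = params
    sigma2 = np.exp(2 * log_sigma)
    return -0.5 * np.sum(np.log(2 * np.pi * sigma2) + (data - mu) ** 2 / sigma2)

def score(params):
    # Gradient of the loglikelihood, itself a finite-difference approximation.
    # squeeze() guards against the extra singleton axis returned for scalar f.
    return np.squeeze(approx_fprime(params, loglike, centered=True))

# Hessian as the finite-difference Jacobian of the score, as in the method above.
params = np.array([0.0, 0.0])
hessian = np.squeeze(approx_fprime(params, score, centered=True))
print(hessian)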
Example #2
def approx_fprime(x, f, epsilon=None, args=(), kwargs=None, centered=True):
    """
    Gradient of function, or Jacobian if function `f` returns 1d array

    Parameters
    ----------
    x : array
        parameters at which the derivative is evaluated
    f : function
        `f(*((x,)+args), **kwargs)` returning either one value or 1d array
    epsilon : float, optional
        Stepsize, if None, optimal stepsize is used. This is _EPS**(1/2)*x for
        `centered` == False and _EPS**(1/3)*x for `centered` == True.
    args : tuple
        Tuple of additional arguments for function `f`.
    kwargs : dict
        Dictionary of additional keyword arguments for function `f`.
    centered : bool
        Whether central difference should be returned. If not, does forward
        differencing.

    Returns
    -------
    grad : array
        gradient or Jacobian

    Notes
    -----
    If f returns a 1d array, it returns a Jacobian. If a 2d array is returned
    by f (e.g., with a value for each observation), it returns a 3d array
    with the Jacobian of each observation with shape xk x nobs x xk. I.e.,
    the Jacobian of the first observation would be [:, 0, :]

    """
    kwargs = {} if kwargs is None else kwargs
    x = np.atleast_1d(x).ravel()
    n = len(x)
    f0 = f(*(x, ) + args, **kwargs)
    dim = np.atleast_1d(f0).shape  # it could be a scalar
    grad = np.zeros((n, ) + dim, float)
    ei = np.zeros(np.shape(x), float)
    if not centered:
        epsilon = _get_epsilon(x, 2, epsilon, n)
        for k in range(n):
            ei[k] = epsilon[k]
            grad[k, :] = (f(*(x + ei, ) + args, **kwargs) - f0) / epsilon[k]
            ei[k] = 0.0
    else:
        epsilon = _get_epsilon(x, 3, epsilon, n) / 2.
        for k in range(n):
            ei[k] = epsilon[k]
            grad[k, :] = (f(*(x + ei, ) + args, **kwargs) -
                          f(*(x - ei, ) + args, **kwargs)) / (2 * epsilon[k])
            ei[k] = 0.0
    # Swap the first two axes so the function-output dimension comes first
    # and the parameter axis second: shape dim x n (x ...remaining dims).
    axes = list(range(grad.ndim))
    axes[:2] = axes[1::-1]
    return np.transpose(grad, axes=axes)
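A short usage sketch for the function above (the quadratic `quad`, matrix `A`, and point `x0` are illustrative only), comparing forward and central differences against the analytic gradient:

import numpy as np

A = np.array([[3.0, 1.0], [1.0, 2.0]])

def quad(x):
    # 0.5 * x'Ax has analytic gradient A @ x for symmetric A.
    return 0.5 * x @ A @ x

x0 = np.array([1.0, -2.0])
analytic = A @ x0
forward = np.squeeze(approx_fprime(x0, quad, centered=False))
central = np.squeeze(approx_fprime(x0, quad, centered=True))
# The central difference is typically several orders of magnitude more accurate.
print(np.max(np.abs(forward - analytic)), np.max(np.abs(central - analytic)))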
Example #3
    def _score_complex_step(self, params, **kwargs):
        # the default epsilon can be too small
        # inversion_method = INVERT_UNIVARIATE | SOLVE_LU
        epsilon = _get_epsilon(params, 2., None, len(params))
        kwargs['transformed'] = True
        kwargs['complex_step'] = True
        return approx_fprime_cs(params, self.loglike, epsilon=epsilon,
                                kwargs=kwargs)
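The identity this method relies on is easy to see in isolation. A minimal illustration on a standard complex-step test function (not part of any statsmodels API):

import numpy as np

def f(x):
    # Analytic (complex-safe) test function from Squire & Trapp (1998).
    return np.exp(x) / np.sqrt(np.sin(x) ** 3 + np.cos(x) ** 3)

x, h = 1.5, 1e-20
# f'(x) ~= Im(f(x + i*h)) / h: no subtraction, so h can be made tiny
# without round-off error, unlike the forward difference below.
deriv_cs = np.imag(f(x + 1j * h)) / h
deriv_fd = (f(x + 1e-7) - f(x)) / 1e-7
print(deriv_cs, deriv_fd)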
Example #4
def approx_jacobian(x, func, epsilon, *args):
    '''Approximate the Jacobian matrix of callable function func

    Parameters:
      * x       - The state vector at which the Jacobian matrix is desired
      * func    - A vector-valued function of the form f(x,*args)
      * epsilon - The step size used to determine the partial derivatives. Set to None to select 
                  the optimal step size. 
      * *args   - Additional arguments passed to func

    Returns:
         An array of dimensions (lenf, lenx) where lenf is the length
         of the outputs of func, and lenx is the number of inputs of func.

    Notes:
        The approximation is done using fourth order central difference method.
    '''
    if np.shape(x) == ():
        n = 1
        x = np.asarray([x])
    else:
        n = len(x)

    # NOTE: the first assignment is immediately overridden, so only the
    # fourth-order central difference branch below is ever executed.
    method = 'FirstOrderCentralDifference'
    method = 'FourthOrderCentralDifference'
    
    x0 = np.asarray(x)
    f0 = func(x0, *args)
    
    if method == 'FirstOrderCentralDifference':
        jac = np.zeros([len(x0),len(f0)])
        df1 = np.zeros([len(x0),len(f0)])
        df2 = np.zeros([len(x0),len(f0)])
        dx = np.zeros(len(x0))
        for i in range(len(x0)):
            dx[i] = epsilon
            df1[i] = func(*((x0+dx/2,)+args))
            df2[i] = func(*((x0-dx/2,)+args))
            jac[i] = (df1[i] - df2[i])/epsilon
            dx[i] = 0.0

    if method == 'FourthOrderCentralDifference':
        epsilon = nd._get_epsilon(x,3,epsilon,n)/2.
        jac = np.zeros([len(x0),len(f0)])
        df1 = np.zeros([len(x0),len(f0)])
        df2 = np.zeros([len(x0),len(f0)])
        df3 = np.zeros([len(x0),len(f0)])
        df4 = np.zeros([len(x0),len(f0)])    
        dx = np.zeros(len(x0))
        for i in range(len(x0)):
            dx[i] = epsilon[i]
            df1[i] = -func(*((x0+2*dx,)+args))
            df2[i] = 8*func(*((x0+dx,)+args))
            df3[i] = -8*func(*((x0-dx,)+args))
            df4[i] = func(*((x0-2*dx,)+args))
            jac[i] = (df1[i]+df2[i] + df3[i] + df4[i])/(12*dx[i])
            dx[i] = 0.0
    return jac.transpose()
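A hedged usage sketch for the function above, assuming it lives in a module that already has `import numpy as np` and `import statsmodels.tools.numdiff as nd` (the module referenced by `nd._get_epsilon`); the test function and evaluation point are illustrative only:

import numpy as np
import statsmodels.tools.numdiff as nd  # module the snippet above refers to as `nd`

def func(x):
    # Vector-valued test function R^2 -> R^3 with a known Jacobian.
    return np.array([x[0] ** 2, x[0] * x[1], np.sin(x[1])])

x0 = np.array([1.0, 0.5])
jac = approx_jacobian(x0, func, 1e-4)  # shape (3, 2): outputs by parameters
analytic = np.array([[2 * x0[0], 0.0],
                     [x0[1], x0[0]],
                     [0.0, np.cos(x0[1])]])
print(np.max(np.abs(jac - analytic)))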
Example #5
def arma_scoreobs(endog,
                  ar_params=None,
                  ma_params=None,
                  sigma2=1,
                  prefix=None):
    """
    Compute the score per observation (gradient of the loglikelihood function)

    Parameters
    ----------
    endog : ndarray
        The observed time-series process.
    ar_params : ndarray, optional
        Autoregressive coefficients, not including the zero lag.
    ma_params : ndarray, optional
        Moving average coefficients, not including the zero lag, where the sign
        convention assumes the coefficients are part of the lag polynomial on
        the right-hand side of the ARMA definition (i.e. they have the same
        sign as in the usual econometrics convention, in which the coefficients
        are on the right-hand side of the ARMA definition).
    sigma2 : ndarray, optional
        The ARMA innovation variance. Default is 1.
    prefix : str, optional
        The BLAS prefix associated with the datatype. Default is to find the
        best datatype based on given input. This argument is typically only
        used internally.

    Returns
    -------
    scoreobs : array
        Score per observation, evaluated at the given parameters.

    Notes
    -----
    This is a numerical approximation, calculated using first-order complex
    step differentiation on the `arma_loglike` method.
    """
    ar_params = [] if ar_params is None else ar_params
    ma_params = [] if ma_params is None else ma_params

    p = len(ar_params)
    q = len(ma_params)

    def func(params):
        return arma_loglikeobs(endog, params[:p], params[p:p + q],
                               params[p + q:])

    params0 = np.r_[ar_params, ma_params, sigma2]
    epsilon = _get_epsilon(params0, 2., None, len(params0))
    return approx_fprime_cs(params0, func, epsilon)
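A hypothetical usage sketch: simulate a short AR(1) series and evaluate the per-observation score at the true parameters. This assumes the helpers referenced above (`arma_loglikeobs`, `approx_fprime_cs`, `_get_epsilon`) are importable in the same module, as they are inside statsmodels:

import numpy as np

rng = np.random.default_rng(0)
nobs = 200
eps = rng.standard_normal(nobs)
endog = np.zeros(nobs)
for t in range(1, nobs):
    endog[t] = 0.5 * endog[t - 1] + eps[t]

scoreobs = arma_scoreobs(endog, ar_params=[0.5], sigma2=1.0)
# One row per observation, one column per parameter (AR coefficient, sigma2).
print(scoreobs.shape)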
Example #7
    def _hessian_complex_step(self, params, **kwargs):
        """
        Hessian matrix computed by second-order complex-step differentiation
        on the `loglike` function.
        """
        # the default epsilon can be too small
        epsilon = _get_epsilon(params, 3., None, len(params))
        kwargs['transformed'] = True
        kwargs['complex_step'] = True
        hessian = approx_hess_cs(
            params, self.loglike, epsilon=epsilon, kwargs=kwargs)

        # TODO: changed this to nobs_effective, has to be changed when merging
        # with statespace mlemodel
        return hessian / (self.nobs_effective)
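For intuition, the same helper can be applied to any smooth scalar function. A sketch assuming `approx_hess_cs` is importable from `statsmodels.tools.numdiff`; the test function is illustrative only:

import numpy as np
from statsmodels.tools.numdiff import approx_hess_cs

def f(x):
    return np.exp(x[0]) * np.sin(x[1]) + x[0] ** 2 * x[1]

x0 = np.array([0.3, 1.2])
hess = approx_hess_cs(x0, f)
# Analytic Hessian for comparison.
analytic = np.array(
    [[np.exp(x0[0]) * np.sin(x0[1]) + 2 * x0[1],
      np.exp(x0[0]) * np.cos(x0[1]) + 2 * x0[0]],
     [np.exp(x0[0]) * np.cos(x0[1]) + 2 * x0[0],
      -np.exp(x0[0]) * np.sin(x0[1])]])
print(np.max(np.abs(hess - analytic)))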
Example #9
def approx_fprime_cs(x, f, epsilon=None, args=(), kwargs=None):
    '''
    Calculate gradient or Jacobian with complex step derivative approximation

    Parameters
    ----------
    x : array
        parameters at which the derivative is evaluated
    f : function
        `f(*((x,)+args), **kwargs)` returning either one value or 1d array
    epsilon : float, optional
        Stepsize, if None, optimal stepsize is used. Optimal step-size is
        EPS*x. See note.
    args : tuple
        Tuple of additional arguments for function `f`.
    kwargs : dict
        Dictionary of additional keyword arguments for function `f`.

    Returns
    -------
    partials : ndarray
       array of partial derivatives, Gradient or Jacobian

    Notes
    -----
    The complex-step derivative has truncation error O(epsilon**2), so
    truncation error can be eliminated by choosing epsilon to be very small.
    The complex-step derivative avoids the problem of round-off error with
    small epsilon because there is no subtraction.
    '''
    # From Guilherme P. de Freitas, numpy mailing list
    # May 04 2010 thread "Improvement of performance"
    # http://mail.scipy.org/pipermail/numpy-discussion/2010-May/050250.html
    kwargs = {} if kwargs is None else kwargs
    x = np.atleast_1d(x).ravel()
    n = len(x)
    epsilon = _get_epsilon(x, 1, epsilon, n)
    increments = np.identity(n) * 1j * epsilon
    # TODO: see if this can be vectorized, but usually dim is small
    partials = [
        f(x + ih, *args, **kwargs).imag / epsilon[i]
        for i, ih in enumerate(increments)
    ]
    # Swap the first two axes so the function-output dimension comes first
    # and the parameter axis second (same layout as approx_fprime).
    axes = list(range(partials[0].ndim + 1))
    axes[:2] = axes[1::-1]
    return np.transpose(partials, axes=axes)
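A short usage sketch for the function above; note that `f` must accept complex input (use NumPy ufuncs and avoid abs() or comparisons on the perturbed values). The test function is illustrative only:

import numpy as np

def f(x):
    return np.array([np.exp(x[0]) * x[1], np.sin(x[0] * x[1])])

x0 = np.array([0.5, 2.0])
jac = approx_fprime_cs(x0, f)  # rows: outputs of f, columns: parameters
analytic = np.array(
    [[np.exp(x0[0]) * x0[1], np.exp(x0[0])],
     [x0[1] * np.cos(x0[0] * x0[1]), x0[0] * np.cos(x0[0] * x0[1])]])
print(np.max(np.abs(jac - analytic)))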
Example #10
def _approx_hess1_backward(x, f, epsilon=None, args=(), kwargs=None):
    n = len(x)
    epsilon = -np.abs(_get_epsilon(x, 3, epsilon, n))
    return approx_hess1(x, f, epsilon, args, kwargs, centered=False)
Example #11
    def observed_information_matrix(self, params, **kwargs):
        """
        Observed information matrix

        Parameters
        ----------
        params : array_like, optional
            Array of parameters at which to evaluate the loglikelihood
            function.
        **kwargs
            Additional keyword arguments to pass to the Kalman filter. See
            `KalmanFilter.filter` for more details.

        Notes
        -----
        This method is from Harvey (1989), which shows that the information
        matrix only depends on terms from the gradient. This implementation
        is therefore partially analytic and partially numeric: it uses the
        analytic formula for the information matrix together with numerically
        computed elements of the gradient.

        References
        ----------
        Harvey, Andrew C. 1990.
        Forecasting, Structural Time Series Models and the Kalman Filter.
        Cambridge University Press.

        """
        # Setup
        n = len(params)
        epsilon = _get_epsilon(params, 1, None, n)
        increments = np.identity(n) * 1j * epsilon

        # Get values at the params themselves
        self.update(params)
        res = self.ssm.filter(**kwargs)
        dtype = self.ssm.dtype
        # Save this for inversion later
        inv_forecasts_error_cov = res.forecasts_error_cov.copy()

        # Compute partial derivatives
        partials_forecasts_error = (
            np.zeros((self.k_endog, self.nobs, n))
        )
        partials_forecasts_error_cov = (
            np.zeros((self.k_endog, self.k_endog, self.nobs, n))
        )
        for i, ih in enumerate(increments):
            self.update(params + ih)
            res = self.ssm.filter(**kwargs)

            partials_forecasts_error[:, :, i] = (
                res.forecasts_error.imag / epsilon[i]
            )

            partials_forecasts_error_cov[:, :, :, i] = (
                res.forecasts_error_cov.imag / epsilon[i]
            )

        # Compute the information matrix
        tmp = np.zeros((self.k_endog, self.k_endog, self.nobs, n), dtype=dtype)

        information_matrix = np.zeros((n, n), dtype=dtype)
        for t in range(self.ssm.loglikelihood_burn, self.nobs):
            inv_forecasts_error_cov[:, :, t] = (
                np.linalg.inv(inv_forecasts_error_cov[:, :, t])
            )
            for i in range(n):
                tmp[:, :, t, i] = np.dot(
                    inv_forecasts_error_cov[:, :, t],
                    partials_forecasts_error_cov[:, :, t, i]
                )
            for i in range(n):
                for j in range(n):
                    information_matrix[i, j] += (
                        0.5 * np.trace(np.dot(tmp[:, :, t, i],
                                              tmp[:, :, t, j]))
                    )
                    information_matrix[i, j] += np.inner(
                        partials_forecasts_error[:, t, i],
                        np.dot(inv_forecasts_error_cov[:,:,t],
                               partials_forecasts_error[:, t, j])
                    )
        return information_matrix / (self.nobs - self.ssm.loglikelihood_burn)
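In formula form (a paraphrase of what the loops above compute, not text from statsmodels), the estimate is Harvey's information matrix

$$
\hat I(\theta)_{ij} = \frac{1}{T-d}\sum_{t=d+1}^{T}\left[
\tfrac{1}{2}\,\operatorname{tr}\!\left(F_t^{-1}\,\frac{\partial F_t}{\partial\theta_i}\,
F_t^{-1}\,\frac{\partial F_t}{\partial\theta_j}\right)
+\left(\frac{\partial v_t}{\partial\theta_i}\right)' F_t^{-1}\,
\frac{\partial v_t}{\partial\theta_j}\right],
$$

where $v_t$ are the one-step-ahead forecast errors, $F_t$ their covariance matrices, $d$ is `loglikelihood_burn`, and $T$ is `nobs`; the partial derivatives are the complex-step quantities computed in the first loop.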
Example #12
def _approx_fprime_backward(x, f, epsilon=None, args=(), kwargs=None):
    x = np.atleast_1d(x).ravel()
    n = len(x)
    epsilon = -np.abs(_get_epsilon(x, 2, epsilon, n))
    return approx_fprime(x, f, epsilon, args, kwargs, centered=False)
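The sign trick is just algebra on the forward-difference formula: with a negative step and `centered=False`, (f(x + e) - f(x)) / e becomes a backward difference. A tiny self-contained check (plain NumPy, not statsmodels code):

import numpy as np

def f(x):
    return np.array([x[0] ** 3])

x0, h = np.array([2.0]), 1e-6
backward = (f(x0) - f(x0 - h)) / h
via_negative_step = (f(x0 + (-h)) - f(x0)) / (-h)
print(backward, via_negative_step)  # identical by construction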
Example #13
def _approx_hess1_backward(x, f, epsilon=None, args=(), kwargs=None):
    n = len(x)
    kwargs = {} if kwargs is None else kwargs
    epsilon = -np.abs(_get_epsilon(x, 3, epsilon, n))
    return approx_hess1(x, f, epsilon, args, kwargs)
Example #14
    def observed_information_matrix(self, params, **kwargs):
        """
        Observed information matrix

        Parameters
        ----------
        params : array_like, optional
            Array of parameters at which to evaluate the loglikelihood
            function.
        **kwargs
            Additional keyword arguments to pass to the Kalman filter. See
            `KalmanFilter.filter` for more details.

        Notes
        -----
        This method is from Harvey (1989), which shows that the information
        matrix only depends on terms from the gradient. This implementation
        is therefore partially analytic and partially numeric: it uses the
        analytic formula for the information matrix together with numerically
        computed elements of the gradient.

        References
        ----------
        Harvey, Andrew C. 1990.
        Forecasting, Structural Time Series Models and the Kalman Filter.
        Cambridge University Press.

        """
        # Setup
        n = len(params)
        epsilon = _get_epsilon(params, 1, None, n)
        increments = np.identity(n) * 1j * epsilon

        kwargs['results'] = FilterResults

        # Get values at the params themselves
        self.update(params)
        res = self.filter(**kwargs)
        dtype = self.dtype
        # Save this for inversion later
        inv_forecasts_error_cov = res.forecasts_error_cov.copy()

        # Compute partial derivatives
        partials_forecasts_error = (np.zeros((self.k_endog, self.nobs, n)))
        partials_forecasts_error_cov = (np.zeros(
            (self.k_endog, self.k_endog, self.nobs, n)))
        for i, ih in enumerate(increments):
            self.update(params + ih)
            res = self.filter(**kwargs)

            partials_forecasts_error[:, :, i] = (res.forecasts_error.imag /
                                                 epsilon[i])

            partials_forecasts_error_cov[:, :, :, i] = (
                res.forecasts_error_cov.imag / epsilon[i])

        # Compute the information matrix
        tmp = np.zeros((self.k_endog, self.k_endog, self.nobs, n), dtype=dtype)

        information_matrix = np.zeros((n, n), dtype=dtype)
        for t in range(self.loglikelihood_burn, self.nobs):
            inv_forecasts_error_cov[:, :, t] = np.linalg.inv(
                inv_forecasts_error_cov[:, :, t])
            for i in range(n):
                tmp[:, :, t, i] = np.dot(
                    inv_forecasts_error_cov[:, :, t],
                    partials_forecasts_error_cov[:, :, t, i])
            for i in range(n):
                for j in range(n):
                    information_matrix[i, j] += (
                        0.5 *
                        np.trace(np.dot(tmp[:, :, t, i], tmp[:, :, t, j])))
                    information_matrix[i, j] += np.inner(
                        partials_forecasts_error[:, t, i],
                        np.dot(inv_forecasts_error_cov[:, :, t],
                               partials_forecasts_error[:, t, j]))
        return information_matrix / (self.nobs - self.loglikelihood_burn)
Example #15
def approx_hess(x, f, epsilon=None, args=(), kwargs=None):
    """
    Parameters
    ----------
    x : array_like
       value at which function derivative is evaluated
    f : function
       function of one array f(x, `*args`, `**kwargs`)
    epsilon : float or array-like, optional
       Stepsize used, if None, then stepsize is automatically chosen
       according to EPS**(1/4)*x.
    args : tuple
        Arguments for function `f`.
    kwargs : dict
        Keyword arguments for function `f`.


    Returns
    -------
    hess : ndarray
       array of partial second derivatives, Hessian


    Notes
    -----
    Equation (9) in Ridout. Computes the Hessian as::

      1/(4*d_j*d_k) * ((f(x + d[j]*e[j] + d[k]*e[k]) - f(x + d[j]*e[j]
                                                       - d[k]*e[k])) -
                       (f(x - d[j]*e[j] + d[k]*e[k]) - f(x - d[j]*e[j]
                                                       - d[k]*e[k])))

    where e[j] is a vector with element j == 1 and the rest are zero and
    d[i] is epsilon[i].

    References
    ----------

    Ridout, M.S. (2009) Statistical applications of the complex-step method
        of numerical differentiation. The American Statistician, 63, 66-74

    Copyright
    ---------
    This is an adaptation of the function approx_hess3() in
    statsmodels.tools.numdiff. That code is BSD (3 clause) licensed as
    follows:

    Copyright (C) 2006, Jonathan E. Taylor
    All rights reserved.

    Copyright (c) 2006-2008 Scipy Developers.
    All rights reserved.

    Copyright (c) 2009-2012 Statsmodels Developers.
    All rights reserved.


    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

      a. Redistributions of source code must retain the above copyright notice,
         this list of conditions and the following disclaimer.
      b. Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
      c. Neither the name of Statsmodels nor the names of its contributors
         may be used to endorse or promote products derived from this software
         without specific prior written permission.


    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    ARE DISCLAIMED. IN NO EVENT SHALL STATSMODELS OR CONTRIBUTORS BE LIABLE FOR
    ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
    DAMAGE.
    """
    kwargs = {} if kwargs is None else kwargs
    n = len(x)
    h = smnd._get_epsilon(x, 4, epsilon, n)
    ee = np.diag(h)
    hess = np.outer(h, h)

    for i in range(n):
        for j in range(i, n):
            hess[i, j] = (f(*((x + ee[i, :] + ee[j, :], ) + args), **kwargs) -
                          f(*((x + ee[i, :] - ee[j, :], ) + args), **kwargs) -
                          (f(*((x - ee[i, :] + ee[j, :], ) + args), **kwargs) -
                           f(*((x - ee[i, :] - ee[j, :], ) + args), **kwargs))
                          ) / (4. * hess[i, j])
            hess[j, i] = hess[i, j]
    return hess
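A usage sketch for the function above, assuming its module imports `statsmodels.tools.numdiff as smnd` (as referenced by `smnd._get_epsilon`); for a quadratic form 0.5*x'Ax the Hessian is exactly A, so the approximation error is easy to read off:

import numpy as np
import statsmodels.tools.numdiff as smnd  # module the snippet above refers to as `smnd`

A = np.array([[4.0, 1.0], [1.0, 3.0]])

def quad(x):
    return 0.5 * x @ A @ x

x0 = np.array([1.0, -1.0])
hess = approx_hess(x0, quad)
print(np.max(np.abs(hess - A)))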