def _profileFhessian(xhat, i, alpha, obj, approx=True):
    c = obj.cost(xhat) + 0.5 * qchisq(1-alpha, df=1)

    def func(x):
        if approx:
            H, output = obj.jtj(x, full_output=True)
        else:
            H, output = obj.hessian(x, full_output=True)

        g = output['grad']
        G = H.copy()
        G[i] = g.copy()
        lvector = g.copy()
        lvector[i] = obj.cost(x) - c
        if approx:
            return 2 * G.T.dot(G)
        else:
            A = 2 * G.T.dot(G)
            # here we assume that only the second derivative is
            # significant.
            for s in lvector:
                A += s*H
            #return numpy.diag(lvector).dot(H) + 2 * G.T.dot(G)
            # return 2 * G.T.dot(G)
            return A

    return func
Example #2
def _profileFhessian(xhat, i, alpha, obj, approx=True):
    c = obj.cost(xhat) + 0.5 * qchisq(1 - alpha, df=1)

    def func(x):
        if approx:
            H, output = obj.jtj(x, full_output=True)
        else:
            H, output = obj.hessian(x, full_output=True)

        g = output['grad']
        G = H.copy()
        G[i] = g.copy()
        lvector = g.copy()
        lvector[i] = obj.cost(x) - c
        A = 2 * G.T.dot(G)
        if not approx:
            for s in lvector:
                A += s * H
        return A

    return func
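These helpers only assume a small interface on obj; the sketch below is a minimal mock of that interface, inferred from the calls made in the snippets here (cost, gradient, and jtj/hessian with full_output=True returning the gradient alongside the matrix). The class name and the quadratic cost are purely illustrative, and qchisq is assumed to be the chi-square quantile function, i.e. scipy.stats.chi2.ppf.

import numpy as np
from scipy.stats import chi2


def qchisq(p, df=1):
    # assumed equivalent of R's qchisq, i.e. the chi-square quantile function
    return chi2.ppf(p, df)


class QuadraticLoss:
    '''Toy objective exposing the interface used by the profile helpers.'''
    def __init__(self, A, b):
        self.A = np.asarray(A, dtype=float)   # positive definite matrix
        self.b = np.asarray(b, dtype=float)   # location of the minimum

    def cost(self, x):
        r = np.asarray(x, dtype=float) - self.b
        return 0.5*r.dot(self.A).dot(r)

    def gradient(self, x):
        return self.A.dot(np.asarray(x, dtype=float) - self.b)

    def jtj(self, x, full_output=False):
        H = self.A.copy()
        return (H, {'grad': self.gradient(x)}) if full_output else H

    # for a quadratic cost the Hessian and the JTJ approximation coincide
    hessian = jtj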
Example #3
def _profileGetInitialValues(theta, i, alpha, obj, approx=True):
    '''
    We would not use an approximation in general because if theta
    is an optimal value, then we would expect the Hessian to be
    a PSD matrix.
    '''
    p = len(theta)
    setIndex = set(range(p))

    if approx:
        H = obj.jtj(theta)
    else:
        H = obj.hessian(theta)

    activeIndex = list(setIndex - set([i]))
    tau = numpy.ones(p)
    dwdb = -numpy.linalg.lstsq(H[activeIndex][:,activeIndex],H[i,activeIndex])[0]
    tau[activeIndex] = dwdb

    h = numpy.sqrt(qchisq(1-alpha, df=1) / (H[i,i] + (H[i,activeIndex].T).dot(dwdb)))

    # we only move a half step and not a full step as a more
    # conservative approach is less likely to shoot out of bounds
    xhatU = theta + 0.5 * h * tau
    xhatL = theta - 0.5 * h * tau

    return xhatL, xhatU
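The step size h above follows from a quadratic model of the cost around the MLE: with tau[i] = 1 and the remaining components equal to dwdb (so the other parameters track their conditional optimum), the predicted cost increase along tau is 0.5*h**2*tau.dot(H).dot(tau), and tau.dot(H).dot(tau) simplifies to H[i, i] + H[i, activeIndex].dot(dwdb) for symmetric H. Setting the increase equal to 0.5*qchisq(1 - alpha, df=1) gives the expression for h. A small check of that identity under the quadratic model (illustrative only; the function name is made up):

import numpy as np
from scipy.stats import chi2


def _check_half_width(H, i, alpha=0.05):
    # reproduce the step-size calculation for a given Hessian and confirm that
    # the quadratic model predicts exactly the targeted cost increase
    p = H.shape[0]
    active = [j for j in range(p) if j != i]
    dwdb = -np.linalg.solve(H[np.ix_(active, active)], H[i, active])
    tau = np.ones(p)
    tau[active] = dwdb
    q = chi2.ppf(1 - alpha, df=1)
    h = np.sqrt(q/(H[i, i] + H[i, active].dot(dwdb)))
    d = h*tau
    return 0.5*d.dot(H).dot(d), 0.5*q   # the two values should agree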
Example #4
def _profileG(xhat, i, alpha, obj):
    c = obj.cost(xhat) + 0.5 * qchisq(1-alpha, df=1)
    def func(x):
        r = obj.gradient(x)
        r[i] = obj.cost(x) - c
        return r

    return func
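One plausible way to use _profileG (not shown in these snippets) is to hand the modified system to a root finder: component i of the system is cost(x) - c, so a root lies on the boundary of the profile-likelihood confidence region. The wrapper below is only a sketch and its name is made up; the starting point x0 would typically be one of the two values returned by _profileGetInitialValues.

import numpy as np
from scipy.optimize import root


def profile_ci_endpoint(xhat, x0, i, alpha, obj):
    # x0 should sit on one side of the MLE xhat so that the solver converges
    # to the corresponding end of the interval
    func = _profileG(xhat, i, alpha, obj)
    sol = root(func, x0)
    return sol.x[i] if sol.success else np.nan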
Example #5
def asymptotic(obj, alpha=None, theta=None, lb=None, ub=None):
    '''
    Finds the confidence interval at the :math:`\\alpha` level
    under the :math:`\\mathcal{X}^{2}` assumption for the
    likelihood

    Parameters
    ----------
    obj: ode object
        an object initialized from :class:`OperateOdeModel`
    alpha: numeric
        confidence level, :math:`0 < \\alpha < 1`
    theta: array like
        the MLE parameters
    lb: array like
        expected lower bound
    ub: array like
        expected upper bound

    Returns
    -------
    l: array like
        lower confidence interval
    u: array like
        upper confidence interval

    '''
    
    alpha, theta, lb, ub = _checkInput(obj, alpha, theta, lb, ub)

    H = obj.hessian(theta)
    if numpy.any(numpy.linalg.eig(H)[0] <= 0.0):
        H = obj.jtj(theta)
        # H = obj.fisherInformation(theta)

    I = numpy.linalg.inv(H)

    xU = theta + numpy.sqrt(0.5 * qchisq(1-alpha, df=1) * numpy.diag(I))
    xL = theta - numpy.sqrt(0.5 * qchisq(1-alpha, df=1) * numpy.diag(I))

    return xL, xU
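The interval computed above is the usual Wald-type interval: the half-width for parameter i is sqrt(0.5*qchisq(1 - alpha, df=1)*inv(H)[i, i]), with H the Hessian (or its JTJ approximation) of the cost at the MLE. A standalone sketch of the same calculation, assuming the factor of 0.5 matches the scaling of obj.cost:

import numpy as np
from scipy.stats import chi2


def wald_interval(theta, H, alpha=0.05):
    # theta: MLE estimate; H: Hessian (or JTJ approximation) of the cost at theta
    half_width = np.sqrt(0.5*chi2.ppf(1 - alpha, df=1)*np.diag(np.linalg.inv(H)))
    return theta - half_width, theta + half_width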
Example #6
def _profileGetInitialValues(theta,
                             i,
                             alpha,
                             obj,
                             approx=True,
                             lb=None,
                             ub=None):
    '''
    We would not use an approximation in general because if the input theta
    is an optimal value, then we would expect the Hessian to be a PSD matrix.
    '''
    p = len(theta)
    setIndex = set(range(p))

    H = obj.jtj(theta) if approx else obj.hessian(theta)

    activeIndex = list(setIndex - set([i]))
    tau = np.ones(p)
    dwdb = -np.linalg.lstsq(H[activeIndex][:, activeIndex],
                            H[i, activeIndex],
                            rcond=None)[0]  # rcond=None silences the FutureWarning
    tau[activeIndex] = dwdb

    q = qchisq(1 - alpha, df=1)
    h = np.sqrt(q / (H[i, i] + (H[i, activeIndex].T).dot(dwdb)))

    # we only move a half step and not a full step as a more
    # conservative approach is less likely to shoot out of bounds
    xhatU = theta + 0.5 * h * tau
    xhatL = theta - 0.5 * h * tau

    if lb is not None:
        for j, lb_j in enumerate(lb):
            if xhatL[j] <= lb_j: xhatL[j] = lb_j

    if ub is not None:
        for j, ub_j in enumerate(ub):
            if xhatU[j] >= ub_j: xhatU[j] = ub_j

    return xhatL, xhatU
Example #7
def _profileFgradient(xhat, i, alpha, obj, approx=True):
    c = obj.cost(xhat) + 0.5 * qchisq(1 - alpha, df=1)

    def func(x):
        if approx:
            H, output = obj.jtj(x, full_output=True)
        else:
            H, output = obj.hessian(x, full_output=True)

        g = output['grad']
        G = H.copy()
        G[i] = g.copy()
        lvector = g.copy()
        lvector[i] = obj.cost(x) - c
        # note that now G is the Jacobian of the objective function
        # so we need a transpose
        return G.T.dot(2 * lvector)

    return func
Example #8
def _profileFgradient(xhat, i, alpha, obj, approx=True):
    c = obj.cost(xhat) + 0.5 * qchisq(1-alpha, df=1)

    def func(x):
        if approx:
            H, output = obj.jtj(x, full_output=True)
        else:
            H, output = obj.hessian(x, full_output=True)

        g = output['grad']
        G = H.copy()
        G[i] = g.copy()
        lvector = g.copy()
        lvector[i] = obj.cost(x) - c
        # note that now G is the Jacobian of the objective function
        # so we need a transpose
        return G.T.dot(2*lvector)

    return func
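_profileFgradient and _profileFhessian correspond to the squared-norm objective F(x) = l(x).dot(l(x)), where l is the system built by _profileG: the returned gradient is 2*G.T.dot(l) and the returned matrix is a Gauss-Newton-style approximation of the Hessian of F (with an extra curvature term in the non-approx branch). A sketch of how they might be combined with a Newton-type minimizer; the wrapper name is made up:

import numpy as np
from scipy.optimize import minimize


def profile_F_endpoint(xhat, x0, i, alpha, obj, approx=True):
    lfunc = _profileG(xhat, i, alpha, obj)

    def F(x):
        l = lfunc(x)
        return l.dot(l)

    res = minimize(F, np.asarray(x0, dtype=float), method='Newton-CG',
                   jac=_profileFgradient(xhat, i, alpha, obj, approx),
                   hess=_profileFhessian(xhat, i, alpha, obj, approx))
    return res.x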
Example #9
def asymptotic(obj, alpha=0.05, theta=None, lb=None, ub=None):
    '''
    Finds the confidence interval at the :math:`\\alpha` level
    under the :math:`\\mathcal{X}^{2}` assumption for the
    likelihood

    Parameters
    ----------
    obj: ode object
        an object initialized from :class:`BaseLoss`
    alpha: numeric, optional
        confidence level, :math:`0 < \\alpha < 1`.  Defaults to 0.05.
    theta: array like, optional
        the MLE parameters.  Defaults to None, in which case theta will be
        inferred from the input obj
    lb: array like, optional
        expected lower bound
    ub: array like, optional
        expected upper bound

    Returns
    -------
    l: array like
        lower confidence interval
    u: array like
        upper confidence interval
    '''

    alpha, theta, lb, ub = _checkInput(obj, alpha, theta, lb, ub)

    H = obj.hessian(theta)
    if np.any(np.linalg.eig(H)[0] <= 0.0):
        H = obj.jtj(theta)
        ## H = obj.fisher_information(theta)

    I = np.linalg.inv(H)

    q = 0.5 * qchisq(1 - alpha, df=1)
    xU = theta + np.sqrt(q * np.diag(I))
    xL = theta - np.sqrt(q * np.diag(I))

    return xL, xU
Example #10
def asymptotic(obj, alpha=0.05, theta=None, lb=None, ub=None):
    '''
    Finds the confidence interval at the :math:`\\alpha` level
    under the :math:`\\mathcal{X}^{2}` assumption for the
    likelihood

    Parameters
    ----------
    obj: ode object
        an object initialized from :class:`BaseLoss`
    alpha: numeric, optional
        confidence level, :math:`0 < \\alpha < 1`.  Defaults to 0.05.
    theta: array like, optional
        the MLE parameters.  Defaults to None, in which case theta will be
        inferred from the input obj
    lb: array like, optional
        expected lower bound
    ub: array like, optional
        expected upper bound

    Returns
    -------
    l: array like
        lower confidence interval
    u: array like
        upper confidence interval
    '''

    alpha, theta, lb, ub = _checkInput(obj, alpha, theta, lb, ub)

    H = obj.hessian(theta)
    if np.any(np.linalg.eig(H)[0] <= 0.0):
        H = obj.jtj(theta)
        ## H = obj.fisher_information(theta)

    I = np.linalg.inv(H)

    q = 0.5*qchisq(1 - alpha, df=1)
    xU = theta + np.sqrt(q*np.diag(I))
    xL = theta - np.sqrt(q*np.diag(I))

    return xL, xU
Example #11
def _profileGetInitialValues(theta, i, alpha, obj, approx=True,
                             lb=None, ub=None):
    '''
    We would not use an approximation in general because if the input theta
    is an optimal value, then we would expect the Hessian to be a PSD matrix.
    '''
    p = len(theta)
    setIndex = set(range(p))

    H = obj.jtj(theta) if approx else obj.hessian(theta)

    activeIndex = list(setIndex - set([i]))
    tau = np.ones(p)
    dwdb = -np.linalg.lstsq(H[activeIndex][:, activeIndex],H[i, activeIndex])[0]
    tau[activeIndex] = dwdb

    q = qchisq(1 - alpha, df=1)
    h = np.sqrt(q/(H[i, i] + (H[i, activeIndex].T).dot(dwdb)))

    # we only move a half step and not a full step as a more
    # conservative approach is less likely to shoot out of bounds
    xhatU = theta + 0.5*h*tau
    xhatL = theta - 0.5*h*tau

    if lb is not None:
        for j, lb_j in enumerate(lb):
            if xhatL[j] <= lb_j: xhatL[j] = lb_j

    if ub is not None:
        for j, ub_j in enumerate(ub):
            if xhatU[j] >= ub_j: xhatU[j] = ub_j

    return xhatL, xhatU
Example #12
def _profileGSecondOrderCorrection(xhat, i, alpha, obj, approx=True):
    '''
    Finds the correction term when approximating the gradient to
    second order [Venzon1988]_, i.e. :math:`\\delta^{\\top} D(\\theta) \\delta`.
    If the system of non-linear equations is :math:`a`, then we return
    :math:`a + s` instead of :math:`G^{-1}a`, i.e. we have incorporated
    the correction into the gradient

    Parameters
    ----------
    x: array like
        current value of the parameters
    xhat: array like
        parameters at MLE
    i: int
        our target variable
    alpha: numeric
        confidence level, between :math:`(0,1)`
    obj:
        ode object
    approx: bool, optional
        default is True.

    Returns
    -------
    g: array like
        corrected set of non-linear equations

    '''
    s = sympy.symbols('s')
    c = obj.cost(xhat) + 0.5 * qchisq(1 - alpha, df=1)
    D0 = obj.hessian(xhat)

    def func(x):
        # First, we obtain all the necessary information, using the
        # notation of the original paper: G is the derivative of the
        # system of equations and JTJ is D(\theta)
        if approx:
            H, output = obj.jtj(x, full_output=True)
        else:
            H, output = obj.hessian(x, full_output=True)

        g = output['grad']
        G = H.copy()
        G[i] = g
        lvector = g.copy()
        lvector[i] = obj.cost(x) - c

        # computing the inverse, even though it is less accurate than
        # doing a least squares solve, saves a lot of computation time
        invG = np.linalg.inv(G)
        v = invG.dot(lvector)

        sTemp = v + sympy.Matrix(invG[:, i]) * s
        RHS = (sTemp.T * sympy.Matrix(H) * sTemp)[0]
        sRoots = sympy.solve(sympy.Eq(2 * s, RHS), s)
        abc = sympy.lambdify((), sympy.Matrix(sRoots), 'numpy')
        sRootsReal = np.asarray(abc()).real
        rootsSize = sRootsReal.size

        if rootsSize > 0:
            distL = np.zeros(len(sRootsReal))
            for j in range(rootsSize):
                vTemp = v.copy() + sRootsReal[j] * invG[:, i]
                distL[j] = vTemp.T.dot(D0.dot(vTemp))
                # finish finding the distance
            index = distL.argmin()
            lvector[i] += sRootsReal[index]
            return lvector
        else:
            return lvector

    return func
Example #13
def _profileGSecondOrderCorrection(xhat, i, alpha, obj, approx=True):
    '''
    Finds the correction term when approximating the gradient to
    second order, i.e. :math:`\\delta^{\\top} D(\\theta) \\delta`
    in [1].  If the system of non-linear equations is :math:`a`, then we
    return :math:`a + s` instead of :math:`G^{-1}a`, i.e. we have
    incorporated the correction into the gradient

    Parameters
    ----------
    x: array like
        current value of the parameters
    xhat: array like
        parameters at MLE
    i: int
        our target variable
    alpha: numeric
        confidence level, between (0,1)
    obj:
        ode object
    approx: bool, optional
        default is True.
    
    Returns
    -------
    g: array like
        corrected set of non-linear equations
        
    References
    ----------
    .. [1] Venzon and Moolgavkar, A Method for Computing
           Profile-Likelihood-Based Confidence Intervals,
           Journal of the Royal Statistical Society, Series
           C, Vol 37, No. 1, 1988, 87-94.
    '''
    s = sympy.symbols('s')
    c = obj.cost(xhat) + 0.5 * qchisq(1-alpha, df=1)
    D0 = obj.hessian(xhat)

    def func(x):
        # First, we obtain all the necessary information, using the
        # notation of the original paper: G is the derivative of the
        # system of equations and JTJ is D(\theta)
        if approx:
            H, output = obj.jtj(x, full_output=True)
        else:
            H, output = obj.hessian(x, full_output=True)

        g = output['grad']
        G = H.copy()
        G[i] = g
        lvector = g.copy()
        lvector[i] = obj.cost(x) - c

        # computing the inverse, even though it is less accurate than
        # doing a least squares solve, saves a lot of computation time
        invG = numpy.linalg.inv(G)
        v = invG.dot(lvector)
        
        sTemp = v + sympy.Matrix(invG[:,i])*s
        RHS = (sTemp.T * sympy.Matrix(H) * sTemp)[0]
        sRoots = sympy.solve(sympy.Eq(2*s, RHS), s)
        abc = sympy.lambdify((),sympy.Matrix(sRoots), 'numpy')
        sRootsReal = numpy.asarray(abc()).real
        rootsSize = sRootsReal.size

        if rootsSize > 0:
            distL = numpy.zeros(len(sRootsReal))
            for j in range(rootsSize):
                vTemp = v.copy() + sRootsReal[j] * invG[:,i]
                distL[j] = vTemp.T.dot(D0.dot(vTemp))
                # finish finding the distance
            index = distL.argmin()
            lvector[i] += sRootsReal[index]
            return lvector
        else:
            return lvector
        
    return func
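The sympy step in _profileGSecondOrderCorrection solves a scalar quadratic in s: writing u = invG[:, i], the equation 2*s = (v + u*s).T.dot(H).dot(v + u*s) expands, for symmetric H, to a*s**2 + b*s + c0 = 0 with a = u.dot(H).dot(u), b = 2*u.dot(H).dot(v) - 2 and c0 = v.dot(H).dot(v). The same roots could therefore be obtained numerically without symbolic algebra; a sketch (the helper name is made up):

import numpy as np


def _correction_roots(v, u, H):
    # coefficients of the quadratic in s obtained by expanding
    # (v + u*s).T H (v + u*s) - 2*s = 0, assuming H is symmetric
    a = u.dot(H).dot(u)
    b = 2.0*u.dot(H).dot(v) - 2.0
    c0 = v.dot(H).dot(v)
    # np.roots trims a leading zero coefficient, so the degenerate case a == 0
    # still returns the root of the remaining linear equation
    return np.roots([a, b, c0]).real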