import numpy as np
import sympy

# NOTE: qchisq (the chi-squared quantile function) and _checkInput are assumed
# to be provided elsewhere in the package and imported at module level.


def _profileFhessian(xhat, i, alpha, obj, approx=True):
    c = obj.cost(xhat) + 0.5*qchisq(1 - alpha, df=1)

    def func(x):
        if approx:
            H, output = obj.jtj(x, full_output=True)
        else:
            H, output = obj.hessian(x, full_output=True)

        g = output['grad']
        G = H.copy()
        G[i] = g.copy()

        lvector = g.copy()
        lvector[i] = obj.cost(x) - c

        # Gauss-Newton type term, 2*G^T G
        A = 2*G.T.dot(G)
        if not approx:
            # here we assume that only the second derivative is
            # significant
            for s in lvector:
                A += s*H
        # return np.diag(lvector).dot(H) + 2*G.T.dot(G)
        return A

    return func
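# Illustrative sketch (kept as a comment, not part of the original module):
# the quantities returned by _profileFhessian above and _profileFgradient
# further down are, up to the Gauss-Newton approximation, the Hessian and
# gradient of the squared residual of the profile system l(x).  The helper
# name `_profileF` and the attributes assumed on `obj` (cost, gradient) are
# assumptions for illustration only.
#
# def _profileF(xhat, i, alpha, obj):
#     c = obj.cost(xhat) + 0.5*qchisq(1 - alpha, df=1)
#
#     def func(x):
#         lvector = obj.gradient(x)
#         lvector[i] = obj.cost(x) - c
#         return lvector.dot(lvector)   # l(x)^T l(x)
#
#     return func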
def _profileG(xhat, i, alpha, obj):
    c = obj.cost(xhat) + 0.5*qchisq(1 - alpha, df=1)

    def func(x):
        # gradient of the objective, with the i-th component replaced
        # by the likelihood-ratio constraint cost(x) - c
        r = obj.gradient(x)
        r[i] = obj.cost(x) - c
        return r

    return func
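# Minimal usage sketch (commented out): the function returned by _profileG is
# the Venzon-Moolgavkar system of non-linear equations whose root is one end
# point of the profile-likelihood confidence interval.  `obj` (a fitted loss
# object) and `theta_hat` (its MLE, a numpy array) are assumed names, and
# scipy is only an assumption here, not a requirement of this module.
#
# from scipy.optimize import root
#
# i = 0                                   # parameter we profile over
# fun = _profileG(theta_hat, i, 0.05, obj)
# sol = root(fun, theta_hat + 0.1)        # crude start; the initial-value
#                                         # helper below gives a better one
# upper_end_point = sol.x[i]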
def _profileGetInitialValues(theta, i, alpha, obj, approx=True,
                             lb=None, ub=None):
    '''
    We would not use an approximation in general because, if the input
    theta is an optimal value, then we would expect the Hessian to be
    a PSD matrix.
    '''
    p = len(theta)
    setIndex = set(range(p))

    H = obj.jtj(theta) if approx else obj.hessian(theta)

    activeIndex = list(setIndex - set([i]))
    tau = np.ones(p)
    # rcond=None silences the FutureWarning from numpy
    dwdb = -np.linalg.lstsq(H[activeIndex][:, activeIndex],
                            H[i, activeIndex], rcond=None)[0]
    tau[activeIndex] = dwdb

    q = qchisq(1 - alpha, df=1)
    h = np.sqrt(q/(H[i, i] + (H[i, activeIndex].T).dot(dwdb)))

    # we only move a half step and not a full step, as a more
    # conservative approach is less likely to shoot out of bounds
    xhatU = theta + 0.5*h*tau
    xhatL = theta - 0.5*h*tau

    # clip the starting points to the feasible region if bounds are given
    if lb is not None:
        xhatL = np.maximum(xhatL, lb)
    if ub is not None:
        xhatU = np.minimum(xhatU, ub)

    return xhatL, xhatU
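# Illustrative sketch (commented out): the half Newton step above yields one
# starting point below and one above the MLE, which would then seed the root
# solve shown earlier.  `obj`, `theta_hat`, `lower_bounds` and `upper_bounds`
# are assumed names for illustration; scipy is again only an assumption.
#
# from scipy.optimize import root
#
# x0_lower, x0_upper = _profileGetInitialValues(theta_hat, 0, 0.05, obj,
#                                               lb=lower_bounds,
#                                               ub=upper_bounds)
# sol_upper = root(_profileG(theta_hat, 0, 0.05, obj), x0_upper)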
def _profileFgradient(xhat, i, alpha, obj, approx=True):
    c = obj.cost(xhat) + 0.5*qchisq(1 - alpha, df=1)

    def func(x):
        if approx:
            H, output = obj.jtj(x, full_output=True)
        else:
            H, output = obj.hessian(x, full_output=True)

        g = output['grad']
        G = H.copy()
        G[i] = g.copy()

        lvector = g.copy()
        lvector[i] = obj.cost(x) - c

        # note that now G is the Jacobian of the objective function
        # so we need a transpose
        return G.T.dot(2*lvector)

    return func
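# Alternative sketch (commented out): instead of a root solve, the same end
# point can be sought by minimising the squared system, with
# _profileFgradient and _profileFhessian supplying the derivatives.  The
# objective comes from the hypothetical _profileF sketched near the top of
# this module; scipy and the variable names are assumptions.
#
# from scipy.optimize import minimize
#
# i, alpha = 0, 0.05
# f = _profileF(theta_hat, i, alpha, obj)            # hypothetical helper
# res = minimize(f, x0_upper,
#                jac=_profileFgradient(theta_hat, i, alpha, obj),
#                hess=_profileFhessian(theta_hat, i, alpha, obj),
#                method='trust-ncg')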
def asymptotic(obj, alpha=0.05, theta=None, lb=None, ub=None):
    '''
    Finds the confidence interval at the :math:`\\alpha` level
    under the :math:`\\chi^{2}` assumption for the likelihood

    Parameters
    ----------
    obj: ode object
        an object initialized from :class:`BaseLoss`
    alpha: numeric, optional
        confidence level, :math:`0 < \\alpha < 1`.  Defaults to 0.05.
    theta: array like, optional
        the MLE parameters.  Defaults to None, in which case theta
        will be inferred from the input obj
    lb: array like, optional
        expected lower bound
    ub: array like, optional
        expected upper bound

    Returns
    -------
    l: array like
        lower confidence interval
    u: array like
        upper confidence interval
    '''
    alpha, theta, lb, ub = _checkInput(obj, alpha, theta, lb, ub)

    H = obj.hessian(theta)
    # fall back to the J^T J approximation when the Hessian is not
    # positive definite
    if np.any(np.linalg.eig(H)[0] <= 0.0):
        H = obj.jtj(theta)
        # H = obj.fisher_information(theta)

    I = np.linalg.inv(H)
    q = 0.5*qchisq(1 - alpha, df=1)
    xU = theta + np.sqrt(q*np.diag(I))
    xL = theta - np.sqrt(q*np.diag(I))

    return xL, xU
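# Minimal usage sketch (commented out): `obj` is assumed to be a fitted loss
# object exposing cost/gradient/hessian/jtj and `theta_hat` its MLE; both
# names are assumptions for illustration.
#
# xL, xU = asymptotic(obj, alpha=0.05, theta=theta_hat)
# print(np.vstack((xL, theta_hat, xU)).T)   # lower, estimate, upper per parameter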
def _profileGSecondOrderCorrection(xhat, i, alpha, obj, approx=True):
    '''
    Finds the correction term when approximating the gradient to
    second order, i.e. :math:`\\delta^{\\top} D(\\theta) \\delta` in
    [Venzon1988]_.  If the system of non-linear equations is a, then
    we return :math:`a + s` instead of :math:`G^{-1}a`, i.e. we have
    incorporated the correction into the gradient

    Parameters
    ----------
    x: array like
        current value of the parameters (argument of the returned function)
    xhat: array like
        parameters at MLE
    i: int
        our target variable
    alpha: numeric
        confidence level, between :math:`(0,1)`
    obj: ode object
    approx: bool, optional
        default is True.

    Returns
    -------
    func: callable
        a function returning the corrected set of non-linear equations

    References
    ----------
    .. [Venzon1988] Venzon and Moolgavkar, A Method for Computing
        Profile-Likelihood-Based Confidence Intervals, Journal of the
        Royal Statistical Society, Series C, Vol 37, No. 1, 1988, 87-94.
    '''
    s = sympy.symbols('s')
    c = obj.cost(xhat) + 0.5*qchisq(1 - alpha, df=1)
    D0 = obj.hessian(xhat)

    def func(x):
        # first, we obtain all the necessary information.
        # We use the notation in the original paper, so that G is the
        # derivative of the system of equations and JTJ is D(\theta)
        if approx:
            H, output = obj.jtj(x, full_output=True)
        else:
            H, output = obj.hessian(x, full_output=True)

        g = output['grad']
        G = H.copy()
        G[i] = g

        lvector = g.copy()
        lvector[i] = obj.cost(x) - c

        # computing the inverse, even though it is less accurate than
        # doing a least squares, we are saving a lot of computation
        # time here
        invG = np.linalg.inv(G)
        v = invG.dot(lvector)

        sTemp = v + sympy.Matrix(invG[:, i])*s
        RHS = (sTemp.T*sympy.Matrix(H)*sTemp)[0]
        sRoots = sympy.solve(sympy.Eq(2*s, RHS), s)

        abc = sympy.lambdify((), sympy.Matrix(sRoots), 'numpy')
        # flatten to a 1-D array of candidate roots
        sRootsReal = np.asarray(abc()).real.ravel()
        rootsSize = sRootsReal.size

        if rootsSize > 0:
            distL = np.zeros(len(sRootsReal))
            for j in range(rootsSize):
                vTemp = v.copy() + sRootsReal[j]*invG[:, i]
                distL[j] = vTemp.T.dot(D0.dot(vTemp))
            # finish finding the distance
            index = distL.argmin()
            lvector[i] += sRootsReal[index]
            return lvector
        else:
            return lvector

    return func
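# Illustrative sketch (commented out): the corrected system can be used as a
# drop-in replacement for the first-order system from _profileG in the root
# solve shown earlier.  scipy and the variable names are assumptions.
#
# from scipy.optimize import root
#
# fun2 = _profileGSecondOrderCorrection(theta_hat, 0, 0.05, obj, approx=True)
# sol2 = root(fun2, x0_upper)
# upper_end_point = sol2.x[0]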