import numpy
import scipy.linalg
import scipy.sparse
import scipy.sparse.linalg
import scipy.optimize

from cvxopt import matrix, solvers

# NOTE: the line-search routines (lineSearch, backTrackingLineSearch,
# exactLineSearch2), the barrier helpers (_logBarrier, _logBarrierGrad,
# _findInitialBarrier), the KKT residuals (_rDualFunc, _rCentFunc, _rPriFunc),
# the step-size rules (_maxStepSizePD, _maxStepSizePDC, _deltaZFunc,
# _residualLineSearchPD, _residualLineSearchPDC), the Hessian approximations
# (BFGS, SR1, DFP), and forward, Disp, sufficientNewtonDecrement, _setup,
# _checkInitialValue, _dualityGap, atol and EPSILON are defined elsewhere
# in this package.


def _findStepSize(x, deltaX, z, deltaZ, G, h, y, deltaY, A, b,
                  func, grad, t, g):
    if G is None:
        maxStep = 1
        barrierFunc = _logBarrier(func, t, G, h)
        lineFunc = lineSearch(x, deltaX, barrierFunc)
        searchScale = deltaX.ravel().dot(g.ravel())
    else:
        maxStep = _maxStepSizePDC(z, deltaZ, x, deltaX, G, h)
        lineFunc = _residualLineSearchPDC(x, deltaX,
                                          grad, t,
                                          z, deltaZ, G, h,
                                          y, deltaY, A, b)
        searchScale = -lineFunc(0.0)

    # an exact line search via scipy's minimization routines can fail on
    # this problem, so we use a backtracking line search instead
    step, fx = backTrackingLineSearch(maxStep, lineFunc, searchScale,
                                      alpha=0.0001, beta=0.8)
    return step, fx
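# For reference: backTrackingLineSearch is defined elsewhere in the package.
# A minimal sketch of a standard Armijo backtracking search consistent with
# the call signature used above, (maxStep, lineFunc, searchScale, alpha,
# beta) -> (step, fx), would look roughly like this; treat it as an
# illustration, not the packaged implementation.
def _backTrackingLineSearchSketch(maxStep, lineFunc, searchScale,
                                  alpha=0.0001, beta=0.8):
    f0 = lineFunc(0.0)
    step = maxStep
    # shrink the step until the Armijo sufficient-decrease condition holds
    while lineFunc(step) > f0 + alpha * step * searchScale:
        step *= beta
        if step <= 1e-16:  # guard against a non-descent direction
            break
    return step, lineFunc(step)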
def _solveKKTAndUpdatePD(x, func, grad, fx, g, gOrig, Haug,
                         z, G, h, y, A, b, t):
    p = len(x)
    step = 1.0
    deltaX = None
    deltaZ = None
    deltaY = None

    # standard log barrier, \nabla f(x) / -f(x)
    if G is not None:
        s = h - G.dot(x)
        Gs = G / s
        zs = z / s
        # now find the matrix/vector of our qp
        Haug += numpy.einsum('ji,ik->jk', G.T, G * zs)
        Dphi = Gs.sum(axis=0).reshape(p, 1)
        g += Dphi / t

    # find the solution to a Newton step to get the descent direction
    if A is not None:
        bTemp = _rPriFunc(x, A, b)
        g += A.T.dot(y)
        LHS = scipy.sparse.bmat([[Haug, A.T],
                                 [A, None]], 'csc')
        RHS = numpy.append(g, bTemp, axis=0)
        # if the total number of elements (in sparse format) is more than
        # half the total possible elements, treat it as a dense matrix
        if LHS.size >= (LHS.shape[0] * LHS.shape[1]) / 2:
            deltaTemp = scipy.linalg.solve(LHS.todense(), -RHS).reshape(len(RHS), 1)
        else:
            deltaTemp = scipy.sparse.linalg.spsolve(LHS, -RHS).reshape(len(RHS), 1)
        deltaX = deltaTemp[:p]
        deltaY = deltaTemp[p::]
    else:
        deltaX = scipy.linalg.solve(Haug, -g).reshape(p, 1)

    # store the information for the next iteration
    oldFx = fx
    oldGrad = gOrig.copy()

    if G is None:
        maxStep = 1
        barrierFunc = _logBarrier(x, func, t, G, h)
        lineFunc = lineSearch(maxStep, x, deltaX, barrierFunc)
        searchScale = deltaX.ravel().dot(g.ravel())
    else:
        maxStep = _maxStepSizePD(z, x, deltaX, t, G, h)
        lineFunc = _residualLineSearchPD(maxStep, x, deltaX,
                                         grad, t,
                                         z, _deltaZFunc, G, h,
                                         y, deltaY, A, b)
        searchScale = -lineFunc(0.0)

    # perform an exact line search; scipy's minimization routines can fail
    # here, in which case a backtracking search is the fallback
    step, fx = exactLineSearch2(maxStep, lineFunc, searchScale, oldFx)
    # step, fx = exactLineSearch(maxStep, lineFunc)
    # if fx >= oldFx or step <= 0:
    #     step, fx = backTrackingLineSearch(maxStep, lineFunc, searchScale)

    # found one iteration, now update the information
    if z is not None:
        z += step * _deltaZFunc(x, deltaX, t, z, G, h)
    if y is not None:
        y += step * deltaY
    x += step * deltaX

    return x, y, z, fx, step, oldFx, oldGrad, deltaX
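# The primal-dual update above relies on _deltaZFunc, defined elsewhere.  A
# sketch of the usual first-order update, assuming it linearizes the
# perturbed complementarity condition z_i * s_i = 1/t along the Newton step
# with s = h - G x (an assumption based on the standard derivation, not the
# packaged code):
def _deltaZFuncSketch(x, deltaX, t, z, G, h):
    s = h - G.dot(x)
    # (z + dz) * (s - G dx) = 1/t, keeping only first-order terms
    return 1.0 / (t * s) - z + (z / s) * G.dot(deltaX)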
def _updateFeasibleNewton(x, gOrig, H, t, z, G, h, y, A, b):
    # NOTE: this helper mirrors the inner loop of ipBar; j, step0,
    # barrierFunc, fx, oldFx and oldOldFx are assumed to be supplied by the
    # enclosing solver state
    p = len(x)

    # standard log barrier
    if G is not None:
        s = h - G.dot(x)
        Gs = G / s
        s2 = s**2
        Dphi = Gs.sum(axis=0).reshape(p, 1)
        if j == 0:
            t = _findInitialBarrier(gOrig, Dphi, A)
        Haug = H / t + numpy.einsum('ji,ik->jk', G.T, G / s2)
        g = gOrig + Dphi
    else:
        Haug = H / t
        g = gOrig

    # solving the least squares problem to get the descent direction
    if A is not None:
        # re-adjust the bounds
        bTemp = b - A.dot(x)
        LHS = scipy.sparse.bmat([[Haug, A.T],
                                 [A, None]], 'csc')
        RHS = numpy.append(g, -bTemp, axis=0)
        # treat LHS as dense if at least half of its entries are non-zero
        if LHS.size >= (LHS.shape[0] * LHS.shape[1]) / 2:
            deltaTemp = scipy.linalg.solve(LHS.todense(), -RHS).reshape(len(RHS), 1)
        else:
            deltaTemp = scipy.sparse.linalg.spsolve(LHS, -RHS).reshape(len(RHS), 1)
        deltaX = deltaTemp[:p]
        y = deltaTemp[p::]
    else:
        deltaX = scipy.linalg.solve(Haug, -g)

    oldOldFxTemp = oldFx
    oldFx = fx
    oldGrad = gOrig

    lineFunc = lineSearch(step0, x, deltaX, barrierFunc)
    # alternatives: exactLineSearch2, or scipy.optimize.line_search with
    # _logBarrierGrad as the gradient, falling back to backtracking on failure
    step, fx = backTrackingLineSearch(step0, lineFunc,
                                      deltaX.ravel().dot(g.ravel()), oldFx)

    oldOldFx = oldOldFxTemp
    x += step * deltaX

    return x, z, y, fx, oldFx, oldOldFx
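# _findInitialBarrier is defined elsewhere; the standard recipe (Boyd &
# Vandenberghe, section 11.3.1) picks t to minimize the norm of the dual
# residual || t*g + Dphi + A.T nu || in a least-squares sense.  A sketch for
# the case without equality constraints (an illustration only; the packaged
# version also handles A, and may guard against a non-positive t):
def _findInitialBarrierSketch(g, Dphi, A=None):
    g, Dphi = g.ravel(), Dphi.ravel()
    # least-squares minimizer of || t*g + Dphi || over the scalar t
    return -g.dot(Dphi) / g.dot(g)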
def _solveKKTAndUpdatePDCOrig(x, func, grad, fx, oldFx, oldOldFx, g, gOrig,
                              Haug, z, G, h, y, A, b, t):
    # this is the original version
    p = len(x)
    step = 1
    deltaX = None
    deltaZ = None
    deltaY = None

    rDual = _rDualFunc(x, grad, z, G, y, A)
    RHS = rDual

    if G is not None:
        s = h - G.dot(x)
        Gs = G / s
        zs = z / s
        # now find the matrix/vector of our qp
        rCent = _rCentFunc(z, s, t)
        RHS = numpy.append(RHS, rCent, axis=0)

    ## solving the QP to get the descent direction
    if A is not None:
        bTemp = b - A.dot(x)
        rPri = _rPriFunc(x, A, b)
        RHS = numpy.append(RHS, rPri, axis=0)

        if G is not None:
            LHS = scipy.sparse.bmat([[Haug, G.T, A.T],
                                     [G * -z, scipy.sparse.diags(s.ravel(), 0), None],
                                     [A, None, None]], 'csc')
            # treat LHS as dense if at least half of its entries are non-zero
            if LHS.size >= (LHS.shape[0] * LHS.shape[1]) / 2:
                deltaTemp = scipy.linalg.solve(LHS.todense(), -RHS).reshape(len(RHS), 1)
            else:
                deltaTemp = scipy.sparse.linalg.spsolve(LHS, -RHS).reshape(len(RHS), 1)
            deltaX = deltaTemp[:p]
            deltaZ = deltaTemp[p:-len(A)]
            deltaY = deltaTemp[-len(A):]
        else:  # G is None
            LHS = scipy.sparse.bmat([[Haug, A.T],
                                     [A, None]], 'csc')
            if LHS.size >= (LHS.shape[0] * LHS.shape[1]) / 2:
                deltaTemp = scipy.linalg.solve(LHS.todense(), -RHS).reshape(len(RHS), 1)
            else:
                deltaTemp = scipy.sparse.linalg.spsolve(LHS, -RHS).reshape(len(RHS), 1)
            deltaX = deltaTemp[:p]
            deltaY = deltaTemp[p::]
    else:  # A is None
        if G is not None:
            LHS = scipy.sparse.bmat([[Haug, G.T],
                                     [G * -z, scipy.sparse.diags(s.ravel(), 0)]], 'csc')
            if LHS.size >= (LHS.shape[0] * LHS.shape[1]) / 2:
                deltaTemp = scipy.linalg.solve(LHS.todense(), -RHS).reshape(len(RHS), 1)
            else:
                deltaTemp = scipy.sparse.linalg.spsolve(LHS, -RHS).reshape(len(RHS), 1)
            deltaX = deltaTemp[:p]
            deltaZ = deltaTemp[p::]
        else:
            deltaX = scipy.linalg.solve(Haug, -RHS).reshape(len(RHS), 1)

    # store the information for the next iteration
    oldFx = fx
    oldGrad = gOrig.copy()

    if G is None:
        maxStep = 1
        barrierFunc = _logBarrier(x, func, t, G, h)
        lineFunc = lineSearch(x, deltaX, barrierFunc)
        searchScale = deltaX.ravel().dot(g.ravel())
    else:
        maxStep = _maxStepSizePDC(z, deltaZ, x, deltaX, G, h)
        lineFunc = _residualLineSearchPDC(x, deltaX,
                                          grad, t,
                                          z, deltaZ, G, h,
                                          y, deltaY, A, b)
        searchScale = -lineFunc(0.0)

    # an exact line search via scipy can fail here, so we use a
    # backtracking line search instead
    # step, fx = exactLineSearch2(maxStep, lineFunc, searchScale, oldFx)
    step, fx = backTrackingLineSearch(maxStep, lineFunc, searchScale,
                                      alpha=0.0001, beta=0.8)

    if z is not None:
        z += step * deltaZ
    if y is not None:
        y += step * deltaY
    x += step * deltaX

    return x, y, z, fx, step, oldFx, oldGrad, deltaX
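# Sketches of the KKT residuals referenced throughout this module
# (_rDualFunc, _rCentFunc, _rPriFunc).  These follow the standard primal-dual
# interior-point formulation (Boyd & Vandenberghe, section 11.7) with slack
# s = h - G x; the packaged definitions live elsewhere and may differ in
# sign convention or detail.
def _rDualFuncSketch(x, grad, z, G, y, A):
    r = grad(x.ravel()).reshape(len(x), 1)
    if G is not None:
        r = r + G.T.dot(z)
    if A is not None:
        r = r + A.T.dot(y)
    return r

def _rCentFuncSketch(z, s, t):
    # perturbed complementarity: each z_i * s_i is driven towards 1/t
    return z * s - 1.0 / t

def _rPriFuncSketch(x, A, b):
    # equality-constraint violation
    return A.dot(x) - b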
def _updateLineSearch(x, fx, oldFx, oldOldFx, oldDeltaX, g, H,
                      func, grad, z, G, h, y, A, b):
    initVals = dict()
    initVals['x'] = matrix(oldDeltaX)

    # readjust the bounds and initial value if possible,
    # as we try our best to use a warm start
    if G is not None:
        hTemp = h - G.dot(x)
        dims = {'l': G.shape[0], 'q': [], 's': []}
        initVals['z'] = matrix(z)
        s = hTemp - G.dot(oldDeltaX)
        # halve the warm-start direction until it is strictly feasible
        while numpy.any(s <= 0.0):
            oldDeltaX *= 0.5
            s = hTemp - G.dot(oldDeltaX)
        initVals['s'] = matrix(s)
        initVals['x'] = matrix(oldDeltaX)
    else:
        hTemp = None
        dims = []

    if A is not None:
        initVals['y'] = matrix(y)
        bTemp = b - A.dot(x)
    else:
        bTemp = None

    # solving the QP to get the descent direction
    if A is not None:
        if G is not None:
            qpOut = solvers.coneqp(matrix(H), matrix(g), matrix(G),
                                   matrix(hTemp), dims, matrix(A),
                                   matrix(bTemp))
        else:
            qpOut = solvers.coneqp(matrix(H), matrix(g), None, None, None,
                                   matrix(A), matrix(bTemp))
    else:
        if G is not None:
            qpOut = solvers.coneqp(matrix(H), matrix(g), matrix(G),
                                   matrix(hTemp), dims, initvals=initVals)
        else:
            qpOut = solvers.coneqp(matrix(H), matrix(g))

    # extract the descent direction and do a line search
    deltaX = numpy.array(qpOut['x'])
    oldOldFx = oldFx
    oldFx = fx
    oldGrad = g.copy()

    lineFunc = lineSearch(x, deltaX, func)
    # alternatives: exactLineSearch, exactLineSearch2, or scipy's
    # line_search; the backtracking search below is the robust default
    step, fx = backTrackingLineSearch(1, lineFunc,
                                      deltaX.ravel().dot(g.ravel()),
                                      alpha=0.0001, beta=0.8)

    x += step * deltaX
    if G is not None:
        z[:] = numpy.array(qpOut['z'])
    if A is not None:
        y[:] = numpy.array(qpOut['y'])

    return x, deltaX, z, y, fx, oldFx, oldOldFx, oldGrad, step, qpOut['iterations']
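# Minimal usage illustration of the cvxopt call pattern above, on made-up
# data (the QP here is hypothetical): minimize 0.5*u'Pu + q'u subject to
# Gu <= h.  With dims=None, coneqp treats every inequality row as a
# componentwise ('l') constraint, which is what the
# {'l': G.shape[0], 'q': [], 's': []} dictionary built above spells out.
def _coneqpUsageExample():
    P = matrix([[2.0, 0.0], [0.0, 2.0]])
    q = matrix([1.0, -1.0])
    Gd = matrix([[-1.0, 0.0], [0.0, -1.0]])   # u >= -1, written as -u <= 1
    hd = matrix([1.0, 1.0])
    sol = solvers.coneqp(P, q, Gd, hd)
    return numpy.array(sol['x'])  # the descent direction, in the caller's notation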
def ipBar(func, grad, hessian=None, x0=None,
          lb=None, ub=None, G=None, h=None, A=None, b=None,
          maxiter=100, disp=0, full_output=False):

    z, G, h, y, A, b = _setup(lb, ub, G, h, A, b)
    x = _checkInitialValue(x0, G, h, A, b)
    p = len(x)

    if hessian is None:
        approxH = BFGS
    elif type(hessian) is str:
        if hessian.lower() == 'bfgs':
            approxH = BFGS
        elif hessian.lower() == 'sr1':
            approxH = SR1
        elif hessian.lower() == 'dfp':
            approxH = DFP
        else:
            raise Exception("Input name of hessian is not recognizable")
        hessian = None

    if grad is None:
        def finiteForward(x, func, p):
            def finiteForward1(x):
                return forward(func, x.ravel())
            return finiteForward1
        grad = finiteForward(x, func, p)

    if G is not None:
        m = G.shape[0]
    else:
        m = 1

    fx = None
    oldFx = None
    oldOldFx = None
    oldGrad = None
    deltaX = numpy.zeros((p, 1))
    g = numpy.zeros((p, 1))
    H = numpy.zeros((p, p))
    Haug = numpy.zeros((p, p))

    dispObj = Disp(disp)
    i = 0
    t = 0.01
    mu = 20.0
    step0 = 1.0  # back tracking search step maximum value
    step = 0.0

    j = 0
    while maxiter >= j:
        oldFx = numpy.inf
        # define the barrier function given t.  Note that t is adjusted at
        # each outer iteration
        barrierFunc = _logBarrier(func, t, G, h)
        if j == 0:
            fx = barrierFunc(x)

        update = True
        # the inner loop used to terminate on relative/absolute tolerances;
        # it now runs until the Wolfe line search below fails
        while update:
            gOrig = grad(x.ravel()).reshape(p, 1)

            if hessian is None:
                if oldGrad is None:
                    H = numpy.eye(len(x))
                else:
                    diffG = numpy.array(gOrig - oldGrad).ravel()
                    H = approxH(H, diffG, step * deltaX.ravel())
            else:
                H = hessian(x.ravel())

            ## standard log barrier
            if G is not None:
                s = h - G.dot(x)
                Gs = G / s
                s2 = s**2
                Dphi = Gs.sum(axis=0).reshape(p, 1)
                if j == 0:
                    t = _findInitialBarrier(gOrig, Dphi, A)
                Haug = t * H + numpy.einsum('ji,ik->jk', G.T, G / s2)
                g = t * gOrig + Dphi
            else:
                Haug = t * H
                g = t * gOrig

            ## solving the QP to get the descent direction
            if A is not None:
                # re-adjust the bounds
                bTemp = b - A.dot(x)
                LHS = scipy.sparse.bmat([[Haug, A.T],
                                         [A, None]], 'csc')
                RHS = numpy.append(g, -bTemp, axis=0)
                # treat LHS as dense if at least half of its entries are non-zero
                if LHS.size >= (LHS.shape[0] * LHS.shape[1]) / 2:
                    deltaTemp = scipy.linalg.solve(LHS.todense(), -RHS).reshape(len(RHS), 1)
                else:
                    deltaTemp = scipy.sparse.linalg.spsolve(LHS, -RHS).reshape(len(RHS), 1)
                deltaX = deltaTemp[:p]
                y = deltaTemp[p::]
            else:
                deltaX = scipy.linalg.solve(Haug, -g)

            oldOldFxTemp = oldFx
            oldFx = fx
            oldGrad = gOrig

            lineFunc = lineSearch(x, deltaX, barrierFunc)
            barrierGrad = _logBarrierGrad(func, gOrig, t, G, h)
            step, fc, gc, fx, oldFx, new_slope = scipy.optimize.line_search(
                barrierFunc,
                barrierGrad,
                x.ravel(),
                deltaX.ravel(),
                g.ravel(),
                oldFx,
                oldOldFx)

            if step is None:
                # the Wolfe line search failed; fall back to a backtracking
                # search and end the inner iterations
                step, fx = backTrackingLineSearch(step0, lineFunc,
                                                  deltaX.ravel().dot(g.ravel()),
                                                  alpha=0.0001, beta=0.8)
                update = False

            oldOldFx = oldOldFxTemp
            x += step * deltaX

            j += 1
            dispObj.d(j, x.ravel(), func(x.ravel()),
                      deltaX.ravel(), g.ravel(), step)
        # end of inner iteration

        i += 1
        # obtain the missing Lagrangian multiplier
        if G is not None:
            s = h - G.dot(x)
            z = 1.0 / (t * s)

        if m / t < atol:
            if sufficientNewtonDecrement(deltaX.ravel(), g.ravel()):
                break
        else:
            t *= mu

        if scipy.linalg.norm(_rDualFunc(x, grad, z, G, y, A)) <= EPSILON:
            break
    # end of outer iteration

    # TODO: full_output - dual variables
    if full_output:
        output = dict()
        output['t'] = t
        output['outerIter'] = i
        output['innerIter'] = j

        if G is not None:
            s = h - G.dot(x)
            z = 1.0 / (t * s)
            output['s'] = s.ravel()
            output['z'] = z.ravel()
        if A is not None:
            y = y / t
            output['y'] = y.ravel()

        gap = _dualityGap(func, x, z, G, h, y, A, b)
        output['subopt'] = m / t
        output['dgap'] = gap
        output['fx'] = func(x)
        output['H'] = H
        output['g'] = gOrig.ravel()
        output['rDual'] = _rDualFunc(x, grad, z, G, y, A)

        return x.ravel(), output
    else:
        return x.ravel()
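# ipBar minimizes a log-barrier surrogate; _logBarrier is defined elsewhere
# (note that ipBar calls it as _logBarrier(func, t, G, h) while other
# helpers in this module pass x first).  Given how Haug and g are assembled
# above (Haug = t*H + sum_i G_i'G_i/s_i^2, g = t*gOrig + Dphi), the
# surrogate is presumably B(x) = t*f(x) - sum(log(h - Gx)).  A sketch under
# that assumption:
def _logBarrierSketch(func, t, G, h):
    def B(x):
        if G is None:
            return t * func(x)
        s = h - G.dot(x.reshape(-1, 1))
        if numpy.any(s <= 0.0):
            return numpy.inf  # outside the barrier's domain
        return t * func(x) - numpy.log(s).sum()
    return B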
def _solveKKTAndUpdatePDC(x, func, grad, fx, g, gOrig, Haug,
                          z, G, h, y, A, b, t):
    p = len(x)
    step = 1
    deltaX = None
    deltaZ = None
    deltaY = None

    rDual = _rDualFunc(x, grad, z, G, y, A)
    RHS = rDual

    if G is not None:
        s = h - G.dot(x)
        Gs = G / s
        zs = z / s
        # now find the matrix/vector of our qp
        rCent = _rCentFunc(z, s, t)
        RHS = numpy.append(RHS, rCent, axis=0)

    ## solving the QP to get the descent direction
    if A is not None:
        bTemp = b - A.dot(x)
        g += A.T.dot(y)
        rPri = _rPriFunc(x, A, b)
        RHS = numpy.append(RHS, rPri, axis=0)

        if G is not None:
            LHS = scipy.sparse.bmat([[Haug, G.T, A.T],
                                     [G * -z, scipy.sparse.diags(s.ravel(), 0), None],
                                     [A, None, None]], 'csc')
            # treat LHS as dense if at least half of its entries are non-zero
            if LHS.size >= (LHS.shape[0] * LHS.shape[1]) / 2:
                deltaTemp = scipy.linalg.solve(LHS.todense(), -RHS).reshape(len(RHS), 1)
            else:
                deltaTemp = scipy.sparse.linalg.spsolve(LHS, -RHS).reshape(len(RHS), 1)
            deltaX = deltaTemp[:p]
            deltaZ = deltaTemp[p:-len(A)]
            deltaY = deltaTemp[-len(A):]
        else:  # G is None
            LHS = scipy.sparse.bmat([[Haug, A.T],
                                     [A, None]], 'csc')
            if LHS.size >= (LHS.shape[0] * LHS.shape[1]) / 2:
                deltaTemp = scipy.linalg.solve(LHS.todense(), -RHS).reshape(len(RHS), 1)
            else:
                deltaTemp = scipy.sparse.linalg.spsolve(LHS, -RHS).reshape(len(RHS), 1)
            deltaX = deltaTemp[:p]
            deltaY = deltaTemp[p::]
    else:  # A is None
        if G is not None:
            LHS = scipy.sparse.bmat([[Haug, G.T],
                                     [G * -z, scipy.sparse.diags(s.ravel(), 0)]], 'csc')
            deltaTemp = scipy.sparse.linalg.spsolve(LHS, -RHS).reshape(len(RHS), 1)
            deltaX = deltaTemp[:p]
            deltaZ = deltaTemp[p::]
        else:
            deltaX = scipy.linalg.solve(Haug, -RHS).reshape(len(RHS), 1)

    # store the information for the next iteration
    oldFx = fx
    oldGrad = gOrig.copy()

    if G is None:
        maxStep = 1
        barrierFunc = _logBarrier(x, func, t, G, h)
        lineFunc = lineSearch(maxStep, x, deltaX, barrierFunc)
        searchScale = deltaX.ravel().dot(g.ravel())
    else:
        maxStep = _maxStepSizePDC(z, deltaZ, x, deltaX, G, h)
        lineFunc = _residualLineSearchPDC(step, x, deltaX,
                                          grad, t,
                                          z, deltaZ, G, h,
                                          y, deltaY, A, b)
        searchScale = -lineFunc(0.0)

    # perform an exact line search; scipy's minimization routines can fail
    # here, in which case a backtracking search is the fallback
    step, fx = exactLineSearch2(maxStep, lineFunc, searchScale, oldFx)
    # if fx >= oldFx or step <= 0 or step >= maxStep:
    #     step, fx = backTrackingLineSearch(maxStep, lineFunc, searchScale, oldFx)

    if z is not None:
        z += step * deltaZ
    if y is not None:
        y += step * deltaY
    x += step * deltaX

    return x, y, z, fx, step, oldFx, oldGrad, deltaX
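# _maxStepSizePDC is defined elsewhere; the standard rule (Boyd &
# Vandenberghe, section 11.7.3) limits the step so that the dual variables
# stay positive and the primal iterate stays strictly inside the
# inequalities.  A sketch under that assumption:
def _maxStepSizePDCSketch(z, deltaZ, x, deltaX, G, h):
    step = 1.0
    neg = (deltaZ < 0).ravel()
    if neg.any():
        # largest step keeping z + step*deltaZ >= 0, scaled back slightly
        step = min(1.0, float((-z[neg] / deltaZ[neg]).min()))
    step *= 0.99
    # backtrack until s = h - G(x + step*deltaX) stays strictly positive
    while numpy.any(h - G.dot(x + step * deltaX) <= 0.0):
        step *= 0.8
    return step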