Example #1
def para_solver(L):
     # Levenberg-Marquardt
     # HT = H + (L-1)**2*np.diag(np.diag(H))
     # Attempt to use plain Levenberg
     HT = H + (L-1)**2*np.eye(len(H))
     # print "Inverting Scaled Hessian:"                       ###
     # print " G:"                                             ###
     # pvec1d(G,precision=5)                                   ###
     # print " HT: (Scal = %.4f)" % (1+(L-1)**2)               ###
     # pmat2d(HT,precision=5)                                  ###
     Hi = invert_svd(np.mat(HT))
     dx = flat(-1 * Hi * col(G))
     # print " dx:"                                            ###
     # pvec1d(dx,precision=5)                                  ###
     # dxa = -solve(HT, G)
     # dxa = flat(dxa)
     # print " dxa:"                                           ###
     # pvec1d(dxa,precision=5)                                 ###
     # print                                                   ###
     sol = flat(0.5*row(dx)*np.mat(H)*col(dx))[0] + np.dot(dx,G)
     for i in self.excision:    # Reinsert deleted coordinates - don't take a step in those directions
         dx = np.insert(dx, i, 0)
     return dx, sol
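
A minimal standalone sketch of what para_solver does, assuming plain NumPy in place of the helper functions (invert_svd, row, col, flat) used above; np.linalg.pinv stands in for the SVD-based inverse, and the gradient/Hessian values are made up for illustration:

    import numpy as np

    def levenberg_step(G, H, L):
        # Plain Levenberg damping: add (L-1)^2 to the Hessian diagonal.
        HT = H + (L - 1)**2 * np.eye(len(H))
        # Pseudoinverse step; stands in for invert_svd() above.
        dx = -np.linalg.pinv(HT).dot(G)
        # Predicted change of the quadratic model, evaluated with the unscaled Hessian.
        sol = 0.5 * dx.dot(H).dot(dx) + dx.dot(G)
        return dx, sol

    # Illustrative (made-up) gradient and Hessian.
    G = np.array([0.5, -1.0])
    H = np.array([[2.0, 0.3], [0.3, 1.0]])
    dx, sol = levenberg_step(G, H, L=1.5)
    print("step: %s, predicted change: %.4f" % (dx, sol))

Larger L adds more damping and shrinks the step toward a scaled steepest-descent direction; L = 1 recovers the plain pseudoinverse Newton step.
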
Example #2
    def step(self, xk, data, trust):
        """ Computes the next step in the parameter space.  There are lots of tricks here that I will document later.

        @param[in] G The gradient
        @param[in] H The Hessian
        @param[in] trust The trust radius
        
        """
        from scipy import optimize

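        # Unpack the objective value, gradient and Hessian.  When the hyperbolic penalty is
        # active (self.bhyp), start from the penalty-free quantities stored under X0/G0/H0;
        # the penalty contribution is added back inside the Hyper helper below.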
        X, G, H = (data['X0'], data['G0'], data['H0']) if self.bhyp else (data['X'], data['G'], data['H'])
        H1 = H.copy()
        H1 = np.delete(H1, self.excision, axis=0)
        H1 = np.delete(H1, self.excision, axis=1)
        Eig = eig(H1)[0]            # Diagonalize Hessian
        Emin = min(Eig)
        if Emin < self.eps:         # Add a multiple of the identity (steepest descent character) if the smallest Hessian eigenvalue is too small or negative
            # Shift the spectrum so the smallest eigenvalue becomes max(self.eps, 0.01*|Emin|).
            Adj = max(self.eps, 0.01*abs(Emin)) - Emin
            print "Hessian has a small or negative eigenvalue (%.1e), mixing in some steepest descent (%.1e) to correct this." % (Emin, Adj)
            print "Eigenvalues are:"   ###
            pvec1d(Eig)                ###
            H += Adj*np.eye(H.shape[0])

        if self.bhyp:
            G = np.delete(G, self.excision)
            H = np.delete(H, self.excision, axis=0)
            H = np.delete(H, self.excision, axis=1)
            xkd = np.delete(xk, self.excision)
            if self.Objective.Penalty.fmul != 0.0:
                warn_press_key("Using the multiplicative hyperbolic penalty is discouraged!")
            # This is the gradient and Hessian without the contributions from the hyperbolic constraint.
            Obj0 = {'X':X,'G':G,'H':H}
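            # Helper class that evaluates the Levenberg-scaled quadratic model plus the
            # hyperbolic penalty.  It caches the last dx so repeated value/gradient queries
            # from the optimizer below are not recomputed; the Hess attribute is allocated
            # but never filled in, since only value and gradient are used here.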
            class Hyper(object):
                def __init__(self, HL, Penalty):
                    self.H = HL.copy()
                    self.dx = 1e10 * np.ones(len(HL),dtype=float)
                    self.Val = 0
                    self.Grad = np.zeros(len(HL),dtype=float)
                    self.Hess = np.zeros((len(HL),len(HL)),dtype=float)
                    self.Penalty = Penalty
                def _compute(self, dx):
                    self.dx = dx.copy()
                    Tmp = np.mat(self.H)*col(dx)
                    Reg_Term   = self.Penalty.compute(xkd+flat(dx), Obj0)
                    self.Val   = (X + np.dot(dx, G) + 0.5*row(dx)*Tmp + Reg_Term[0] - data['X'])[0,0]
                    self.Grad  = flat(col(G) + Tmp) + Reg_Term[1]
                def compute_val(self, dx):
                    if norm(dx - self.dx) > 1e-8:
                        self._compute(dx)
                    return self.Val
                def compute_grad(self, dx):
                    if norm(dx - self.dx) > 1e-8:
                        self._compute(dx)
                    return self.Grad
                def compute_hess(self, dx):
                    if norm(dx - self.dx) > 1e-8:
                        self._compute(dx)
                    return self.Hess
            def hyper_solver(L):
                dx0 = np.zeros(len(xkd),dtype=float)
                #dx0 = np.delete(dx0, self.excision)
                # HL = H + (L-1)**2*np.diag(np.diag(H))
                # Attempt to use plain Levenberg
                HL = H + (L-1)**2*np.eye(len(H))

                HYP = Hyper(HL, self.Objective.Penalty)
                try:
                    Opt1 = optimize.fmin_bfgs(HYP.compute_val,dx0,fprime=HYP.compute_grad,gtol=1e-5,full_output=True,disp=0)
                except:
                    Opt1 = optimize.fmin(HYP.compute_val,dx0,full_output=True,disp=0)
                try:
                    Opt2 = optimize.fmin_bfgs(HYP.compute_val,-xkd,fprime=HYP.compute_grad,gtol=1e-5,full_output=True,disp=0)
                except:
                    Opt2 = optimize.fmin(HYP.compute_val,-xkd,full_output=True,disp=0)
                #Opt2 = optimize.fmin(HYP.compute_val,-xkd,full_output=True,disp=0)
                dx1, sol1 = Opt1[0], Opt1[1]
                dx2, sol2 = Opt2[0], Opt2[1]
                dxb, sol = (dx1, sol1) if sol1 <= sol2 else (dx2, sol2)
                for i in self.excision:    # Reinsert deleted coordinates - don't take a step in those directions
                    dxb = np.insert(dxb, i, 0)
                return dxb, sol
        else:
            # G0 and H0 are used for determining the expected function change.
            G0 = G.copy()
            H0 = H.copy()
            G = np.delete(G, self.excision)
            H = np.delete(H, self.excision, axis=0)
            H = np.delete(H, self.excision, axis=1)
            # print "Inverting Hessian:"                 ###
            # print " G:"                                ###
            # pvec1d(G,precision=5)                      ###
            # print " H:"                                ###
            # pmat2d(H,precision=5)                      ###
            Hi = invert_svd(np.mat(H))
            dx = flat(-1 * Hi * col(G))
            # print " dx:"                               ###
            # pvec1d(dx,precision=5)                     ###
            # dxa = -solve(H, G)          # Take Newton Raphson Step ; use -1*G if want steepest descent.
            # dxa = flat(dxa)
            # print " dxa:"                              ###
            # pvec1d(dxa,precision=5)                    ###
            # print                                      ###
            for i in self.excision:    # Reinsert deleted coordinates - don't take a step in those directions
                dx = np.insert(dx, i, 0)
            def para_solver(L):
                # Levenberg-Marquardt
                # HT = H + (L-1)**2*np.diag(np.diag(H))
                # Attempt to use plain Levenberg
                HT = H + (L-1)**2*np.eye(len(H))
                # print "Inverting Scaled Hessian:"                       ###
                # print " G:"                                             ###
                # pvec1d(G,precision=5)                                   ###
                # print " HT: (Scal = %.4f)" % (1+(L-1)**2)               ###
                # pmat2d(HT,precision=5)                                  ###
                Hi = invert_svd(np.mat(HT))
                dx = flat(-1 * Hi * col(G))
                # print " dx:"                                            ###
                # pvec1d(dx,precision=5)                                  ###
                # dxa = -solve(HT, G)
                # dxa = flat(dxa)
                # print " dxa:"                                           ###
                # pvec1d(dxa,precision=5)                                 ###
                # print                                                   ###
                sol = flat(0.5*row(dx)*np.mat(H)*col(dx))[0] + np.dot(dx,G)
                for i in self.excision:    # Reinsert deleted coordinates - don't take a step in those directions
                    dx = np.insert(dx, i, 0)
                return dx, sol
    
        def solver(L):
            return hyper_solver(L) if self.bhyp else para_solver(L)
    
        def trust_fun(L):
            N = norm(solver(L)[0])
            #print "\rL = %.4e, Hessian diagonal addition = %.4e: found length %.4e, objective is %.4e" % (L, (L-1)**2, N, (N - trust)**2)
            return (N - trust)**2

        def search_fun(L):
            # Evaluate ONLY the objective function.  Most useful when
            # the objective is cheap, but the derivative is expensive.
            dx, sol = solver(L) # dx is the step relative to the current parameters xk.
            # This is our trial point.
            xk_ = dx + xk
            Result = self.Objective.Full(xk_,0,verbose=False)['X'] - data['X']
            print "Searching! Hessian diagonal addition = %.4e, L = % .4e, length %.4e, result %.4e" % ((L-1)**2,L,norm(dx),Result)
            return Result
        
        if self.trust0 > 0: # This is the trust region code.
            bump = False
            dx, expect = solver(1)
            dxnorm = norm(dx)
            if dxnorm > trust:
                bump = True
                # Tried a few optimizers here; Brent seems to work well.  Note that Brent's
                # tolerance is fractional, so if the optimized value is zero it can take a
                # lot of meaningless steps.
                LOpt = optimize.brent(trust_fun,brack=(self.lmg,self.lmg*4),tol=1e-6)
                ### Result = optimize.fmin_powell(trust_fun,3,xtol=self.search_tol,ftol=self.search_tol,full_output=1,disp=0)
                ### LOpt = Result[0]
                dx, expect = solver(LOpt)
                dxnorm = norm(dx)
                # print "\rLevenberg-Marquardt: %s step found (length %.3e), Hessian diagonal is scaled by % .8f" % ('hyperbolic-regularized' if self.bhyp else 'Newton-Raphson', dxnorm, (LOpt-1)**2)
                print "\rLevenberg-Marquardt: %s step found (length %.3e), % .8f added to Hessian diagonal" % ('hyperbolic-regularized' if self.bhyp else 'Newton-Raphson', dxnorm, (LOpt-1)**2)
        else: # This is the nonlinear search code.
            # First obtain a step that is the same length as the provided trust radius.
            LOpt = optimize.brent(trust_fun,brack=(self.lmg,self.lmg*4),tol=1e-6)
            bump = False
            Result = optimize.brent(search_fun,brack=(LOpt,LOpt*4),tol=self.search_tol,full_output=1)
            ### optimize.fmin(search_fun,0,xtol=1e-8,ftol=data['X']*0.1,full_output=1,disp=0)
            ### Result = optimize.fmin_powell(search_fun,3,xtol=self.search_tol,ftol=self.search_tol,full_output=1,disp=0)
            dx, _ = solver(Result[0])
            expect = Result[1]

        ## Decide which parameters to redirect.
        ## Currently not used.
        if self.Objective.Penalty.ptyp in [3,4,5]:
            self.FF.make_redirect(dx+xk)

        return dx, expect, bump
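
For reference, a rough sketch of the trust-radius matching performed in the trust-region branch above, under the same assumptions as the earlier snippet (plain NumPy, np.linalg.pinv in place of invert_svd, made-up gradient/Hessian, and a hypothetical bracket standing in for (self.lmg, self.lmg*4)): Brent's method finds the Levenberg parameter L whose step length matches the trust radius.

    import numpy as np
    from scipy import optimize

    def levenberg_step(G, H, L):
        # Same damping as above: add (L-1)^2 to the diagonal, then take a pseudoinverse step.
        HT = H + (L - 1)**2 * np.eye(len(H))
        return -np.linalg.pinv(HT).dot(G)

    def match_trust_radius(G, H, trust, bracket=(1.0, 4.0)):
        # Mirror trust_fun() above: minimize (|dx(L)| - trust)^2 over L with Brent's method.
        trust_fun = lambda L: (np.linalg.norm(levenberg_step(G, H, L)) - trust)**2
        LOpt = optimize.brent(trust_fun, brack=bracket, tol=1e-6)
        return levenberg_step(G, H, LOpt), LOpt

    # Illustrative (made-up) values.
    G = np.array([0.5, -1.0])
    H = np.array([[2.0, 0.3], [0.3, 1.0]])
    dx, LOpt = match_trust_radius(G, H, trust=0.1)
    print("L = %.4f, step length = %.4f" % (LOpt, np.linalg.norm(dx)))

If the undamped (L = 1) step already fits inside the trust radius, the code above skips this search and takes the Newton step directly.
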