Esempio n. 1
0
    def step(self, xk, data, trust):
        """ Computes the next step in the parameter space.  There are lots of tricks here that I will document later.

        @param[in] G The gradient
        @param[in] H The Hessian
        @param[in] trust The trust radius
        
        """
        from scipy import optimize

        X, G, H = (data['X0'], data['G0'], data['H0']) if self.bhyp else (data['X'], data['G'], data['H'])
        H1 = H.copy()
        H1 = np.delete(H1, self.excision, axis=0)
        H1 = np.delete(H1, self.excision, axis=1)
        Eig = eig(H1)[0]            # Diagonalize Hessian
        Emin = min(Eig)
        if Emin < self.eps:         # Mix in SD step if Hessian minimum eigenvalue is negative
            # Experiment.
            Adj = max(self.eps, 0.01*abs(Emin)) - Emin
            print "Hessian has a small or negative eigenvalue (%.1e), mixing in some steepest descent (%.1e) to correct this." % (Emin, Adj)
            print "Eigenvalues are:"   ###
            pvec1d(Eig)                ###
            H += Adj*np.eye(H.shape[0])

        if self.bhyp:
            G = np.delete(G, self.excision)
            H = np.delete(H, self.excision, axis=0)
            H = np.delete(H, self.excision, axis=1)
            xkd = np.delete(xk, self.excision)
            if self.Objective.Penalty.fmul != 0.0:
                warn_press_key("Using the multiplicative hyperbolic penalty is discouraged!")
            # This is the gradient and Hessian without the contributions from the hyperbolic constraint.
            Obj0 = {'X':X,'G':G,'H':H}
            class Hyper(object):
                def __init__(self, HL, Penalty):
                    self.H = HL.copy()
                    self.dx = 1e10 * np.ones(len(HL),dtype=float)
                    self.Val = 0
                    self.Grad = np.zeros(len(HL),dtype=float)
                    self.Hess = np.zeros((len(HL),len(HL)),dtype=float)
                    self.Penalty = Penalty
                def _compute(self, dx):
                    self.dx = dx.copy()
                    Tmp = np.mat(self.H)*col(dx)
                    Reg_Term   = self.Penalty.compute(xkd+flat(dx), Obj0)
                    self.Val   = (X + np.dot(dx, G) + 0.5*row(dx)*Tmp + Reg_Term[0] - data['X'])[0,0]
                    self.Grad  = flat(col(G) + Tmp) + Reg_Term[1]
                def compute_val(self, dx):
                    if norm(dx - self.dx) > 1e-8:
                        self._compute(dx)
                    return self.Val
                def compute_grad(self, dx):
                    if norm(dx - self.dx) > 1e-8:
                        self._compute(dx)
                    return self.Grad
                def compute_hess(self, dx):
                    if norm(dx - self.dx) > 1e-8:
                        self._compute(dx)
                    return self.Hess
            def hyper_solver(L):
                dx0 = np.zeros(len(xkd),dtype=float)
                #dx0 = np.delete(dx0, self.excision)
                # HL = H + (L-1)**2*np.diag(np.diag(H))
                # Attempt to use plain Levenberg
                HL = H + (L-1)**2*np.eye(len(H))

                HYP = Hyper(HL, self.Objective.Penalty)
                try:
                    Opt1 = optimize.fmin_bfgs(HYP.compute_val,dx0,fprime=HYP.compute_grad,gtol=1e-5,full_output=True,disp=0)
                except:
                    Opt1 = optimize.fmin(HYP.compute_val,dx0,full_output=True,disp=0)
                try:
                    Opt2 = optimize.fmin_bfgs(HYP.compute_val,-xkd,fprime=HYP.compute_grad,gtol=1e-5,full_output=True,disp=0)
                except:
                    Opt2 = optimize.fmin(HYP.compute_val,-xkd,full_output=True,disp=0)
                #Opt2 = optimize.fmin(HYP.compute_val,-xkd,full_output=True,disp=0)
                dx1, sol1 = Opt1[0], Opt1[1]
                dx2, sol2 = Opt2[0], Opt2[1]
                dxb, sol = (dx1, sol1) if sol1 <= sol2 else (dx2, sol2)
                for i in self.excision:    # Reinsert deleted coordinates - don't take a step in those directions
                    dxb = np.insert(dxb, i, 0)
                return dxb, sol
        else:
            # G0 and H0 are used for determining the expected function change.
            G0 = G.copy()
            H0 = H.copy()
            G = np.delete(G, self.excision)
            H = np.delete(H, self.excision, axis=0)
            H = np.delete(H, self.excision, axis=1)
            # print "Inverting Hessian:"                 ###
            # print " G:"                                ###
            # pvec1d(G,precision=5)                      ###
            # print " H:"                                ###
            # pmat2d(H,precision=5)                      ###
            Hi = invert_svd(np.mat(H))
            dx = flat(-1 * Hi * col(G))
            # print " dx:"                               ###
            # pvec1d(dx,precision=5)                     ###
            # dxa = -solve(H, G)          # Take Newton Raphson Step ; use -1*G if want steepest descent.
            # dxa = flat(dxa)
            # print " dxa:"                              ###
            # pvec1d(dxa,precision=5)                    ###
            print                                      ###
            for i in self.excision:    # Reinsert deleted coordinates - don't take a step in those directions
                dx = np.insert(dx, i, 0)
            def para_solver(L):
                # Levenberg-Marquardt
                # HT = H + (L-1)**2*np.diag(np.diag(H))
                # Attempt to use plain Levenberg
                HT = H + (L-1)**2*np.eye(len(H))
                # print "Inverting Scaled Hessian:"                       ###
                # print " G:"                                             ###
                # pvec1d(G,precision=5)                                   ###
                # print " HT: (Scal = %.4f)" % (1+(L-1)**2)               ###
                # pmat2d(HT,precision=5)                                  ###
                Hi = invert_svd(np.mat(HT))
                dx = flat(-1 * Hi * col(G))
                # print " dx:"                                            ###
                # pvec1d(dx,precision=5)                                  ###
                # dxa = -solve(HT, G)
                # dxa = flat(dxa)
                # print " dxa:"                                           ###
                # pvec1d(dxa,precision=5)                                 ###
                # print                                                   ###
                sol = flat(0.5*row(dx)*np.mat(H)*col(dx))[0] + np.dot(dx,G)
                for i in self.excision:    # Reinsert deleted coordinates - don't take a step in those directions
                    dx = np.insert(dx, i, 0)
                return dx, sol
    
        def solver(L):
            return hyper_solver(L) if self.bhyp else para_solver(L)
    
        def trust_fun(L):
            N = norm(solver(L)[0])
            #print "\rL = %.4e, Hessian diagonal addition = %.4e: found length %.4e, objective is %.4e" % (L, (L-1)**2, N, (N - trust)**2)
            return (N - trust)**2

        def search_fun(L):
            # Evaluate ONLY the objective function.  Most useful when
            # the objective is cheap, but the derivative is expensive.
            dx, sol = solver(L) # dx is how much the step changes from the previous step.
            # This is our trial step.
            xk_ = dx + xk
            Result = self.Objective.Full(xk_,0,verbose=False)['X'] - data['X']
            print "Searching! Hessian diagonal addition = %.4e, L = % .4e, length %.4e, result %.4e" % ((L-1)**2,L,norm(dx),Result)
            return Result
        
        if self.trust0 > 0: # This is the trust region code.
            bump = False
            dx, expect = solver(1)
            dxnorm = norm(dx)
            if dxnorm > trust:
                bump = True
                # Tried a few optimizers here, seems like Brent works well.
                # Okay, the problem with Brent is that the tolerance is fractional.  
                # If the optimized value is zero, then it takes a lot of meaningless steps.
                LOpt = optimize.brent(trust_fun,brack=(self.lmg,self.lmg*4),tol=1e-6)
                ### Result = optimize.fmin_powell(trust_fun,3,xtol=self.search_tol,ftol=self.search_tol,full_output=1,disp=0)
                ### LOpt = Result[0]
                dx, expect = solver(LOpt)
                dxnorm = norm(dx)
                # print "\rLevenberg-Marquardt: %s step found (length %.3e), Hessian diagonal is scaled by % .8f" % ('hyperbolic-regularized' if self.bhyp else 'Newton-Raphson', dxnorm, (LOpt-1)**2)
                print "\rLevenberg-Marquardt: %s step found (length %.3e), % .8f added to Hessian diagonal" % ('hyperbolic-regularized' if self.bhyp else 'Newton-Raphson', dxnorm, (LOpt-1)**2)
        else: # This is the nonlinear search code.
            # First obtain a step that is the same length as the provided trust radius.
            LOpt = optimize.brent(trust_fun,brack=(self.lmg,self.lmg*4),tol=1e-6)
            bump = False
            Result = optimize.brent(search_fun,brack=(LOpt,LOpt*4),tol=self.search_tol,full_output=1)
            ### optimize.fmin(search_fun,0,xtol=1e-8,ftol=data['X']*0.1,full_output=1,disp=0)
            ### Result = optimize.fmin_powell(search_fun,3,xtol=self.search_tol,ftol=self.search_tol,full_output=1,disp=0)
            dx, _ = solver(Result[0])
            expect = Result[1]

        ## Decide which parameters to redirect.
        ## Currently not used.
        if self.Objective.Penalty.ptyp in [3,4,5]:
            self.FF.make_redirect(dx+xk)

        return dx, expect, bump
Esempio n. 2
0
    def MainOptimizer(self,b_BFGS=0):
        """ The main ForceBalance adaptive trust-radius pseudo-Newton optimizer.  Tried and true in many situations. :)

        Usually this function is called with the BFGS or NewtonRaphson
        method.  The NewtonRaphson method is consistently the best
        method I have, because I always provide at least an
        approximate Hessian to the objective function.  The BFGS
        method is vestigial and currently does not work.

        BFGS is a pseudo-Newton method in the sense that it builds an
        approximate Hessian matrix from the gradient information in previous
        steps; Newton-Raphson requires the actual Hessian matrix.
        However, the algorithms are similar in that they both compute the
        step by inverting the Hessian and multiplying by the gradient.

        The method adaptively changes the step size.  If the step is
        sufficiently good (i.e. the objective function goes down by a
        large fraction of the predicted decrease), then the step size
        is increased; if the step is bad, then it rejects the step and
        tries again.

        The optimization is terminated after either a function value or
        step size tolerance is reached.

        @param[in] b_BFGS Switch to use BFGS (True) or Newton-Raphson (False)

        """
        if any(['liquid' in tgt.name.lower() for tgt in self.Objective.Targets]) and self.conv_obj < 1e-3:
            warn_press_key("Condensed phase targets detected - may not converge with current choice of convergence_objective (%.e)\nRecommended range is 1e-2 - 1e-1 for this option." % self.conv_obj)
        # Parameters for the adaptive trust radius
        a = self.adapt_fac  # Default value is 0.5, decrease to make more conservative.  Zero to turn off all adaptive.
        b = self.adapt_damp # Default value is 0.5, increase to make more conservative
        printcool( "Main Optimizer\n%s Mode%s" % ("BFGS" if b_BFGS else "Newton-Raphson", " (Static Radius)" if a == 0.0 else " (Adaptive Radius)"), ansi=1, bold=1)
        # First, set a bunch of starting values
        Ord         = 1 if b_BFGS else 2
        #Ord         = 2
        global ITERATION_NUMBER
        ITERATION_NUMBER = 0
        global GOODSTEP
        Best_Step = 1
        if all(i in self.chk for i in ['xk','X','G','H','ehist','x_best','xk_prev','trust']):
            print "Reading initial objective, gradient, Hessian from checkpoint file"
            xk, X, G, H, ehist     = self.chk['xk'], self.chk['X'], self.chk['G'], self.chk['H'], self.chk['ehist']
            X_best, xk_prev, trust = self.chk['x_best'], self.chk['xk_prev'], self.chk['trust']
        else:
            xk       = self.mvals0.copy()
            print
            data     = self.Objective.Full(xk,Ord,verbose=True)
            X, G, H  = data['X'], data['G'], data['H']
            ehist    = np.array([X])
            xk_prev  = xk.copy()
            trust    = abs(self.trust0)
            X_best   = X

        X_prev   = X
        G_prev   = G.copy()
        H_stor   = H.copy()
        ndx    = 0.0
        color  = "\x1b[1m"
        nxk = norm(xk)
        ngr = norm(G)

        Quality  = 0.0
        restep = False
        GOODSTEP = 1
        Ord         = 1 if b_BFGS else 2

        while 1: # Loop until convergence is reached.
            ## Put data into the checkpoint file
            self.chk = {'xk': xk, 'X' : X, 'G' : G, 'H': H, 'ehist': ehist,
                        'x_best': X_best,'xk_prev': xk_prev, 'trust': trust}
            if self.wchk_step:
                self.writechk()
            stdfront = len(ehist) > self.hist and np.std(np.sort(ehist)[:self.hist]) or (len(ehist) > 0 and np.std(ehist) or 0.0)
            stdfront *= 2
            print "%6s%12s%12s%12s%14s%12s%12s" % ("Step", "  |k|  ","  |dk|  "," |grad| ","    -=X2=-  ","Delta(X2)", "StepQual")
            print "%6i%12.3e%12.3e%12.3e%s%14.5e\x1b[0m%12.3e% 11.3f\n" % (ITERATION_NUMBER, nxk, ndx, ngr, color, X, stdfront, Quality)
            # Check the convergence criteria
            if ngr < self.conv_grd:
                print "Convergence criterion reached for gradient norm (%.2e)" % self.conv_grd
                break
            if ITERATION_NUMBER == self.maxstep:
                print "Maximum number of optimization steps reached (%i)" % ITERATION_NUMBER
                break
            if ndx < self.conv_stp and ITERATION_NUMBER > 0 and not restep:
                print "Convergence criterion reached in step size (%.2e)" % self.conv_stp
                break
            if stdfront < self.conv_obj and len(ehist) > self.hist and not restep: # Factor of two is so [0,1] stdev is normalized to 1
                print "Convergence criterion reached for objective function (%.2e)" % self.conv_obj
                break
            if self.print_grad:
                bar = printcool("Total Gradient",color=4)
                self.FF.print_map(vals=G,precision=8)
                print bar
            if self.print_hess:
                bar = printcool("Total Hessian",color=4)
                pmat2d(H,precision=8)
                print bar
            for key, val in self.Objective.ObjDict.items():
                if Best_Step:
                    self.Objective.ObjDict_Last[key] = val
            restep = False
            dx, dX_expect, bump = self.step(xk, data, trust)
            old_pk = self.FF.create_pvals(xk)
            old_xk = xk.copy()
            # Increment the iteration counter.
            ITERATION_NUMBER += 1
            # Take a step in the parameter space.
            xk += dx
            if self.print_vals:
                pk = self.FF.create_pvals(xk)
                dp = pk - old_pk
                bar = printcool("Mathematical Parameters (Current + Step = Next)",color=5)
                self.FF.print_map(vals=["% .4e %s %.4e = % .4e" % (old_xk[i], '+' if dx[i] >= 0 else '-', abs(dx[i]), xk[i]) for i in range(len(xk))])
                print bar
                bar = printcool("Physical Parameters (Current + Step = Next)",color=5)
                self.FF.print_map(vals=["% .4e %s %.4e = % .4e" % (old_pk[i], '+' if dp[i] >= 0 else '-', abs(dp[i]), pk[i]) for i in range(len(pk))])
                print bar
            # Evaluate the objective function and its derivatives.
            data        = self.Objective.Full(xk,Ord,verbose=True)
            X, G, H = data['X'], data['G'], data['H']
            ndx = norm(dx)
            nxk = norm(xk)
            ngr = norm(G)
            drc = abs(flat(dx)).argmax()

            dX_actual = X - X_prev
            try:
                Quality = dX_actual / dX_expect
            except:
                print "Warning: Step size of zero detected (i.e. wrong direction).  Try reducing the finite_difference_h parameter"
                Quality = 1.0 # This is a step length of zero.

            if Quality <= 0.25 and X < (X_prev + self.err_tol) and self.trust0 > 0:
                # If the step quality is bad, then we should decrease the trust radius.
                trust = max(ndx*(1./(1+a)), self.mintrust)
                print "Low quality step, reducing trust radius to % .4e" % trust
            if Quality >= 0.75 and bump and self.trust0 > 0:
                # If the step quality is good, then we should increase the trust radius.
                # The 'a' factor is how much we should grow or shrink the trust radius each step
                # and the 'b' factor determines how closely we are tied down to the original value.
                # Recommend values 0.5 and 0.5
                trust += a*trust*np.exp(-b*(trust/self.trust0 - 1))
            if X > (X_prev + self.err_tol):
                Best_Step = 0
                # Toggle switch for rejection (experimenting with no rejection)
                Rejects = True
                GOODSTEP = 0
                Reevaluate = True
                trust = max(ndx*(1./(1+a)), self.mintrust)
                print "Rejecting step and reducing trust radius to % .4e" % trust
                if Rejects:
                    xk = xk_prev.copy()
                    if Reevaluate:
                        restep = True
                        color = "\x1b[91m"
                        print "%6s%12s%12s%12s%14s%12s%12s" % ("Step", "  |k|  ","  |dk|  "," |grad| ","    -=X2=-  ","Delta(X2)", "StepQual")
                        print "%6i%12.3e%12.3e%12.3e%s%14.5e\x1b[0m%12.3e% 11.3f\n" % (ITERATION_NUMBER, nxk, ndx, ngr, color, X, stdfront, Quality)
                        printcool("Objective function rises!\nRe-evaluating at the previous point..",color=1)
                        ITERATION_NUMBER += 1
                        data        = self.Objective.Full(xk,Ord,verbose=True)
                        GOODSTEP = 1
                        X, G, H = data['X'], data['G'], data['H']
                        X_prev = X
                        dx *= 0
                        ndx = norm(dx)
                        nxk = norm(xk)
                        ngr = norm(G)
                        Quality = 0.0
                        color = "\x1b[0m"
                    else:
                        color = "\x1b[91m"
                        G = G_prev.copy()
                        H = H_stor.copy()
                        data = deepcopy(datastor)
                    continue
            else:
                GOODSTEP = 1
                if X > X_best:
                    Best_Step = 0
                    color = "\x1b[95m"
                else:
                    Best_Step = 1
                    color = "\x1b[92m"
                    X_best = X
                ehist = np.append(ehist, X)
            # Hessian update for BFGS.
            if b_BFGS:
                Hnew = H_stor.copy()
                Dx   = col(xk - xk_prev)
                Dy   = col(G  - G_prev)
                Mat1 = (Dy*Dy.T)/(Dy.T*Dx)[0,0]
                Mat2 = ((Hnew*Dx)*(Hnew*Dx).T)/(Dx.T*Hnew*Dx)[0,0]
                Hnew += Mat1-Mat2
                H = Hnew.copy()
                data['H'] = H.copy()

            datastor= deepcopy(data)
            G_prev  = G.copy()
            H_stor  = H.copy()
            xk_prev = xk.copy()
            X_prev  = X
            if len(self.FF.parmdestroy_this) > 0:
                self.FF.parmdestroy_save.append(self.FF.parmdestroy_this)
                self.FF.linedestroy_save.append(self.FF.linedestroy_this)
        
        bar = printcool("Final objective function value\nFull: % .6e  Un-penalized: % .6e" % (data['X'],data['X0']), '@', bold=True, color=2)
        return xk