# NOTE: these excerpts come from separate solution files. Each assumes the
# assignment's hw2 and cost modules are importable, together with the usual
# imports (numpy as np, matplotlib.pyplot as plt and, where used, matplotlib's
# ticker/cm and the time module).


def newton_test(xg, display=False):
    """Use Newton's method to minimize the cost function defined in the cost module.

    Input variable xg is the initial guess for the location of the minimum.
    When display is True, a figure illustrating the convergence of the method
    is generated.

    Output variables:
    xf -- computed location of minimum
    jf -- computed minimum
    Further output can be added to the tuple, output, as needed. It may also
    be left empty.

    NOTE: for plotting purposes, please input xg as a list so the inbuilt
    LaTeX rendering works in plot titles.
    """
    output = ()
    dat = hw2.newton(xg)
    xf = dat[0]
    jf = dat[1]

    if display:
        # Distance from the known minimum at [1, 1] at each iteration
        xpath = hw2.xpath
        distances = []
        for i in range(len(xpath)):
            temp = np.array([1, 1]) - xpath[i]
            distances.append(np.sqrt(temp[0]**2 + temp[1]**2))

        plt.figure(figsize=(14, 7))
        plt.suptitle('Lawrence Stewart - Created Using newton_test().')

        # Plot the cost at each iteration
        plt.subplot(121)
        plt.plot(np.arange(1, len(hw2.jpath) + 1, 1), hw2.jpath, alpha=0.8, color='r')
        plt.xlabel("Iteration")
        plt.ylabel("Cost")
        ax = plt.gca()
        ax.set_facecolor('#D9E6E8')
        plt.title("Cost at each Iteration of Newton's Method, xg =%s" % xg)
        plt.grid(True)

        # Plot the distance from the minimum at each iteration
        plt.subplot(122)
        plt.plot(np.arange(1, len(hw2.jpath) + 1, 1), distances, alpha=0.8, color='r')
        plt.title("Distance from Minimum at Each Iteration, xg =%s" % xg)
        plt.xlabel("Iteration")
        plt.ylabel("Distance")
        ax = plt.gca()
        ax.set_facecolor('#D9E6E8')
        plt.grid(True)
        plt.show()

    return xf, jf, output
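# Usage sketch (assumption: the hw2 and cost modules from the assignment are on
# the path). Passing xg as a list, as the docstring requests, produces the
# two-panel convergence figure and returns the computed minimum; the initial
# guess below is an arbitrary example value.
if __name__ == '__main__':
    xf, jf, out = newton_test([-1.0, 2.0], display=True)
    print('Newton converged to x =', xf, 'with cost j =', jf)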
def newton_test(xg, display=False):
    """Use Newton's method to minimize the cost function defined in the cost module.

    Input variable xg is the initial guess for the location of the minimum.
    When display is True, a figure illustrating the convergence of the method
    is generated.

    Output variables:
    xf -- computed location of minimum
    jf -- computed minimum
    Further output can be added to the tuple, output, as needed. It may also
    be left empty.
    """
    # Call newton and extract the iteration paths
    cost.c_noise = False
    nw = hw2.newton(xg)
    xf = nw[0]
    jf = nw[1]
    jpath_nw = hw2.jpath.copy()
    xpath_nw = hw2.xpath.copy()

    # Norm distance to the minimum at [1, 1] from each point on the path
    norm_distance = np.linalg.norm(xpath_nw - [1, 1], axis=1)

    if display:
        fig1 = plt.figure(figsize=(11, 7))
        x = list(range(len(jpath_nw)))
        ax = fig1.add_subplot(121)
        ax2 = fig1.add_subplot(122)

        ax.plot(jpath_nw, label='Cost')
        ax.set_xlabel('Iteration number')
        ax.set_ylabel('Value of the cost function')
        ax.set_title('Value of the cost function \n against Newton iterations starting at \n xguess={}.\n Igor Adamski'.format(xg))
        ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
        ax.grid(linestyle=':', linewidth=0.5)
        ax.set_yscale('log')
        ax.set_xticks(x)

        ax2.plot(norm_distance, label='Distance from [1,1]')
        ax2.set_xlabel('Iteration number')
        ax2.set_ylabel('Norm distance to the minimum at [1,1]')
        ax2.set_title('Distance to the actual minimum at [1,1] \n at each Newton iteration, starting from \n xguess={}.\n Called by newton_test({}, True) Igor Adamski'.format(xg, xg))
        ax2.set_xticks(x)
        ax2.grid(linestyle=':', linewidth=0.5)

        plt.show()

    return xf, jf
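# Quick check sketch (assumption: same hw2/cost setup as above). Since the cost
# function in this exercise has its minimum at [1, 1], the result returned by
# newton_test can be compared against that point directly.
if __name__ == '__main__':
    xf, jf = newton_test([5.0, 5.0], display=False)
    print('Distance from [1, 1]:', np.linalg.norm(np.asarray(xf) - np.array([1.0, 1.0])))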
def newton_test(xg, display=False, i=1, timing=False):
    """
    ============================================================================
    Use Newton's method to minimize a cost function, j, defined in cost module.
    ============================================================================

    Parameters
    ----------
    xg : list
        Initial guess.
    display : Boolean, optional
        If set to True, figures will be created to illustrate the optimization
        path taken and the distance from convergence at each step.
    i : Integer, optional (default 1)
        Sets the name of the saved figure as hw22i.png.
    timing : Boolean, optional
        If set to True, an average time for finding the minimum is computed
        over 10 runs and included in the tuple output.

    Returns
    -------
    xf : ndarray
        Computed location of minimum.
    jf : float
        Computed minimum.
    output : tuple
        Contains the average time taken for the minimum to be found (averaged
        over 10 runs when timing is True), followed by the iteration path and
        cost history.

    When display is True, a figure containing two subplots is produced. The
    first shows the location of each step in the minimization path, overlaid
    on the cost function. The second shows the distance from the final,
    computed minimum at each iteration.
    """
    cost.c_noise = False
    hw2.tol = 10**(-6)
    hw2.itermax = 1000
    t21 = 0
    output = ()

    if timing:
        N = 10
    else:
        N = 1
    for j in range(N):  # run once, or N times when timing is requested
        t1 = time()
        hw2.newton(xg)
        t2 = time()
        t21 = t21 + (t2 - t1)

    X, Y = hw2.xpath
    xf = [X[-1], Y[-1]]
    jpathn = [j for j in hw2.jpath]
    jf = hw2.jpath[-1]
    output = (t21 / N, X, Y, jpathn)

    if display:
        Minx = min(X) - 1
        Maxx = max(X) + 1
        Miny = min(Y) - 1
        Maxy = max(Y) + 1
        Xj, Yj = np.linspace(Minx, Maxx, 200), np.linspace(Miny, Maxy, 200)
        # Evaluate the noiseless cost function at each point on a 2D grid
        j = [[cost.costj([xi, yi]) for xi in Xj] for yi in Yj]

        f, (p1, p2) = plt.subplots(1, 2)
        p1.contourf(Xj, Yj, j, locator=ticker.LogLocator(), cmap=cm.GnBu)
        p1.plot(X, Y, 'g', marker='d')
        p1.set_xlim(min(X) - 1, max(X) + 1)
        p1.set_xlabel('X1-location')
        p1.set_ylabel('X2-location')
        p1.set_title('Convergence Path')

        p2.plot(np.linspace(0, len(X) - 1, len(X)), hw2.jpath - jf)
        p2.set_xlabel('Iteration number')
        p2.set_ylabel('Distance from converged minimum')
        p2.set_title('Rate')

        plt.suptitle('Rosemary Teague, Newton_test, initial guess =' + str(xg) +
                     ' \n Convergence of a cost function')
        plt.tight_layout(pad=4)
        plt.savefig('hw22' + str(i), dpi=700)

    return xf, jf, output
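# Usage sketch for the timing option (assumption: hw2 and cost are importable).
# With timing=True the first element of the returned output tuple is the wall
# time averaged over 10 Newton runs; i controls the saved figure name hw22i.png.
# The initial guess below is an arbitrary example value.
if __name__ == '__main__':
    xf, jf, output = newton_test([10.0, 10.0], display=True, i=1, timing=True)
    print('Average time per Newton run: {:.6f} s'.format(output[0]))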
def bracket_descent_test(xg, display=False):
    """Use bracket descent to minimize the cost function defined in the cost module.

    Input variable xg is the initial guess for the location of the minimum.
    When display is True, 1-2 figures comparing the B-D and Newton steps are
    generated.

    Output variables:
    xf -- computed location of minimum
    jf -- computed minimum

    Discussion:
    The first figure shows the paths that the Newton and bracket-descent
    algorithms take before converging to the minimum. Newton takes very few
    steps (at most 6-7), and those steps are large: it can jump far from the
    final converged minimum but always manages to return. Bracket descent, in
    contrast, takes many very small steps that gradually lead it to the
    minimum, and it does not jump in directions opposite to where it should be
    heading, unlike Newton.

    The second figure shows the norm distance between the current point and
    the true minimum at [1,1] plotted against the iteration number. Newton
    initially jumps very far from the minimum, only to converge to [1,1] two
    steps later. Bracket descent mostly stays on a downward trajectory, except
    for a few brief excursions away from [1,1] from which it returns promptly.
    Overall, Newton needs far fewer iterations (by a factor of hundreds) than
    bracket descent.

    Newton's low iteration count is not surprising: the method relies on the
    gradient and Hessian, which carry information about the curvature of the
    cost surface and keep it on the right track. Bracket descent has no such
    higher-level information about the cost function; it simply searches for
    smaller values of the cost and makes small adjustments to the triangle's
    vertices. Its steps are small because, by construction, a vertex can move
    by at most four triangle heights per iteration, whereas Newton's gradient
    and Hessian can send it far away only to return to the minimum in the next
    step.
    """
    # Call bracket descent and extract the iteration path
    cost.c_noise = False
    bd = hw2.bracket_descent(xg)
    xf = bd[0]
    jf = bd[1]
    xpath_bd = hw2.xpath.copy()

    # Norm distance to [1, 1] from each point on the path
    norm_distance_bd = np.linalg.norm(xpath_bd - [1, 1], axis=1)

    # Purely for plot aesthetics
    if np.linalg.norm(xg) < 10:
        scale = "linear"
    else:
        scale = "symlog"

    if display:
        fig1 = plt.figure()
        fig2 = plt.figure()

        # Run Newton from the same initial guess for comparison
        nw = hw2.newton(xg)
        xpath_nw = hw2.xpath
        norm_distance_nw = np.linalg.norm(xpath_nw - [1, 1], axis=1)

        ax = fig1.add_subplot(111)
        ax.plot(xpath_bd[:, 0], xpath_bd[:, 1], 'r', label='Bracket descent', linewidth=1)
        ax.scatter(xpath_bd[:, 0], xpath_bd[:, 1], s=10, c='black')
        ax.scatter(xg[0], xg[1], c='green', label='Initial guess')
        ax.plot(xpath_nw[:, 0], xpath_nw[:, 1], 'b', label='Newton')
        # ax.plot(xf[0], xf[1], 'c*', label='Final convergence of bracket_descent')
        ax.scatter(nw[0][0], nw[0][1], c='orange', marker='D', label='Actual minimum')
        ax.set_yscale(scale)
        ax.set_ylabel('y-coordinate')
        ax.set_xlabel('x-coordinate')
        ax.set_title('The paths of bracket descent and Newton \n starting from xguess={}. \n Cmd. bracket_descent_test({},True), Igor Adamski'.format(xg, xg))
        ax.legend()

        ax2 = fig2.add_subplot(111)
        ax2.plot(norm_distance_nw, label='Newton')
        ax2.plot(norm_distance_bd, label='Bracket descent')
        ax2.set_xlabel('Iterations')
        ax2.set_ylabel('Norm distance from [1,1]')
        ax2.set_yscale(scale)
        ax2.set_title('Norm distance from minimum at [1,1] against \n the number of iterations. \n Cmd. bracket_descent_test({},True). Igor Adamski'.format(xg))
        ax2.legend()

        plt.show()

    return xf, jf
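# Illustration sketch for the discussion above (assumption: hw2 stores the most
# recent iteration path in hw2.xpath, as used throughout these routines).
# Comparing the path lengths makes the "few large Newton steps versus many
# small bracket-descent steps" point concrete; the initial guess is an
# arbitrary example value.
if __name__ == '__main__':
    xg = [5.0, 5.0]
    hw2.bracket_descent(xg)
    n_bd = len(hw2.xpath)
    hw2.newton(xg)
    n_nw = len(hw2.xpath)
    print('Bracket descent iterations:', n_bd, '| Newton iterations:', n_nw)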
def bracket_descent_test(xg, display=False):
    """Use bracket descent to minimize the cost function defined in the cost module.

    Input variable xg is the initial guess for the location of the minimum.
    When display is True, 1-2 figures comparing the B-D and Newton steps are
    generated.

    Output variables:
    xf -- computed location of minimum
    jf -- computed minimum

    Discussion and Explanation:
    Bracket descent and Newton's method behave very differently, as the
    attached figures show. hw231.png plots the step size, h, taken at each
    iteration of both algorithms, where distance is measured with the usual
    Euclidean norm. Newton's method initially takes very large steps,
    approximately 10000 for the starting point [100, 10]; the step size then
    decreases drastically as the algorithm converges on the minimum. The large
    initial steps come from the gradient-based framework Newton's method uses,
    which lets it move towards the minimum in big jumps. Bracket descent keeps
    an approximately constant step size, which is expected from the
    triangle-based descent it uses (the algorithm is bounded in the step size
    it can take).

    hw232.png shows the wallclock and CPU time for both methods. Because it
    converges faster, Newton's method terminates sooner for both measures, at
    approximately 0.00001 s wallclock and roughly the same CPU time. Bracket
    descent takes longer, at approximately 0.00005 s for both wallclock and
    CPU time. Starting from points further away from the global minimum at
    [1, 1] would exaggerate this difference, because bracket descent's step
    size stays constant while the size of Newton's initial steps grows with
    distance, keeping its run time small.
    """
    if not display:
        xf, jf = hw2.bracket_descent(xg)

    if display:
        average_bd_wallclock = 0
        average_newton_wallclock = 0
        average_newton_cpu_time = 0
        average_bd_cpu_time = 0

        # Time bracket descent, averaged over 20 runs
        for i in range(20):
            t1 = time.time()  # start wallclock timer
            tp1 = time.process_time()  # start CPU timer
            xf, jf = hw2.bracket_descent(xg)
            t2 = time.time()  # t2 - t1 gives wallclock time
            tp2 = time.process_time()  # tp2 - tp1 gives CPU time -- depends on number of cores!
            bd_wallclock = t2 - t1
            bd_cpu_time = tp2 - tp1
            average_bd_wallclock += bd_wallclock
            average_bd_cpu_time += bd_cpu_time

        # Step taken at each bracket-descent iteration, and the Newton step
        # that would be taken from the same points
        xpath = hw2.xpath
        bracket_steps = [xpath[i + 1] - xpath[i] for i in range(len(xpath) - 1)]
        newton_steps = [hw2.newtonstep(xpath[i]) for i in range(len(xpath) - 1)]
        bracket_steps_dist = [np.sqrt(bracket_steps[i][0]**2 + bracket_steps[i][1]**2)
                              for i in range(len(bracket_steps))]
        newton_steps_dist = [np.sqrt(newton_steps[i][0]**2 + newton_steps[i][1]**2)
                             for i in range(len(newton_steps))]
        steps = np.arange(1, len(bracket_steps) + 1, 1)
        ratio_steps = []
        for i in range(len(bracket_steps_dist)):
            ratio_steps.append(newton_steps_dist[i] / bracket_steps_dist[i])

        # Time Newton's method as well, averaged over 20 runs
        for i in range(20):
            t1 = time.time()  # start wallclock timer
            tp1 = time.process_time()  # start CPU timer
            hw2.newton(xg)
            t2 = time.time()  # t2 - t1 gives wallclock time
            tp2 = time.process_time()  # tp2 - tp1 gives CPU time -- depends on number of cores!
            newton_wallclock = t2 - t1
            newton_cpu_time = tp2 - tp1
            average_newton_wallclock += newton_wallclock
            average_newton_cpu_time += newton_cpu_time

        # Divide by 20 to obtain the averages
        average_newton_cpu_time = average_newton_cpu_time / 20
        average_newton_wallclock = average_newton_wallclock / 20
        average_bd_wallclock = average_bd_wallclock / 20
        average_bd_cpu_time = average_bd_cpu_time / 20

        # Plot the step-size comparison
        plt.figure()
        # plt.subplot(121)
        plt.title("Step (h) Comparison for Newton's and Bracket Descent with xg=%s" % xg)
        plt.suptitle('Lawrence Stewart - Created Using bracket_descent_test().')
        plt.xlabel('Iteration number')
        plt.ylabel('Size of step h')
        plt.plot(steps, bracket_steps_dist, label="Bracket Descent", alpha=0.8, color='r')
        plt.plot(steps, newton_steps_dist, label="Newton's Method", alpha=0.7)
        ax = plt.gca()
        plt.grid(True)
        ax.set_facecolor('#D9E6E8')
        plt.legend()
        # plt.subplot(122)
        # plt.plot(steps, ratio_steps, alpha=0.7, color='r')
        # plt.title("Ratio of Newton Step taken over Bracket Step taken at Each Iteration")
        # plt.xlabel("Iteration")
        # plt.ylabel("Newton Step/ Bracket Step")
        # plt.grid(True)
        # ax = plt.gca()
        # ax.set_facecolor('#D9E6E8')
        # plt.show()

        # Plot the timings
        fig, ax = plt.subplots()
        plt.suptitle('Lawrence Stewart - Created Using bracket_descent_test().')
        index = np.arange(2)
        bar_width = 0.35
        opacity = 0.7
        rects1 = plt.bar(index,
                         (average_newton_wallclock, average_newton_cpu_time),
                         bar_width, alpha=opacity, color='c', label='Newton')
        rects2 = plt.bar(index + bar_width,
                         (average_bd_wallclock, average_bd_cpu_time),
                         bar_width, alpha=opacity, color='m', label='BD')
        plt.xticks(index + bar_width, ('Wallclock Time', 'CPU Time'))
        plt.legend()
        ax = plt.gca()
        ax.set_facecolor('#D9E6E8')
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        plt.title("Average Timings for Bracket Descent and Newton's for xg=%s" % xg)
        plt.ylabel("Time (s)")
        plt.show()

    return xf, jf
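# Usage sketch tied to the discussion above (assumption: hw2 and cost modules
# are importable). Running the comparison from a nearby and a distant initial
# guess illustrates the claim that Newton's initial step size grows with
# distance while the bracket-descent step size stays roughly constant; the
# first guess is an arbitrary example, the second is the [100, 10] case
# discussed in the docstring.
if __name__ == '__main__':
    for xg in ([2.0, 2.0], [100.0, 10.0]):
        xf, jf = bracket_descent_test(xg, display=True)
        print('xg =', xg, '-> xf =', xf, ', jf =', jf)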