# imports assumed throughout these snippets: autograd supplies the automatic
# differentiation utilities, matplotlib the plotting/animation machinery
import math
import time
import copy
import autograd.numpy as np
from autograd import grad as compute_grad
from autograd import hessian as compute_hess
from autograd.misc.flatten import flatten_func
import matplotlib.pyplot as plt
from matplotlib import gridspec
from matplotlib import animation
from IPython.display import clear_output


def run_algo(self, algo, **kwargs):
    # get function and compute gradient
    self.g = self.linear_least_squares
    self.grad = compute_grad(self.g)

    # choose algorithm
    self.algo = algo
    if self.algo == 'gradient_descent':
        self.alpha = 10**-3
        if 'alpha' in kwargs:
            self.alpha = kwargs['alpha']

    self.max_its = 10
    if 'max_its' in kwargs:
        self.max_its = kwargs['max_its']

    self.w_init = np.random.randn(2)
    if 'w_init' in kwargs:
        self.w_init = kwargs['w_init']
    self.w_init = np.asarray([float(s) for s in self.w_init])
    self.w_init.shape = (np.size(self.w_init), 1)

    # run algorithm of choice
    if self.algo == 'gradient_descent':
        self.w_hist = []
        self.gradient_descent()
    if self.algo == 'newtons_method':
        self.hess = compute_hess(self.g)  # hessian of input function
        self.beta = 0
        if 'beta' in kwargs:
            self.beta = kwargs['beta']
        self.w_hist = []
        self.newtons_method()
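
# hypothetical usage of the dispatcher above; 'Demo' stands in for the class
# (with a linear_least_squares cost) that run_algo belongs to
demo = Demo()
demo.run_algo('newtons_method', max_its=15, w_init=[1.0, 1.0], beta=10**-3)
demo.run_algo('gradient_descent', alpha=10**-2, max_its=100)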
def newtons_method(self, g, w, **kwargs):
    # create gradient and hessian functions
    self.g = g

    # flatten gradient for simpler-written descent loop
    flat_g, unflatten, w = flatten_func(self.g, w)
    self.grad = compute_grad(flat_g)
    self.hess = compute_hess(flat_g)

    # parse optional arguments
    max_its = 20
    if 'max_its' in kwargs:
        max_its = kwargs['max_its']
    self.epsilon = 10**(-5)
    if 'epsilon' in kwargs:
        self.epsilon = kwargs['epsilon']
    verbose = False
    if 'verbose' in kwargs:
        verbose = kwargs['verbose']

    # create container for weight history
    w_hist = []
    w_hist.append(unflatten(w))

    # start newton's method loop
    if verbose:
        print('starting optimization...')
    geval_old = flat_g(w)

    for k in range(max_its):
        # compute gradient and hessian
        grad_val = self.grad(w)
        hess_val = self.hess(w)
        hess_val.shape = (np.size(w), np.size(w))

        # solve linear system for weights
        w = w - np.dot(np.linalg.pinv(hess_val + self.epsilon * np.eye(np.size(w))), grad_val)

        # eject from process if reaching singular system
        geval_new = flat_g(w)
        if k > 2 and geval_new > geval_old:
            print('singular system reached')
            time.sleep(1.5)
            clear_output()
            return w_hist
        else:
            geval_old = geval_new

        # record current weights
        w_hist.append(unflatten(w))

    if verbose:
        print('...optimization complete!')
        time.sleep(1.5)
        clear_output()

    return w_hist
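
# sketch of calling the method above; because the weights are flattened
# internally, w may be any nested structure of arrays. 'opt' stands in for
# an instance of the (unnamed) class this method belongs to.
g = lambda w: np.sum(w[0]**2) + np.sum((w[1] - 1.0)**2)
w0 = [np.random.randn(3, 2), np.random.randn(2)]
history = opt.newtons_method(g, w0, max_its=5, verbose=False)
print('weights recorded:', len(history))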
def __init__(self, **args):
    self.g = args['g']                          # input function
    self.grad = compute_grad(self.g)            # gradient of input function
    self.hess = compute_hess(self.g)            # hessian of input function
    self.w_init = float(-2)                     # user-defined initial point (adjustable when calling each algorithm)
    self.alpha = 10**-4                         # user-defined step length for gradient descent (adjustable when calling gradient descent)
    self.max_its = 20                           # max iterations to run for each algorithm
    self.w_hist = []                            # container for algorithm path
    self.colors = [[0, 1, 0.25], [0, 0.75, 1]]  # set of custom colors used for plotting
    self.beta = 0
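
# constructing an instance; the class name 'Visualizer' is a placeholder for
# whatever class the __init__ above belongs to
g = lambda w: w**4 + 0.5 * w**2 + 1.0
viz = Visualizer(g=g)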
def newtons_method(g, w, x, y, beta, max_its):
    # flatten gradient for simpler-written descent loop
    flat_g, unflatten, w = flatten_func(g, w)
    grad = compute_grad(flat_g)
    hess = compute_hess(flat_g)

    # create containers for weight and cost histories
    w_hist = []
    w_hist.append(unflatten(w))
    g_hist = []
    geval_old = flat_g(w, x, y, beta)
    g_hist.append(geval_old)

    # main loop
    epsilon = 10**(-7)
    for k in range(max_its):
        # compute gradient and hessian
        grad_val = grad(w, x, y, beta)
        hess_val = hess(w, x, y, beta)
        hess_val.shape = (np.size(w), np.size(w))

        # solve linear system for weights
        w = w - np.dot(np.linalg.pinv(hess_val + epsilon * np.eye(np.size(w))), grad_val)

        # eject from process if the cost increases, indicating a (near-)singular system
        geval_new = flat_g(w, x, y, beta)
        if k > 2 and geval_new > geval_old:
            print('singular system reached')
            time.sleep(1.5)
            clear_output()
            return w_hist, g_hist  # return both histories on early exit, matching the normal return
        else:
            geval_old = geval_new

        # record current weights and cost
        w_hist.append(unflatten(w))
        g_hist.append(geval_new)

    return w_hist, g_hist
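
# minimal usage sketch for the function above, assuming a regularized least
# squares cost; the model, data, and names here are invented for illustration
def least_squares(w, x, y, beta):
    y_pred = w[0] + w[1] * x                          # simple linear model
    return np.mean((y_pred - y)**2) + beta * np.sum(w**2)

x = np.linspace(-1, 1, 20)
y = 3 * x + 1 + 0.1 * np.random.randn(20)
w0 = np.random.randn(2)
w_hist, g_hist = newtons_method(least_squares, w0, x, y, beta=10**-4, max_its=10)
print('final cost:', g_hist[-1])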
def newtons_method(self, g, w_hist, savepath, **kwargs):
    # compute gradient and hessian of input
    grad = compute_grad(g)  # gradient of input function
    hess = compute_hess(g)  # hessian of input function

    # set viewing range
    wmax = 3
    if 'wmax' in kwargs:
        wmax = kwargs['wmax']
    wmin = -wmax
    if 'wmin' in kwargs:
        wmin = kwargs['wmin']

    # initialize figure
    fig = plt.figure(figsize=(9, 4))
    artist = fig

    # create subplot with 3 panels, plot input function in center plot
    gs = gridspec.GridSpec(1, 3, width_ratios=[1, 4, 1])
    ax1 = plt.subplot(gs[0])
    ax1.axis('off')
    ax3 = plt.subplot(gs[2])
    ax3.axis('off')
    ax = plt.subplot(gs[1])

    # generate function for plotting on each slide
    w_plot = np.linspace(wmin, wmax, 1000)
    g_plot = g(w_plot)
    g_range = max(g_plot) - min(g_plot)
    ggap = g_range * 0.1
    w_vals = np.linspace(-2.5, 2.5, 50)
    width = 1

    # make color spectrum for points
    colorspec = self.make_colorspec(w_hist)

    # animation sub-function
    print('starting animation rendering...')
    num_frames = 2 * len(w_hist) + 2

    def animate(t):
        ax.cla()
        k = math.floor((t + 1) / float(2))

        # print rendering update
        if np.mod(k + 1, 25) == 0:
            print('rendering animation frame ' + str(k + 1) + ' of ' + str(num_frames))
        if t == num_frames - 1:
            print('animation rendering complete!')
            time.sleep(1.5)
            clear_output()

        # plot function
        ax.plot(w_plot, g_plot, color='k', zorder=1)

        # plot initial point and evaluation
        if k == 0:
            w_val = w_hist[0]
            g_val = g(w_val)
            ax.scatter(w_val, g_val, s=100, c=colorspec[k], edgecolor='k', linewidth=0.7, marker='X', zorder=2)  # plot point of tangency
            ax.scatter(w_val, 0, s=100, c=colorspec[k], edgecolor='k', linewidth=0.7, zorder=2)

            # draw dashed line connecting w axis to point on cost function
            s = np.linspace(0, g_val)
            o = np.ones((len(s)))
            ax.plot(o * w_val, s, 'k--', linewidth=1, zorder=0)

        # plot all input/output pairs generated by algorithm thus far
        if k > 0:
            # plot all points up to this point
            for j in range(min(k - 1, len(w_hist))):
                w_val = w_hist[j]
                g_val = g(w_val)
                ax.scatter(w_val, g_val, s=90, c=colorspec[j], edgecolor='k', marker='X', linewidth=0.7, zorder=3)
                ax.scatter(w_val, 0, s=90, facecolor=colorspec[j], edgecolor='k', linewidth=0.7, zorder=2)  # plot point of tangency

        # plot surrogate function and travel-to point
        if k > 0 and k < len(w_hist) + 1:
            # grab historical weight, compute function and derivative evaluations
            w_eval = w_hist[k - 1]
            if type(w_eval) != float:
                w_eval = float(w_eval)

            # plug value into function and derivatives
            g_eval = g(w_eval)
            g_grad_eval = grad(w_eval)
            g_hess_eval = hess(w_eval)

            # determine width of plotting area for second order approximator
            width = 0.5
            if g_hess_eval < 0:
                width = -width

            # set up quadratic formula params
            a = 0.5 * g_hess_eval
            b = g_grad_eval - 2 * 0.5 * g_hess_eval * w_eval
            c = 0.5 * g_hess_eval * w_eval**2 - g_grad_eval * w_eval - width

            # solve for zero points of the approximator
            w1 = (-b + math.sqrt(b**2 - 4 * a * c)) / float(2 * a + 0.00001)
            w2 = (-b - math.sqrt(b**2 - 4 * a * c)) / float(2 * a + 0.00001)

            # compute second order approximation
            wrange = np.linspace(w1, w2, 100)
            h = g_eval + g_grad_eval * (wrange - w_eval) + 0.5 * g_hess_eval * (wrange - w_eval)**2

            # plot tangent curve
            ax.plot(wrange, h, color=colorspec[k - 1], linewidth=2, zorder=2)

            # plot tangent point
            ax.scatter(w_eval, g_eval, s=100, c='m', edgecolor='k', marker='X', linewidth=0.7, zorder=3)

            # plot next point learned from surrogate
            if np.mod(t, 2) == 0:
                # create next point information
                w_zero = w_eval - g_grad_eval / (g_hess_eval + 10**-5)
                g_zero = g(w_zero)
                h_zero = g_eval + g_grad_eval * (w_zero - w_eval) + 0.5 * g_hess_eval * (w_zero - w_eval)**2

                # draw dashed line connecting the three values
                vals = [0, h_zero, g_zero]
                vals = np.sort(vals)
                s = np.linspace(vals[0], vals[2])
                o = np.ones((len(s)))
                ax.plot(o * w_zero, s, 'k--', linewidth=1)

                # draw intersection at zero and associated point on cost function to hop back to
                ax.scatter(w_zero, h_zero, s=100, c='b', linewidth=0.7, marker='X', edgecolor='k', zorder=3)
                ax.scatter(w_zero, 0, s=100, c='m', edgecolor='k', linewidth=0.7, zorder=3)
                ax.scatter(w_zero, g_zero, s=100, c='m', edgecolor='k', linewidth=0.7, marker='X', zorder=3)  # plot point of tangency

        # fix viewing limits on panel
        ax.set_xlim([wmin, wmax])
        ax.set_ylim([min(-0.3, min(g_plot) - ggap), max(max(g_plot) + ggap, 0.3)])

        # add horizontal axis
        ax.axhline(y=0, color='k', zorder=0, linewidth=0.5)

        # label axes
        ax.set_xlabel(r'$w$', fontsize=14)
        ax.set_ylabel(r'$g(w)$', fontsize=14, rotation=0, labelpad=25)

        # set tickmarks
        ax.set_xticks(np.arange(round(wmin), round(wmax) + 1, 1.0))
        ax.set_yticks(np.arange(round(min(g_plot) - ggap), round(max(g_plot) + ggap) + 1, 1.0))

        return artist,

    anim = animation.FuncAnimation(fig, animate, frames=num_frames, interval=num_frames, blit=True)

    # produce animation and save
    fps = 50
    if 'fps' in kwargs:
        fps = kwargs['fps']
    anim.save(savepath, fps=fps, extra_args=['-vcodec', 'libx264'])
    clear_output()
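
# hypothetical call to the animation routine above: w_hist would come from one
# of the newtons_method variants, 'viz'/'opt' stand in for the owning class
# instances, and saving to mp4 requires ffmpeg to be installed
g = lambda w: w**4 + 0.5 * w**2
w_hist = opt.newtons_method(g, 2.0, max_its=5)   # any variant returning a scalar weight history
viz.newtons_method(g, w_hist, 'newton_demo.mp4', wmax=3, fps=25)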
def newtons_method(self, g, win, **kwargs):
    # flatten gradient for simpler-written descent loop
    self.g, unflatten, w = flatten_func(g, win)
    self.grad = compute_grad(self.g)
    self.hess = compute_hess(self.g)

    # parse optional arguments
    max_its = 20
    if 'max_its' in kwargs:
        max_its = kwargs['max_its']
    self.epsilon = 10**-10
    if 'epsilon' in kwargs:
        self.epsilon = kwargs['epsilon']
    verbose = True
    if 'verbose' in kwargs:
        verbose = kwargs['verbose']
    output = 'history'
    if 'output' in kwargs:
        output = kwargs['output']

    # an optional 'counter' cost can be supplied to score the best weights;
    # by default the objective itself is used
    self.counter = copy.deepcopy(self.g)
    if 'counter' in kwargs:
        counter = kwargs['counter']
        self.counter, unflatten, w = flatten_func(counter, win)

    # start newton's method loop
    if verbose:
        print('starting optimization...')
    geval_old = self.g(w)

    # initialize best-weight tracking and weight history
    self.w_best = unflatten(copy.deepcopy(w))
    g_best = self.counter(w)
    w_hist = []
    if output == 'history':
        w_hist.append(unflatten(w))

    # main loop
    for k in range(max_its):
        # compute gradient and hessian
        grad_val = self.grad(w)
        hess_val = self.hess(w)
        hess_val.shape = (np.size(w), np.size(w))

        # solve linear system for weights
        C = hess_val + self.epsilon * np.eye(np.size(w))
        w = np.linalg.solve(C, np.dot(C, w) - grad_val)

        # eject from process if the cost increases, indicating a (near-)singular system
        geval_new = self.g(w)
        if k > 2 and geval_new > geval_old:
            print('singular system reached')
            time.sleep(1.5)
            clear_output()
            if output == 'history':
                return w_hist
            elif output == 'best':
                return self.w_best
        else:
            geval_old = geval_new

        # record current weights / best weights seen so far
        if output == 'best':
            if self.g(w) < g_best:
                g_best = self.counter(w)
                self.w_best = copy.deepcopy(unflatten(w))
        w_hist.append(unflatten(w))

    if verbose:
        print('...optimization complete!')
        time.sleep(1.5)
        clear_output()

    if output == 'best':
        return self.w_best
    elif output == 'history':
        return w_hist
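
# hypothetical call to the variant above: request only the best weights seen
# rather than the full history; 'opt' again stands in for the owning class
g = lambda w: np.log(1 + np.exp(np.sum(w**2)))
w0 = np.random.randn(3)
w_best = opt.newtons_method(g, w0, max_its=15, output='best', verbose=False)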