def show_stationary_v2(func1,func2,func3,**kwargs):
    '''
    Draw three functions in a 3 x 3 grid of panels, one column per function.

    Each column shows, top to bottom, the function, its first derivative and
    its second derivative.  Sample points where the first derivative is zero
    (or flips sign between neighbouring samples) are marked on all three
    panels, and a flat tangent segment of half-width 1 is drawn through each
    of them on the function panel.

    Keyword arguments:
        w -- input samples to plot over (default: 5000 points on [-3,3])
    '''
    # input samples shared by every panel
    w = kwargs['w'] if 'w' in kwargs else np.linspace(-3,3,5000)

    # figure layout: column = function, row = derivative order
    fig = plt.figure(figsize = (7,5))
    fig.subplots_adjust(wspace=0.2,hspace=0.8)
    gs = gridspec.GridSpec(3, 3, width_ratios=[1,1,1])

    for col, func in enumerate((func1,func2,func3)):
        ax = plt.subplot(gs[col])
        ax2 = plt.subplot(gs[col+3],sharex=ax)
        ax3 = plt.subplot(gs[col+6],sharex=ax)

        # sample the function and its first two derivatives point by point
        g_plot = func(w)
        grad = compute_grad(func)
        grad_plot = np.array([grad(s) for s in w])
        hess = compute_grad(grad)
        hess_plot = np.array([hess(s) for s in w])

        # top panel: the function itself, padded by 10% on every side
        wgap = (max(w) - min(w))*0.1
        ggap = (max(g_plot) - min(g_plot))*0.1
        ax.plot(w,g_plot,color = 'k',zorder = 1,linewidth=2)
        ax.set_title(r'$g(w)$',fontsize = 12)
        ax.set_xlim([min(w)-wgap,max(w)+wgap])
        ax.set_ylim([min(g_plot) - ggap, max(g_plot) + ggap])

        # middle / bottom panels: derivative curve plus a dashed zero line
        derivative_panels = (
            (ax2, grad_plot, r'$\frac{\mathrm{d}}{\mathrm{d}w}g(w)$'),
            (ax3, hess_plot, r'$\frac{\mathrm{d}^2}{\mathrm{d}w^2}g(w)$'),
        )
        for panel, curve, label in derivative_panels:
            pad = (max(curve) - min(curve))*0.1
            panel.plot(w,curve,color = 'k',zorder = 1,linewidth = 2)
            panel.plot(w,curve*0,color = 'k',zorder = 1,linewidth = 1,linestyle = '--')
            panel.set_title(label,fontsize = 12)
            panel.set_ylim([min(curve) - pad, max(curve) + pad])

        for panel in (ax, ax2, ax3):
            panel.tick_params(labelsize=6)

        # locate zero-derivative samples 'visually' from the sampled gradient
        signs = np.sign(grad_plot)
        hits = []
        for i, (left, right) in enumerate(zip(signs[:-1], signs[1:])):
            # exact or near-zero derivative at the left sample
            if left == 0 or abs(grad_plot[i]) < 10**-5:
                hits.append(i)
            # exact zero at the right sample
            if right == 0:
                hits.append(i+1)
            # opposite non-zero signs: the derivative crossed zero in between
            if left != 0 and right != 0 and abs(left + right) < 2:
                hits.append(i)
        hits = np.unique(hits)

        # mark every stationary sample and draw its flat tangent segment
        marker = dict(s = 40,c = 'lime',edgecolor = 'k',linewidth = 2,zorder = 3)
        for idx in hits:
            w_val = w[idx]
            g_val = func(w_val)
            grad_val = grad(w_val)
            hess_val = hess(w_val)
            ax.scatter(w_val,g_val,**marker)
            ax2.scatter(w_val,grad_val,**marker)
            ax3.scatter(w_val,hess_val,**marker)

            # slope is zero at a stationary point, so the tangent is constant
            wrange = np.linspace(w_val - 1,w_val + 1, 100)
            h = g_val + 0*(wrange - w_val)
            ax.plot(wrange,h,color = 'lime',alpha = 0.5,linewidth = 1.5,zorder = 2)
    plt.show()
def show_stationary_1func(func,**kwargs):
    '''
    Plot one function twice, side by side: plain in the left panel and with
    its stationary points marked in the right panel.

    Stationary points are found on the sampling grid as the samples where the
    derivative is zero (or below 1e-5 in magnitude) or flips sign between
    neighbouring samples.

    Keyword arguments:
        wmax -- half-width of the plotting range; the function is sampled at
                5000 points on [-wmax, wmax] (default: 3)
    '''
    # input range for the function; the default is positive so that the grid
    # runs left to right (the previous default of -3 produced a reversed grid)
    wmax = kwargs.get('wmax', 3)
    w = np.linspace(-wmax,wmax,5000)

    # construct figure with two panels sharing the vertical axis
    fig = plt.figure(figsize = (6,3))
    fig.subplots_adjust(wspace=0.3,hspace=0.4)
    gs = gridspec.GridSpec(1, 2, width_ratios=[1,1])
    ax = plt.subplot(gs[0])
    ax2 = plt.subplot(gs[1],sharey=ax)

    # sample the function and its derivative (the derivative point by point)
    g_plot = func(w)
    grad = compute_grad(func)
    grad_plot = np.array([grad(s) for s in w])

    # 10% padding around the plotted data
    wgap = (max(w) - min(w))*0.1
    ggap = (max(g_plot) - min(g_plot))*0.1

    # left panel: the function alone
    ax.plot(w,g_plot,color = 'k',zorder = 1,linewidth=2)
    ax.set_title(r'$g(w)$',fontsize = 12)
    ax.set_xlim([min(w)-wgap,max(w)+wgap])
    ax.set_ylim([min(g_plot) - ggap, max(g_plot) + ggap])

    # right panel: the same curve, to be decorated with stationary points
    ax2.plot(w,g_plot,color = 'k',zorder = 1,linewidth = 2)
    ax2.set_title(r'$g(w)$',fontsize = 12)
    ax2.set_ylim([min(g_plot) - ggap, max(g_plot) + ggap])

    ax.tick_params(labelsize=6)
    ax2.tick_params(labelsize=6)

    # determine zero-derivative samples 'visually' from the sampled gradient
    grad_station = np.sign(grad_plot)
    ind = []
    for i in range(len(grad_station)-1):
        pt1 = grad_station[i]
        pt2 = grad_station[i+1]

        # exact or near-zero derivative at either end of the pair
        if pt1 == 0 or abs(grad_plot[i]) < 10**-5:
            ind.append(i)
        if pt2 == 0:
            ind.append(i+1)

        # opposite non-zero signs: the derivative crossed zero in between
        if abs(pt1 + pt2) < 2 and pt1 != 0 and pt2 != 0:
            ind.append(i)

    # keep unique samples and mark them on the right panel
    for pt in np.unique(ind):
        w_val = w[pt]
        g_val = func(w_val)
        ax2.scatter(w_val,g_val,s = 40,c = 'lime',edgecolor = 'k',linewidth = 2,zorder = 3)
    plt.show()
def compare_2d3d(func1,func2,**kwargs):
    '''
    Show first-order (tangent) approximations in one and two input dimensions.

    The left panel plots the single-input function func1 together with its
    tangent line at pt1.  The right panel plots the two-input function func2
    as a surface together with its tangent plane at pt2.

    Keyword arguments:
        view -- [elevation, azimuth] passed to the 3d panel (default [20,-65])
        w    -- input samples used along each axis (default 200 points on [-3,3])
        pt1  -- point of tangency for func1 (default 0)
        pt2  -- point of tangency for func2, a pair (default [0,0])
    '''
    view = kwargs.get('view', [20,-65])
    w = kwargs.get('w', np.linspace(-3,3,200))
    pt1 = float(kwargs.get('pt1', 0))
    pt2 = kwargs.get('pt2', [0,0])

    # construct figure, removing whitespace around and between panels
    fig = plt.figure(figsize = (6,3))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    fig.subplots_adjust(wspace=0.01,hspace=0.01)
    gs = gridspec.GridSpec(1, 2, width_ratios=[1,2])

    ### draw 2d version ###
    ax1 = plt.subplot(gs[0])
    grad = compute_grad(func1)

    # sample the function; ggap pads the vertical viewing range
    g_plot = func1(w)
    g_range = max(g_plot) - min(g_plot)
    ggap = g_range*0.2

    # function curve and point of tangency
    g_val = func1(pt1)
    ax1.plot(w,g_plot,color = 'k',zorder = 1,linewidth=2)
    ax1.scatter(pt1,g_val,s = 60,c = 'lime',edgecolor = 'k',linewidth = 2,zorder = 3)

    # first order approximation in point-slope form, drawn over [pt1-3, pt1+3]
    g_grad_val = grad(pt1)
    wrange = np.linspace(pt1 - 3,pt1 + 3, 100)
    h = g_val + g_grad_val*(wrange - pt1)
    ax1.plot(wrange,h,color = 'lime',alpha = 0.5,linewidth = 3,zorder = 2)

    # make new x-axis
    ax1.plot(w,g_plot*0,linewidth=3,color = 'k')

    # fix viewing limits and strip the frame down to the bottom spine
    ax1.set_xlim([min(w),max(w)])
    ax1.set_ylim([min(min(g_plot) - ggap,-4),max(max(g_plot) + ggap,0.5)])
    ax1.set_xlabel('$w$',fontsize = 12,labelpad = -50)
    ax1.set_ylabel('$g(w)$',fontsize = 25,rotation = 0,labelpad = 50)
    ax1.grid(False)
    ax1.yaxis.set_visible(False)
    ax1.spines['right'].set_visible(False)
    ax1.spines['top'].set_visible(False)
    ax1.spines['left'].set_visible(False)

    ### draw 3d version ###
    ax2 = plt.subplot(gs[1],projection='3d')
    grad = compute_grad(func2)
    n = len(w)

    # evaluate func2 on the grid, passed as a 2 x n^2 array of input columns
    w1_vals, w2_vals = np.meshgrid(w,w)
    w_vals = np.concatenate((w1_vals.reshape(n**2,1),w2_vals.reshape(n**2,1)),axis=1).T
    g_vals = np.reshape(func2(w_vals),(n,n))

    # point of tangency as a 2 x 1 column, its evaluation and gradient
    w_val = np.array([float(pt2[0]),float(pt2[1])]).reshape(2,1)
    g_val = func2(w_val)
    grad_val = np.reshape(grad(w_val),(2,1))

    # tangent hyperplane, evaluated on the same grid shifted to be centred at w_val
    def tangent(weh):
        return g_val + (weh[0]-w_val[0])*grad_val[0] + (weh[1]-w_val[1])*grad_val[1]

    h_vals = np.reshape(tangent(w_vals + w_val),(n,n))
    w1tan_vals = w1_vals + w_val[0]
    w2tan_vals = w2_vals + w_val[1]

    ### plot function ###
    ax2.plot_surface(w1_vals, w2_vals, g_vals, alpha = 0.5,color = 'w',rstride=25, cstride=25,linewidth=1,edgecolor = 'k',zorder = 2)

    ### plot z=0 plane ###
    ax2.plot_surface(w1_vals, w2_vals, g_vals*0, alpha = 0.1,color = 'w',zorder = 1,rstride=25, cstride=25,linewidth=0.3,edgecolor = 'k')

    ### plot tangent plane ###
    ax2.plot_surface(w1tan_vals, w2tan_vals, h_vals, alpha = 0.4,color = 'lime',zorder = 1,rstride=50, cstride=50,linewidth=1,edgecolor = 'k')

    # scatter tangency
    ax2.scatter(w_val[0],w_val[1],g_val,s = 70,c = 'lime',edgecolor = 'k',linewidth = 2)

    ### clean up plot ###
    ax2.xaxis.pane.fill = False
    ax2.yaxis.pane.fill = False
    ax2.zaxis.pane.fill = False
    ax2.yaxis.pane.set_edgecolor('white')
    ax2.zaxis.pane.set_edgecolor('white')

    # remove axes lines and tickmarks; the axis objects are reached through
    # xaxis/yaxis/zaxis (as for the panes above) because the older w_xaxis /
    # w_yaxis / w_zaxis aliases are not available in newer Matplotlib
    ax2.zaxis.line.set_lw(0.)
    ax2.set_zticks([])
    ax2.xaxis.line.set_lw(0.)
    ax2.set_xticks([])
    ax2.yaxis.line.set_lw(0.)
    ax2.set_yticks([])

    # set viewing angle
    ax2.view_init(view[0],view[1])

    # set viewing limits: 40% wider than the sampled range, and a z range
    # that always includes [-0.5, 0.5]
    wgap = (max(w) - min(w))*0.4
    y = max(w) + wgap
    ax2.set_xlim([-y,y])
    ax2.set_ylim([-y,y])
    zmin = min(np.min(g_vals),-0.5)
    zmax = max(np.max(g_vals),+0.5)
    ax2.set_zlim([zmin,zmax])

    # label plot
    fontsize = 12
    ax2.set_xlabel(r'$w_1$',fontsize = fontsize,labelpad = -30)
    ax2.set_ylabel(r'$w_2$',fontsize = fontsize,rotation = 0,labelpad=-30)
    plt.show()
def run(self, g, w_init, steplength_vals, max_its, **kwargs):
    '''
    Run gradient descent once per steplength and plot each run in its own
    two-panel figure: the descent path on the left (over a contour plot for
    a two-input function, over the function curve for a single-input one)
    and the cost-function history on the right.

    Arguments:
        g               -- function to minimise
        w_init          -- initial point (a pair for a two-input function,
                           otherwise a scalar)
        steplength_vals -- iterable of steplength values, one figure each
        max_its         -- maximum number of iterations per run

    Keyword arguments:
        pts          -- 'on' to scatter the individual steps in the
                        two-input plots (default 'off')
        linewidth    -- width of the path connectors (default 2.5)
        axes         -- True to draw dashed coordinate axes (default False)
        num_contours -- number of contour levels requested (default 15)
        version      -- stored on self.version (default 'unnormalized')
        xmin, xmax, ymin, ymax -- override the automatic viewing limits

    The descent itself is delegated to self.run_gradient_descent(), which is
    expected to fill self.w_hist.
    '''
    ### input arguments ###
    self.g = g
    self.max_its = max_its
    self.grad = compute_grad(self.g)          # gradient of input function
    self.w_init = w_init

    pts = kwargs.get('pts', 'off')
    linewidth = kwargs.get('linewidth', 2.5)
    axes = kwargs.get('axes', False)
    base_num_contours = kwargs.get('num_contours', 15)

    # version of gradient descent to use (normalized or unnormalized)
    self.version = kwargs.get('version', 'unnormalized')

    # get initial point
    two_inputs = np.size(self.w_init) == 2
    if two_inputs:
        self.w_init = np.asarray([float(s) for s in self.w_init])
    else:
        self.w_init = float(self.w_init)

    ##### one figure per steplength #####
    for step in steplength_vals:
        # build the figure directly from the gridspec; creating it with
        # plt.subplots first would leave two extra axes under these panels
        fig = plt.figure(figsize=(9, 4))
        gs = gridspec.GridSpec(1, 2, width_ratios=[2, 1])
        ax = plt.subplot(gs[0], aspect='equal')
        ax2 = plt.subplot(gs[1])

        #### run gradient descent ####
        self.w_hist = []
        self.steplength = step
        self.run_gradient_descent()
        num_pts = len(self.w_hist)

        # colors for points: the first half ramps 0 -> 1, the second half is 1;
        # columns are (s, reversed s, 0) read as RGB
        half = round(num_pts / 2)
        s = np.linspace(0, 1, len(self.w_hist[:half])).reshape(-1, 1)
        t = np.ones(len(self.w_hist[half:])).reshape(-1, 1)
        s = np.vstack((s, t))
        colorspec = np.concatenate((s, np.flipud(s), np.zeros((len(s), 1))), 1)

        if two_inputs:
            #### two-input function: path over a contour plot ####
            # viewing limits from the path, padded by 10%, unless overridden
            xvals = [w[0] for w in self.w_hist] + [self.w_init[0]]
            yvals = [w[1] for w in self.w_hist] + [self.w_init[1]]
            xgap = (max(xvals) - min(xvals)) * 0.1
            ygap = (max(yvals) - min(yvals)) * 0.1
            xmin = kwargs.get('xmin', min(xvals) - xgap)
            xmax = kwargs.get('xmax', max(xvals) + xgap)
            ymin = kwargs.get('ymin', min(yvals) - ygap)
            ymax = kwargs.get('ymax', max(yvals) + ygap)

            # evaluate the function on a 400 x 400 grid, one point at a time
            w1 = np.linspace(xmin, xmax, 400)
            w2 = np.linspace(ymin, ymax, 400)
            w1_vals, w2_vals = np.meshgrid(w1, w2)
            h = np.concatenate((w1_vals.reshape(-1, 1), w2_vals.reshape(-1, 1)), axis=1)
            func_vals = np.asarray([g(pt) for pt in h]).reshape(len(w1), len(w2))

            ### contour levels ###
            # restart from the requested count for every figure, since the
            # count is consumed while the levels are built
            num_contours = base_num_contours
            levelmin = func_vals.min()
            levelmax = func_vals.max()
            cutoff = (levelmax - levelmin) * 0.5
            numper = 3
            levels1 = np.linspace(cutoff, levelmax, numper)
            num_contours -= numper

            levels2 = np.linspace(levelmin, cutoff, min(num_contours, numper))
            levels = np.unique(np.append(levels1, levels2))
            num_contours -= numper
            # keep refining the interval between the lowest two levels
            while num_contours > 0:
                cutoff = levels[1]
                levels2 = np.linspace(levelmin, cutoff, min(num_contours, numper))
                levels = np.unique(np.append(levels2, levels))
                num_contours -= numper

            ax.contour(w1_vals, w2_vals, func_vals, levels=levels, colors='k')
            ax.contourf(w1_vals, w2_vals, func_vals, levels=levels, cmap='Blues')

            # plot path on contour and cost history
            for j, w_val in enumerate(self.w_hist):
                g_val = self.g(w_val)

                if pts == 'on':
                    ax.scatter(w_val[0], w_val[1], s=30, c=colorspec[j], edgecolor='k',
                               linewidth=1.5 * math.sqrt(1 / (float(j) + 1)), zorder=3)
                    ax2.scatter(j, g_val, s=30, c=colorspec[j], edgecolor='k',
                                linewidth=0.7, zorder=3)

                # connector between consecutive points: a colored line on top
                # of a slightly wider black one
                if j > 0:
                    w_old = self.w_hist[j - 1]
                    g_old = self.g(w_old)
                    ax.plot([w_old[0], w_val[0]], [w_old[1], w_val[1]], color=colorspec[j],
                            linewidth=linewidth, alpha=1, zorder=2)
                    ax.plot([w_old[0], w_val[0]], [w_old[1], w_val[1]], color='k',
                            linewidth=linewidth + 0.4, alpha=1, zorder=1)
                    ax2.plot([j - 1, j], [g_old, g_val], color=colorspec[j],
                             linewidth=2, alpha=1, zorder=2)
                    ax2.plot([j - 1, j], [g_old, g_val], color='k',
                             linewidth=2.5, alpha=1, zorder=1)

            # clean up panel
            ax.set_xlabel('$w_1$', fontsize=12)
            ax.set_ylabel('$w_2$', fontsize=12, rotation=0)
            ax.axhline(y=0, color='k', zorder=0, linewidth=0.5)
            ax.axvline(x=0, color='k', zorder=0, linewidth=0.5)
            ax.set_xlim([xmin, xmax])
            ax.set_ylim([ymin, ymax])
        else:
            #### single-input function: path over the function curve ####
            if 'xmin' in kwargs and 'xmax' in kwargs:
                xmin, xmax = kwargs['xmin'], kwargs['xmax']
            else:
                # fall back to the extent of the path (padded by 10%, or by
                # 1 when the path has no extent) for any limit not supplied
                path = [float(np.ravel(v)[0]) for v in self.w_hist]
                path.append(self.w_init)
                lo, hi = min(path), max(path)
                pad = (hi - lo) * 0.1 or 1.0
                xmin = kwargs.get('xmin', lo - pad)
                xmax = kwargs.get('xmax', hi + pad)

            w_plot = np.linspace(xmin, xmax, 500)
            g_plot = self.g(w_plot)
            ax.plot(w_plot, g_plot, color='k', linewidth=2, zorder=2)

            # set viewing limits, padded by 20%
            ymin = min(g_plot)
            ymax = max(g_plot)
            ygap = (ymax - ymin) * 0.2
            ax.set_ylim([ymin - ygap, ymax + ygap])

            # clean up panel
            ax.axhline(y=0, color='k', zorder=1, linewidth=0.25)
            ax.axvline(x=0, color='k', zorder=1, linewidth=0.25)
            ax.set_xlabel(r'$w$', fontsize=13)
            ax.set_ylabel(r'$g(w)$', fontsize=13, rotation=0, labelpad=25)

            # plot input and evaluation points on function, plus cost history
            for j, w_val in enumerate(self.w_hist):
                g_val = self.g(w_val)
                edge = 0.5 * (1 / (float(j) + 1))**(0.4)

                ax.scatter(w_val, g_val, s=90, c=colorspec[j], edgecolor='k',
                           linewidth=edge, zorder=3, marker='X')      # evaluation on function
                ax.scatter(w_val, 0, s=90, facecolor=colorspec[j], edgecolor='k',
                           linewidth=edge, zorder=3)                  # input on the w axis
                ax2.scatter(j, g_val, s=30, c=colorspec[j], edgecolor='k',
                            linewidth=0.7, zorder=3)

                if j > 0:
                    g_old = self.g(self.w_hist[j - 1])
                    ax2.plot([j - 1, j], [g_old, g_val], color=colorspec[j],
                             linewidth=2, alpha=1, zorder=2)
                    ax2.plot([j - 1, j], [g_old, g_val], color='k',
                             linewidth=2.5, alpha=1, zorder=1)

        # exact comparison kept on purpose: only True (or 1) enables the axes
        if axes == True:
            ax.axhline(linestyle='--', color='k', linewidth=1)
            ax.axvline(linestyle='--', color='k', linewidth=1)

        # title: numeric steplengths are shown as alpha = value, anything
        # else is used as the title unchanged
        title = self.steplength
        if isinstance(self.steplength, (int, float)):
            title = r'$\alpha = $' + str(self.steplength)
        ax.set_title(title, fontsize=12)

        ax2.axhline(y=0, color='k', zorder=0, linewidth=0.5)
        ax2.set_xlabel('iteration', fontsize=12)
        ax2.set_ylabel(r'$g(w)$', fontsize=12, rotation=0, labelpad=25)

        # scale the history panel relative to the left panel's on-figure box
        ax.set(aspect='equal')
        box = ax.get_position()
        yr = box.y1 - box.y0
        xr = box.x1 - box.x0
        aspectratio = 1.25 * xr / yr
        ratio_default = (ax2.get_xlim()[1] - ax2.get_xlim()[0]) / (
            ax2.get_ylim()[1] - ax2.get_ylim()[0])
        ax2.set_aspect(ratio_default * aspectratio)

    # plot
    plt.show()
def animate_it(self, **args):
    '''
    Build an animation of gradient descent on a two-input function, one frame
    per steplength value.

    Frame 0 shows the function surface with the initial point.  Every later
    frame reruns gradient descent with the next value from self.alpha_range
    and draws the resulting path on the surface (left, 3d) together with the
    cost-function history (right).

    Keyword arguments:
        g           -- function to minimise (required)
        alpha_range -- steplength values to step through
                       (default: 20 values from 1e-4 to 1)
        max_its     -- iterations per run (default 20)
        w_init      -- initial point, a pair; stored as a 2 x 1 array
        view        -- [elevation, azimuth] of the 3d panel (default [10, 50])

    Returns the matplotlib FuncAnimation object.
    '''
    self.g = args['g']                          # input function defined by user
    self.grad = compute_grad(self.g)            # first derivative of input
    self.hess = compute_grad(self.grad)         # second derivative of input

    # range of step values to illustrate, and iteration budget for all runs
    self.alpha_range = args.get('alpha_range', np.linspace(10**-4, 1, 20))
    self.max_its = args.get('max_its', 20)

    if 'w_init' in args:
        start = [float(a) for a in args['w_init']]
        self.w_init = np.asarray(start).reshape(2, 1)

    view = args.get('view', [10, 50])

    # initialize figure: wide 3d panel on the left, history panel on the right
    fig = plt.figure(figsize=(9, 5))
    artist = fig
    gs = gridspec.GridSpec(1, 2, width_ratios=[3, 1])
    ax1 = plt.subplot(gs[0], projection='3d')
    ax2 = plt.subplot(gs[1])

    print('starting animation rendering...')
    num_frames = len(self.alpha_range) + 1

    def animate(k):
        ax1.cla()
        ax2.cla()

        # print rendering update
        frame_no = k + 1
        if frame_no % 25 == 0:
            print('rendering animation frame ' + str(frame_no) + ' of ' + str(num_frames))
        if frame_no == num_frames:
            print('animation rendering complete!')
            time.sleep(1.5)
            clear_output()

        if k == 0:
            # first frame: initial point on the bare function surface
            w_val = self.w_init
            g_val = self.g(w_val)
            ax1.scatter(w_val[0], w_val[1], g_val, s=100, c='m', edgecolor='k',
                        linewidth=0.7, zorder=2)

            # evaluate the function on a 100 x 100 grid over [-3,3]^2
            r = np.linspace(-3, 3, 100)
            side = len(r)
            w1_vals, w2_vals = np.meshgrid(r, r)
            w1_vals.shape = (side**2, 1)
            w2_vals.shape = (side**2, 1)
            g_vals = self.g([w1_vals, w2_vals])

            # back to grid form for the surface plot
            w1_vals.shape = (side, side)
            w2_vals.shape = (side, side)
            g_vals.shape = (side, side)
            ax1.plot_surface(w1_vals, w2_vals, g_vals, alpha=0.1, color='k',
                             rstride=15, cstride=15, linewidth=1, edgecolor='k')

        if k > 0:
            # later frames: one descent run per steplength value
            alpha = self.alpha_range[k - 1]

            # setup axes
            ax1.set_title(r'$\alpha = $' + r'{:.2f}'.format(alpha), fontsize=14)
            ax2.set_xlabel('iteration', fontsize=13)
            ax2.set_ylabel('cost function value', fontsize=13)

            # run gradient descent method
            self.w_hist = []
            self.run_gradient_descent(alpha=alpha)

            # plot function
            self.plot_function(ax1)

            # colors for points: first half ramps 0 -> 1, second half is 1;
            # columns are (s, reversed s, 0) read as RGB
            total = len(self.w_hist)
            half = round(total / 2)
            ramp = np.linspace(0, 1, len(self.w_hist[:half]))
            ramp.shape = (len(ramp), 1)
            flat = np.ones(len(self.w_hist[half:]))
            flat.shape = (len(flat), 1)
            s = np.vstack((ramp, flat))
            self.colorspec = np.concatenate((s, np.flipud(s)), 1)
            self.colorspec = np.concatenate((self.colorspec, np.zeros((len(s), 1))), 1)

            # plot everything for each iteration
            for j, w_val in enumerate(self.w_hist):
                g_val = self.g(w_val)
                grad_val = self.grad(w_val)     # evaluated here but not drawn
                shade = self.colorspec[j]

                ax1.scatter(w_val[0], w_val[1], g_val, s=90, c=shade, edgecolor='k',
                            linewidth=0.7, zorder=3)

                ### plot all on cost function decrease plot
                ax2.scatter(j, g_val, s=90, c=shade, edgecolor='k',
                            linewidth=0.7, zorder=3)

                # clean up second axis
                ax2.set_xticks(np.arange(total))

                # connector between consecutive history points
                if j > 0:
                    g_old = self.g(self.w_hist[j - 1])
                    g_new = self.g(self.w_hist[j])
                    ax2.plot([j - 1, j], [g_old, g_new], color=shade,
                             linewidth=2, alpha=0.4, zorder=1)

        # clean up plot
        ax1.view_init(view[0], view[1])
        ax1.set_axis_off()

        return artist,

    anim = animation.FuncAnimation(fig, animate, frames=num_frames,
                                   interval=num_frames, blit=True)
    return anim
def compare_versions_3d(self, g, w_init, steplength, max_its, **kwargs):
    '''
    Compare normalized and unnormalized gradient descent on a two-input
    function.

    Draws a 2 x 2 set of panels: the top row shows the 'normalized' run, the
    bottom row the 'unnormalized' run.  In each row the left panel is a 3d
    view of the function with the path drawn in the z = 0 plane, and the
    right panel is a contour plot with the same path.  Paths with fewer than
    10 points also get arrows between consecutive points.

    Arguments:
        g          -- function to minimise
        w_init     -- initial point (a pair)
        steplength -- steplength, stored on self.steplength
        max_its    -- maximum number of iterations, stored on self.max_its

    Keyword arguments:
        wmax         -- plotting half-width; 0.5 is added to the given value
                        (default half-width: 1)
        view         -- [elevation, azimuth] of the 3d panels (default [20, -50])
        axes         -- True to draw dashed axes on the contour panels
        num_contours -- number of contour lines (default 10)
    '''
    ### input arguments ###
    self.g = g
    self.steplength = steplength
    self.max_its = max_its
    self.grad = compute_grad(self.g)            # gradient of input function
    self.w_init = np.asarray([float(s) for s in w_init])

    wmax = kwargs['wmax'] + 0.5 if 'wmax' in kwargs else 1
    view = kwargs.get('view', [20, -50])
    axes = kwargs.get('axes', False)
    num_contours = kwargs.get('num_contours', 10)

    ##### construct figure with panels #####
    fig = plt.figure(figsize=(12, 6))
    gs = gridspec.GridSpec(2, 3, width_ratios=[1, 5, 10])
    ax3 = plt.subplot(gs[1], projection='3d')
    ax4 = plt.subplot(gs[2], aspect='equal')
    ax5 = plt.subplot(gs[4], projection='3d')
    ax6 = plt.subplot(gs[5], aspect='equal')

    # remove whitespace from figure
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)

    #### evaluate the function on a 200 x 200 grid, one point at a time ####
    w = np.linspace(-wmax, wmax, 200)
    side = len(w)
    w1_vals, w2_vals = np.meshgrid(w, w)
    w1_vals.shape = (side**2, 1)
    w2_vals.shape = (side**2, 1)
    grid_pts = np.concatenate((w1_vals, w2_vals), axis=1)
    func_vals = np.asarray([g(pt) for pt in grid_pts])
    w1_vals.shape = (side, side)
    w2_vals.shape = (side, side)
    func_vals.shape = (side, side)

    #### run both versions, each into its own row of panels ####
    rows = (
        ('normalized', ax3, ax4, 'normalized gradient descent'),
        ('unnormalized', ax5, ax6, 'unnormalized gradient descent'),
    )
    for algo, ax, ax2, title in rows:
        self.version = algo

        # plot function
        ax.plot_surface(w1_vals, w2_vals, func_vals, alpha=0.1, color='w',
                        rstride=25, cstride=25, linewidth=1, edgecolor='k', zorder=2)

        # plot z=0 plane
        ax.plot_surface(w1_vals, w2_vals, func_vals * 0, alpha=0.1, color='w', zorder=1,
                        rstride=25, cstride=25, linewidth=0.3, edgecolor='k')

        ### contour plot, optionally with dashed axes ###
        ax2.contour(w1_vals, w2_vals, func_vals, num_contours, colors='k')
        if axes == True:
            ax2.axhline(linestyle='--', color='k', linewidth=1)
            ax2.axvline(linestyle='--', color='k', linewidth=1)

        # run the descent; the path is read back from self.w_hist
        self.w_hist = []
        self.run_gradient_descent()

        # colors for points: first half ramps 0 -> 1, second half is 1;
        # columns are (s, reversed s, 0) read as RGB
        half = round(len(self.w_hist) / 2)
        ramp = np.linspace(0, 1, len(self.w_hist[:half]))
        ramp.shape = (len(ramp), 1)
        flat = np.ones(len(self.w_hist[half:]))
        flat.shape = (len(flat), 1)
        s = np.vstack((ramp, flat))
        colorspec = np.concatenate((s, np.flipud(s)), 1)
        colorspec = np.concatenate((colorspec, np.zeros((len(s), 1))), 1)

        #### scatter path points (edge width shrinks with the step index) ####
        for k, w_now in enumerate(self.w_hist):
            fade = math.sqrt(1 / (float(k) + 1))
            ax.scatter(w_now[0], w_now[1], 0, s=60, c=colorspec[k], edgecolor='k',
                       linewidth=0.5 * fade, zorder=3)
            ax2.scatter(w_now[0], w_now[1], s=60, c=colorspec[k], edgecolor='k',
                        linewidth=1.5 * fade, zorder=3)

        #### connect points with arrows, for short paths only ####
        if len(self.w_hist) < 10:
            for i in range(len(self.w_hist) - 1):
                pt1 = self.w_hist[i]
                pt2 = self.w_hist[i + 1]

                # 3d arrow in the z = 0 plane of the left plot
                a = Arrow3D([pt1[0], pt2[0]], [pt1[1], pt2[1]], [0, 0],
                            mutation_scale=10, lw=2, arrowstyle="-|>", color="k")
                ax.add_artist(a)

                # 2d arrow in the right plot, covering 78% of the step
                ax2.arrow(pt1[0], pt1[1], (pt2[0] - pt1[0]) * 0.78, (pt2[1] - pt1[1]) * 0.78,
                          head_width=0.1, head_length=0.1, fc='k', ec='k', linewidth=3,
                          zorder=2, length_includes_head=True)

        ### cleanup panels ###
        ax.set_xlabel('$w_1$', fontsize=12)
        ax.set_ylabel('$w_2$', fontsize=12, rotation=0)
        ax.set_title(title, fontsize=12)
        ax.view_init(view[0], view[1])

        ax2.set_xlabel('$w_1$', fontsize=12)
        ax2.set_ylabel('$w_2$', fontsize=12, rotation=0)
        ax2.axhline(y=0, color='k', zorder=0, linewidth=0.5)
        ax2.axvline(x=0, color='k', zorder=0, linewidth=0.5)

        # clean up axis: transparent panes, white pane edges, invisible grid
        for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
            axis.pane.fill = False
        for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
            axis.pane.set_edgecolor('white')
        for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
            axis._axinfo["grid"]['color'] = (1, 1, 1, 0)

    # plot
    plt.show()
def newtons_method(self, g, win, **kwargs):
    '''
    Minimise g with Newton's method, starting from the weights win.

    Each step solves (H + epsilon*I) w_new = (H + epsilon*I) w - grad for
    w_new, where H is the Hessian at the current w.  From the fourth
    iteration on, the loop stops early as soon as a step increases the cost.

    Arguments:
        g   -- function to minimise
        win -- initial weights; flattened via flatten_func, and un-flattened
               again before being stored or returned

    Keyword arguments:
        max_its -- maximum number of Newton steps (default 20)
        epsilon -- multiple of the identity added to the Hessian (default 1e-10)
        verbose -- print start / finish messages (default True)
        output  -- 'history' to return the list of weights visited (default),
                   'best' to return the weights with the lowest counter value
        counter -- function used to rank weights when output == 'best'
                   (default: g itself)

    Returns the weight history or the best weights, according to output;
    any other value of output returns None.
    '''
    # flatten the function for a simpler-written descent loop
    self.g, unflatten, w = flatten_func(g, win)
    self.grad = compute_grad(self.g)
    self.hess = compute_hess(self.g)

    # parse optional arguments
    max_its = kwargs.get('max_its', 20)
    self.epsilon = kwargs.get('epsilon', 10**-10)
    verbose = kwargs.get('verbose', True)
    output = kwargs.get('output', 'history')

    if 'counter' in kwargs:
        self.counter, unflatten, w = flatten_func(kwargs['counter'], win)
    else:
        self.counter = copy.deepcopy(self.g)

    # start newton's method loop
    if verbose == True:
        print('starting optimization...')

    geval_old = self.g(w)
    self.w_best = unflatten(copy.deepcopy(w))
    g_best = self.counter(w)

    # container for weight history
    w_hist = []
    if output == 'history':
        w_hist.append(unflatten(w))

    for k in range(max_its):
        # compute gradient and (square) hessian
        grad_val = self.grad(w)
        dim = np.size(w)
        hess_val = np.reshape(self.hess(w), (dim, dim))

        # solve the regularised linear system for the new weights
        C = hess_val + self.epsilon * np.eye(dim)
        w = np.linalg.solve(C, np.dot(C, w) - grad_val)

        # eject from process if the cost went up
        geval_new = self.g(w)
        if k > 2 and geval_new > geval_old:
            print('singular system reached')
            time.sleep(1.5)
            clear_output()
            if output == 'history':
                return w_hist
            elif output == 'best':
                return self.w_best
        else:
            geval_old = geval_new

        # record current weights; g_best holds a counter value, so the
        # candidate is ranked by the counter as well
        if output == 'best':
            count_val = self.counter(w)
            if count_val < g_best:
                g_best = count_val
                self.w_best = copy.deepcopy(unflatten(w))

        w_hist.append(unflatten(w))

    if verbose == True:
        print('...optimization complete!')
        time.sleep(1.5)
        clear_output()

    if output == 'best':
        return self.w_best
    elif output == 'history':
        return w_hist
def animate_it(self,savepath,**kwargs):
    '''
    Render and save an animation of gradient descent on a single-input
    function, one frame per steplength value.

    Frame 0 shows the function with the initial point.  Every later frame
    reruns gradient descent with the next value from self.steplength_range
    and draws the visited points on the function (left) and the
    cost-function history (right).

    Arguments:
        savepath -- output path handed to the animation's save method

    Keyword arguments:
        g                -- function to minimise (required)
        steplength_range -- steplength values to step through
                            (default: 20 values from 1e-4 to 1)
        wmin, wmax       -- range over which the function is sampled
                            (default -3.1, 3.1)
        w_init           -- initial point (default -2.0)
        steplength       -- stored on self.steplength when given
        max_its          -- iterations per run (default 20), stored as an int
        version          -- stored on self.version (default 'unnormalized')
        tracers          -- 'on' to draw arrows between consecutive points
        fps              -- frames per second of the saved file (default 50)
    '''
    # presets
    self.g = kwargs['g']                            # input function
    self.grad = compute_grad(self.g)                # gradient of input function
    self.w_init = float(-2)                         # initial point
    self.max_its = 20                               # max iterations per run
    self.w_hist = []                                # container for algorithm path
    wmin = kwargs.get('wmin', -3.1)                 # viewing range
    wmax = kwargs.get('wmax', 3.1)

    # range of step values to illustrate
    self.steplength_range = kwargs.get('steplength_range', np.linspace(10**-4,1,20))

    # get new initial point if desired
    if 'w_init' in kwargs:
        self.w_init = kwargs['w_init']

    # take in user defined step length
    if 'steplength' in kwargs:
        self.steplength = kwargs['steplength']

    # take in user defined maximum number of iterations; this is a count,
    # so it is kept as an int rather than converted to float
    if 'max_its' in kwargs:
        self.max_its = int(kwargs['max_its'])

    # version of gradient descent to use (normalized or unnormalized)
    self.version = kwargs.get('version', 'unnormalized')

    # turn on arrows connecting consecutive points on the function
    tracers = kwargs.get('tracers', 'off')

    # initialize figure with 2 panels sharing the vertical axis
    fig = plt.figure(figsize = (9,4))
    artist = fig
    gs = gridspec.GridSpec(1, 2, width_ratios=[1,1])
    ax1 = plt.subplot(gs[0])
    ax2 = plt.subplot(gs[1],sharey=ax1)
    gs.update(wspace=0.5, hspace=0.1)

    # sample the function once for use on the first frame
    w_plot = np.linspace(wmin,wmax,500)
    g_plot = self.g(w_plot)

    # animation sub-function
    num_frames = len(self.steplength_range)+1
    print ('starting animation rendering...')
    def animate(k):
        ax1.cla()
        ax2.cla()

        # print rendering update
        if np.mod(k+1,25) == 0:
            print ('rendering animation frame ' + str(k+1) + ' of ' + str(num_frames))
        if k == num_frames - 1:
            print ('animation rendering complete!')
            time.sleep(1.5)
            clear_output()

        # first frame: the function alone with the initial point
        if k == 0:
            w_val = self.w_init
            g_val = self.g(w_val)
            ax1.scatter(w_val,g_val,s = 100,c = 'm',edgecolor = 'k',linewidth = 0.7,zorder = 2)
            ax1.plot(w_plot,g_plot,color = 'k',zorder = 0)

        # later frames: one descent run per steplength value
        if k > 0:
            alpha = self.steplength_range[k-1]

            # run gradient descent method
            self.w_hist = []
            self.run_gradient_descent(alpha = alpha)

            # plot function
            self.plot_function(ax1)

            # colors for points: first half ramps 0 -> 1, second half is 1;
            # columns are (s, reversed s, 0) read as RGB
            half = round(len(self.w_hist)/2)
            s = np.linspace(0,1,len(self.w_hist[:half])).reshape(-1,1)
            t = np.ones(len(self.w_hist[half:])).reshape(-1,1)
            s = np.vstack((s,t))
            self.colorspec = np.concatenate((s,np.flipud(s),np.zeros((len(s),1))),1)

            # plot everything for each iteration
            for j, w_val in enumerate(self.w_hist):
                g_val = self.g(w_val)
                ax1.scatter(w_val,g_val,s = 90,c = self.colorspec[j],edgecolor = 'k',linewidth = 0.7,zorder = 3)

                # arrows connecting consecutive points on the function
                if tracers == 'on' and j > 0:
                    w_old = self.w_hist[j-1]
                    g_old = self.g(w_old)
                    ax1.quiver(w_old, g_old, w_val - w_old, g_val - g_old, scale_units='xy', angles='xy', scale=1, color = self.colorspec[j],linewidth = 1.5,alpha = 0.2,linestyle = '-',headwidth = 4.5,edgecolor = 'k',headlength = 10,headaxislength = 7)

                ### plot all on cost function decrease plot
                ax2.scatter(j,g_val,s = 90,c = self.colorspec[j],edgecolor = 'k',linewidth = 0.7,zorder = 3)

                # clean up second axis, set title on first
                ax2.set_xticks(np.arange(len(self.w_hist)))
                ax1.set_title(r'$\alpha = $' + r'{:.2f}'.format(alpha),fontsize = 14)

                # connector between consecutive history points
                if j > 0:
                    g_old = self.g(self.w_hist[j-1])
                    ax2.plot([j-1,j],[g_old,g_val],color = self.colorspec[j],linewidth = 2,alpha = 0.4,zorder = 1)

        ### clean up function plot ###
        # draw axes and labels
        ax1.set_xlabel(r'$w$',fontsize = 13)
        ax1.set_ylabel(r'$g(w)$',fontsize = 13,rotation = 0,labelpad = 25)
        ax2.set_xlabel('iteration',fontsize = 13)
        ax2.set_ylabel(r'$g(w)$',fontsize = 13,rotation = 0,labelpad = 25)
        ax1.axhline(y=0, color='k',zorder = 0,linewidth = 0.5)
        ax2.axhline(y=0, color='k',zorder = 0,linewidth = 0.5)

        return artist,

    anim = animation.FuncAnimation(fig, animate ,frames=num_frames, interval=num_frames, blit=True)

    # produce animation and save
    fps = kwargs.get('fps', 50)
    anim.save(savepath, fps=fps, extra_args=['-vcodec', 'libx264'])
    clear_output()
def __init__(self, **args):
    """Record the cost function and reset the containers used by a run.

    Keyword Args:
        g: the input (cost) function.  Required -- a missing key raises
            KeyError.
    """
    cost = args['g']
    self.g = cost
    # derivative of the cost, produced by the file-level compute_grad helper
    self.grad = compute_grad(cost)
    # default initial point
    self.w_init = -3.0
    # empty containers: weight history and per-point plotting colors
    self.w_hist = []
    self.colorspec = []
def _draw_derivative_arrow(ax, length, label, color, pos_shift, neg_shift):
    """Draw a horizontal arrow of signed ``length`` from the origin and label it.

    ``pos_shift`` / ``neg_shift`` are subtracted from the label's x position
    when the arrow points right / left respectively.
    """
    if length >= 0:
        shift = pos_shift
    elif length < 0:
        shift = neg_shift
    else:
        # NaN derivative: neither comparison holds, so draw nothing
        return
    # head sizes are passed as non-negative numbers whichever way the arrow points
    ax.arrow(0, 0, length, 0,
             head_width=0.08 * abs(length), head_length=0.2 * abs(length),
             fc=color, ec=color, linewidth=2.5, zorder=3)
    ax.annotate(label, xy=(2, 1), xytext=(1.3 * length - shift, 0),
                fontsize=22)


def animate_visualize2d(**kwargs):
    """Animate the tangent line of ``g`` sliding across ``[-3, 3]``.

    Each frame shows the function, the current point of tangency, the first
    order approximation there, and the derivative drawn as a horizontal
    arrow from the origin (optionally also the negative derivative).

    Keyword Args:
        g: the input function; required (KeyError when missing).  It is
            called on both an array of inputs and on single inputs.
        num_frames: number of frames / tangency points (default 300).
        plot_descent: when true, also draw the negative derivative in red
            (default False).

    Returns:
        The ``animation.FuncAnimation`` object driving the figure.
    """
    g = kwargs['g']
    grad = compute_grad(g)
    num_frames = kwargs.get('num_frames', 300)
    plot_descent = kwargs.get('plot_descent', False)

    # initialize figure
    fig = plt.figure(figsize=(16, 8))
    artist = fig

    # three panels: the outer two are blank padding, the function goes in the middle
    gs = gridspec.GridSpec(1, 3, width_ratios=[1, 4, 1])
    for pad in (0, 2):
        plt.subplot(gs[pad]).axis('off')
    ax = plt.subplot(gs[1])

    # function samples and fixed vertical viewing range
    w_plot = np.linspace(-3, 3, 200)
    g_plot = g(w_plot)
    ggap = (max(g_plot) - min(g_plot)) * 0.2
    y_limits = [min(min(g_plot) - ggap, -0.5), max(max(g_plot) + ggap, 0.5)]

    # one tangency point per frame
    w_vals = np.linspace(-3, 3, num_frames)

    print('starting animation rendering...')

    def animate(k):
        # clear the panel
        ax.cla()

        # print rendering update
        if np.mod(k + 1, 25) == 0:
            print('rendering animation frame ' + str(k + 1) + ' of '
                  + str(num_frames))
        if k == num_frames - 1:
            print('animation rendering complete!')
            time.sleep(1.5)
            clear_output()

        # current tangency pair
        w_val = w_vals[k]
        g_val = g(w_val)
        g_grad_val = grad(w_val)

        # original function and point of tangency
        ax.plot(w_plot, g_plot, color='k', zorder=1, linewidth=4)
        ax.scatter(w_val, g_val, s=200, c='lime', edgecolor='k',
                   linewidth=2, zorder=3)

        # first order approximation, clipped so the drawn segment has a
        # roughly constant length regardless of slope
        width = 1
        half_span = math.sqrt(width / float(1 + g_grad_val**2))
        wrange = np.linspace(w_val - half_span, w_val + half_span, 100)
        h = g_val + g_grad_val * (wrange - w_val)
        ax.plot(wrange, h, color='lime', alpha=0.5, linewidth=6, zorder=2)

        # derivative as a vector along the horizontal axis
        name = (r'$\frac{\mathrm{d}}{\mathrm{d}w}g('
                + r'{:.2f}'.format(w_val) + r')$')
        _draw_derivative_arrow(ax, g_grad_val, name, 'k', 0, 1.3)

        # negative derivative as a vector
        if plot_descent:
            ax.scatter(0, 0, c='k', edgecolor='w', s=100, linewidth=0.5,
                       zorder=4)
            name = (r'$-\frac{\mathrm{d}}{\mathrm{d}w}g('
                    + r'{:.2f}'.format(w_val) + r')$')
            _draw_derivative_arrow(ax, -g_grad_val, name, 'r', 0.2, 1.6)

        # fix viewing limits on panel
        ax.set_xlim([-5, 5])
        ax.set_ylim(y_limits)

        # label axes and strip everything but the horizontal axis
        ax.set_xlabel('$w$', fontsize=25)
        ax.set_ylabel('$g(w)$', fontsize=25, rotation=0, labelpad=50)
        ax.grid(False)
        ax.yaxis.set_visible(False)
        for side in ('right', 'top', 'left'):
            ax.spines[side].set_visible(False)

        # Tick.label is not available in current Matplotlib releases;
        # tick_params sets the same major-tick label size portably.
        ax.tick_params(axis='x', which='major', labelsize=18)

        return artist,

    anim = animation.FuncAnimation(fig, animate, frames=len(w_vals),
                                   interval=len(w_vals), blit=True)

    return anim
def newtons_method(self, g, w_hist, **kwargs):
    """Animate a run of Newton's method on a single-input function.

    Every step of ``w_hist`` gets two frames: one showing the second order
    (quadratic) approximation at the current point, and one that also marks
    the stationary point of that quadratic and the corresponding point on
    the function.

    Args:
        g: the input function; called on an array of inputs and on single
            inputs.
        w_hist: sequence of weights visited by the algorithm; each entry
            must be convertible with ``float``.

    Keyword Args:
        wmax: right edge of the viewing range (default 3).
        wmin: left edge of the viewing range (default ``-wmax``).

    Returns:
        The ``animation.FuncAnimation`` object driving the figure.
    """
    # first and second derivative of the input
    grad = compute_grad(g)
    hess = compute_hess(g)

    # viewing range
    wmax = kwargs.get('wmax', 3)
    wmin = kwargs.get('wmin', -wmax)

    # initialize figure
    fig = plt.figure(figsize=(9, 4))
    artist = fig

    # three panels: the outer two are blank padding, the function goes in the middle
    gs = gridspec.GridSpec(1, 3, width_ratios=[1, 4, 1])
    for pad in (0, 2):
        plt.subplot(gs[pad]).axis('off')
    ax = plt.subplot(gs[1])

    # function samples drawn on every frame
    w_plot = np.linspace(wmin, wmax, 1000)
    g_plot = g(w_plot)
    ggap = (max(g_plot) - min(g_plot)) * 0.1

    # color spectrum for points
    colorspec = self.make_colorspec(w_hist)

    print('starting animation rendering...')
    num_frames = 2 * len(w_hist) + 2

    def animate(t):
        ax.cla()

        # two frames per algorithm step
        k = (t + 1) // 2

        # Report progress in frames (t), not steps (k): num_frames counts
        # frames, and k repeats for two consecutive frames.
        if np.mod(t + 1, 25) == 0:
            print('rendering animation frame ' + str(t + 1) + ' of '
                  + str(num_frames))
        if t == num_frames - 1:
            print('animation rendering complete!')
            time.sleep(1.5)
            clear_output()

        # plot function
        ax.plot(w_plot, g_plot, color='k', zorder=1)

        # initial point and its evaluation
        if k == 0:
            w_val = w_hist[0]
            g_val = g(w_val)
            ax.scatter(w_val, g_val, s=100, c=colorspec[k], edgecolor='k',
                       linewidth=0.7, marker='X', zorder=2)
            ax.scatter(w_val, 0, s=100, c=colorspec[k], edgecolor='k',
                       linewidth=0.7, zorder=2)

            # dashed line connecting the w axis to the point on the function
            s = np.linspace(0, g_val)
            ax.plot(np.ones(len(s)) * w_val, s, 'k--', linewidth=1, zorder=0)

        # all input/output pairs generated by the algorithm so far
        if k > 0:
            for j in range(min(k - 1, len(w_hist))):
                w_val = w_hist[j]
                g_val = g(w_val)
                ax.scatter(w_val, g_val, s=90, c=colorspec[j], edgecolor='k',
                           marker='X', linewidth=0.7, zorder=3)
                ax.scatter(w_val, 0, s=90, facecolor=colorspec[j],
                           edgecolor='k', linewidth=0.7, zorder=2)

        # surrogate (quadratic) function and the point it sends us to
        if 0 < k < len(w_hist) + 1:
            w_eval = float(w_hist[k - 1])

            g_eval = g(w_eval)
            g_grad_eval = grad(w_eval)
            g_hess_eval = hess(w_eval)

            # vertical offset at which the quadratic is clipped; its sign
            # follows the curvature so the clipping line always crosses it
            width = 0.5
            if g_hess_eval < 0:
                width = -width

            # quadratic formula parameters for the clipping points
            a = 0.5 * g_hess_eval
            b = g_grad_eval - 2 * 0.5 * g_hess_eval * w_eval
            c = 0.5 * g_hess_eval * w_eval**2 - g_grad_eval * w_eval - width

            # the small constant keeps the division defined when a == 0
            root = math.sqrt(b**2 - 4 * a * c)
            denom = float(2 * a + 0.00001)
            w1 = (-b + root) / denom
            w2 = (-b - root) / denom

            # second order approximation
            wrange = np.linspace(w1, w2, 100)
            h = (g_eval + g_grad_eval * (wrange - w_eval)
                 + 0.5 * g_hess_eval * (wrange - w_eval)**2)

            ax.plot(wrange, h, color=colorspec[k - 1], linewidth=2, zorder=2)
            ax.scatter(w_eval, g_eval, s=100, c='m', edgecolor='k',
                       marker='X', linewidth=0.7, zorder=3)

            # on even frames also show the next point learned from the surrogate
            if np.mod(t, 2) == 0:
                w_zero = w_eval - g_grad_eval / (g_hess_eval + 10**-5)
                g_zero = g(w_zero)
                h_zero = (g_eval + g_grad_eval * (w_zero - w_eval)
                          + 0.5 * g_hess_eval * (w_zero - w_eval)**2)

                # dashed line spanning the axis, the surrogate and the function
                vals = np.sort([0, h_zero, g_zero])
                s = np.linspace(vals[0], vals[2])
                ax.plot(np.ones(len(s)) * w_zero, s, 'k--', linewidth=1)

                ax.scatter(w_zero, h_zero, s=100, c='b', linewidth=0.7,
                           marker='X', edgecolor='k', zorder=3)
                ax.scatter(w_zero, 0, s=100, c='m', edgecolor='k',
                           linewidth=0.7, zorder=3)
                ax.scatter(w_zero, g_zero, s=100, c='m', edgecolor='k',
                           linewidth=0.7, marker='X', zorder=3)

        # fix viewing limits on panel
        ax.set_xlim([wmin, wmax])
        ax.set_ylim([min(-0.3, min(g_plot) - ggap),
                     max(max(g_plot) + ggap, 0.3)])

        # horizontal axis and labels
        ax.axhline(y=0, color='k', zorder=0, linewidth=0.5)
        ax.set_xlabel(r'$w$', fontsize=14)
        ax.set_ylabel(r'$g(w)$', fontsize=14, rotation=0, labelpad=25)

        # tickmarks
        ax.set_xticks(np.arange(round(wmin), round(wmax) + 1, 1.0))
        ax.set_yticks(np.arange(round(min(g_plot) - ggap),
                                round(max(g_plot) + ggap) + 1, 1.0))

        return artist,

    anim = animation.FuncAnimation(fig, animate, frames=num_frames,
                                   interval=num_frames, blit=True)

    return anim
def visualize3d(func, **kwargs):
    """Plot a two-input function, its tangent plane, and its gradient.

    Draws the surface of ``func`` over ``[-2, 2]^2``, the ``z = 0`` plane,
    the tangent hyperplane at one point, and arrows in the input plane for
    each partial derivative and for the full gradient (optionally also the
    negative gradient).  Arrows are skipped for partial derivatives whose
    magnitude is at most 0.1.

    Args:
        func: function of a ``(2, N)`` array of inputs.

    Keyword Args:
        view: ``[elevation, azimuth]`` of the 3d camera (default ``[20, -50]``).
        plot_descent: when true, also draw the negative gradient in red
            (default False).
        pt: the point of tangency as a 2-element sequence (default ``[0, 0]``).
        num_frames, pt2: accepted for compatibility; not used.
    """
    grad = compute_grad(func)

    view = kwargs.get('view', [20, -50])
    plot_descent = kwargs.get('plot_descent', False)
    pt = kwargs.get('pt', [0, 0])

    # construct figure and remove whitespace around the single 3d panel
    fig = plt.figure(figsize=(9, 6))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    fig.subplots_adjust(wspace=0.01, hspace=0.01)
    gs = gridspec.GridSpec(1, 1)
    ax = plt.subplot(gs[0], projection='3d')

    # cost surface over the input grid
    w_in = np.linspace(-2, 2, 200)
    n_in = len(w_in)
    w1_vals, w2_vals = np.meshgrid(w_in, w_in)
    w_vals = np.vstack((w1_vals.ravel(), w2_vals.ravel()))
    g_vals = np.reshape(func(w_vals), (n_in, n_in))

    # evaluation point
    w_val = np.array([float(pt[0]), float(pt[1])]).reshape(2, 1)
    g_val = func(w_val)
    grad_val = np.reshape(grad(w_val), (2, 1))

    def h(weh):
        """Tangent hyperplane at ``w_val`` evaluated at ``weh``."""
        return (g_val + (weh[0] - w_val[0]) * grad_val[0]
                + (weh[1] - w_val[1]) * grad_val[1])

    def rise(a, b):
        """Change of the tangent plane between the origin and ``(a, b)``."""
        return h([a, b]) - h([0, 0])

    # tangent hyperplane over a unit box centred on the evaluation point
    w_tan = np.linspace(-1, 1, 200)
    n_tan = len(w_tan)
    w1tan_vals, w2tan_vals = np.meshgrid(w_tan, w_tan)
    wtan_vals = np.vstack((w1tan_vals.ravel(), w2tan_vals.ravel()))
    h_vals = np.reshape(h(wtan_vals + w_val), (n_tan, n_tan))
    zmin = min(np.min(h_vals), -0.5)
    zmax = max(np.max(h_vals), +0.5)
    w1tan_vals = w1tan_vals + w_val[0]
    w2tan_vals = w2tan_vals + w_val[1]

    ### plot function ###
    ax.plot_surface(w1_vals, w2_vals, g_vals, alpha=0.1, color='w',
                    rstride=25, cstride=25, linewidth=1, edgecolor='k',
                    zorder=2)

    ### plot z=0 plane ###
    ax.plot_surface(w1_vals, w2_vals, g_vals * 0, alpha=0.1, color='w',
                    zorder=1, rstride=25, cstride=25, linewidth=0.3,
                    edgecolor='k')

    ### plot tangent plane ###
    ax.plot_surface(w1tan_vals, w2tan_vals, h_vals, alpha=0.1, color='lime',
                    zorder=1, rstride=50, cstride=50, linewidth=1,
                    edgecolor='k')

    ### plot particular points - origin and tangency ###
    ax.scatter(0, 0, 0, s=60, c='k', edgecolor='w', linewidth=2)
    ax.scatter(w_val[0], w_val[1], g_val, s=70, c='lime', edgecolor='k',
               linewidth=2)

    ##### arrows and annotations for steepest ascent direction #####
    cutoff_val = 0.1
    an = 1.7   # how far along each arrow its label is placed
    pname = 'g(' + str(pt[0]) + ',' + str(pt[1]) + ')'
    label_kwargs = dict(fontsize=12, xytext=(-3, 3),
                        textcoords='offset points', ha='center', va='center')

    # unit rises of the tangent plane == the two partial derivatives
    s = rise(1, 0)
    t = rise(0, 1)
    show_s = abs(s) > cutoff_val
    show_t = abs(t) > cutoff_val

    if show_s:
        a = Arrow3D([0, s], [0, 0], [0, 0], mutation_scale=20, lw=2,
                    arrowstyle="-|>", color="b")
        ax.add_artist(a)
        name = (r'$\left(\frac{\mathrm{d}}{\mathrm{d}w_1}' + pname
                + r',0\right)$')
        annotate3D(ax, s=name, xyz=[rise(an, 0), 0, 0], **label_kwargs)

    if show_t:
        a = Arrow3D([0, 0], [0, t], [0, 0], mutation_scale=20, lw=2,
                    arrowstyle="-|>", color="b")
        ax.add_artist(a)
        name = (r'$\left(0,\frac{\mathrm{d}}{\mathrm{d}w_2}' + pname
                + r'\right)$')
        annotate3D(ax, s=name, xyz=[0, rise(0, an), 0], **label_kwargs)

    # full gradient, only when both partials were large enough to draw
    if show_s and show_t:
        a = Arrow3D([0, s], [0, t], [0, 0], mutation_scale=20, lw=2,
                    arrowstyle="-|>", color="k")
        ax.add_artist(a)
        name = (r'$\left(\frac{\mathrm{d}}{\mathrm{d}w_1}' + pname
                + r',\frac{\mathrm{d}}{\mathrm{d}w_2}' + pname + r'\right)$')
        annotate3D(ax, s=name,
                   xyz=[rise(an + 0.2, 0), rise(0, an + 0.2), 0],
                   **label_kwargs)

    ###### arrow and text for steepest descent direction #####
    if plot_descent and show_s and show_t:
        a = Arrow3D([0, -s], [0, -t], [0, 0], mutation_scale=20, lw=2,
                    arrowstyle="-|>", color="r")
        ax.add_artist(a)
        name = (r'$\left(-\frac{\mathrm{d}}{\mathrm{d}w_1}' + pname
                + r',-\frac{\mathrm{d}}{\mathrm{d}w_2}' + pname + r'\right)$')
        annotate3D(ax, s=name,
                   xyz=[-rise(an + 0.2, 0), -rise(0, an + 0.2), 0],
                   **label_kwargs)

    ### clean up plot ###
    # The w_xaxis / w_yaxis / w_zaxis aliases are not available in current
    # Matplotlib releases; xaxis / yaxis / zaxis are the same objects.
    for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
        axis.pane.fill = False
        axis.pane.set_edgecolor('white')
        axis.line.set_lw(0.)
    ax.set_zticks([])
    ax.set_xticks([])
    ax.set_yticks([])

    # set viewing angle
    ax.view_init(view[0], view[1])

    # set viewing limits
    y = 4.5
    ax.set_xlim([-y, y])
    ax.set_ylim([-y, y])
    ax.set_zlim([zmin, zmax])

    # label plot
    fontsize = 14
    ax.set_xlabel(r'$w_1$', fontsize=fontsize, labelpad=-20)
    ax.set_ylabel(r'$w_2$', fontsize=fontsize, rotation=0, labelpad=-30)

    plt.show()
def compare_2d3d(func1, func2, **kwargs):
    """Show a tangent line and a tangent plane side by side.

    The middle panel plots the single-input ``func1`` with its first order
    approximation at ``w = 0``; the right panel plots the two-input
    ``func2`` with its tangent hyperplane at the origin.

    Args:
        func1: single-input function; called on an array and on a float.
        func2: function of a ``(2, N)`` array of inputs.

    Keyword Args:
        view: ``[elevation, azimuth]`` of the 3d camera.  Defaults to
            ``[20, -65]``, the angle that was previously hard-coded (the
            keyword used to be parsed and then ignored).
    """
    view = kwargs.get('view', [20, -65])

    # construct figure and remove whitespace
    fig = plt.figure(figsize=(12, 4))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    fig.subplots_adjust(wspace=0.01, hspace=0.01)

    # three columns; the first is left empty as padding
    gs = gridspec.GridSpec(1, 3, width_ratios=[1, 2, 4])

    ### draw 2d version ###
    ax1 = plt.subplot(gs[1])
    grad = compute_grad(func1)

    w_plot = np.linspace(-3, 3, 200)
    g_plot = func1(w_plot)

    # point of tangency
    w_val = float(0)
    g_val = func1(w_val)

    # function, horizontal axis, tangency point
    ax1.plot(w_plot, g_plot, color='k', zorder=1, linewidth=2)
    ax1.plot(w_plot, g_plot * 0, color='k', zorder=1, linewidth=1)
    ax1.scatter(w_val, g_val, s=80, c='lime', edgecolor='k', linewidth=2,
                zorder=3)

    # first order approximation, clipped so the drawn segment has a
    # roughly constant length regardless of slope
    g_grad_val = grad(w_val)
    width = 4
    half_span = math.sqrt(width / float(1 + g_grad_val**2))
    wrange = np.linspace(w_val - half_span, w_val + half_span, 100)
    h_line = g_val + g_grad_val * (wrange - w_val)
    ax1.plot(wrange, h_line, color='lime', alpha=0.5, linewidth=3, zorder=2)

    # clean up panel
    v = 5
    ax1.set_xlim([-v, v])
    ax1.set_ylim([-1 - 0.3, v - 0.3])
    ax1.set_xlabel('$w$', fontsize=12, labelpad=-60)
    ax1.set_ylabel('$g(w)$', fontsize=25, rotation=0, labelpad=50)
    ax1.grid(False)
    ax1.yaxis.set_visible(False)
    for side in ('right', 'top', 'left'):
        ax1.spines[side].set_visible(False)

    ### draw 3d version ###
    ax2 = plt.subplot(gs[2], projection='3d')
    grad = compute_grad(func2)

    # cost surface over the input grid
    w_in = np.linspace(-2, 2, 200)
    n_in = len(w_in)
    w1_vals, w2_vals = np.meshgrid(w_in, w_in)
    w_vals = np.vstack((w1_vals.ravel(), w2_vals.ravel()))
    g_vals = np.reshape(func2(w_vals), (n_in, n_in))

    # evaluation point: the origin
    w_val = np.array([float(0), float(0)]).reshape(2, 1)
    g_val = func2(w_val)
    grad_val = np.reshape(grad(w_val), (2, 1))

    def h(weh):
        """Tangent hyperplane at ``w_val`` evaluated at ``weh``."""
        return (g_val + (weh[0] - w_val[0]) * grad_val[0]
                + (weh[1] - w_val[1]) * grad_val[1])

    # tangent hyperplane over a unit box centred on the evaluation point
    w_tan = np.linspace(-1, 1, 200)
    n_tan = len(w_tan)
    w1tan_vals, w2tan_vals = np.meshgrid(w_tan, w_tan)
    wtan_vals = np.vstack((w1tan_vals.ravel(), w2tan_vals.ravel()))
    h_vals = np.reshape(h(wtan_vals + w_val), (n_tan, n_tan))
    zmin = min(np.min(h_vals), -0.5)
    zmax = max(np.max(h_vals), +0.5)
    w1tan_vals = w1tan_vals + w_val[0]
    w2tan_vals = w2tan_vals + w_val[1]

    ### plot function ###
    ax2.plot_surface(w1_vals, w2_vals, g_vals, alpha=0.5, color='w',
                     rstride=25, cstride=25, linewidth=1, edgecolor='k',
                     zorder=2)

    ### plot z=0 plane ###
    ax2.plot_surface(w1_vals, w2_vals, g_vals * 0, alpha=0.1, color='w',
                     zorder=1, rstride=25, cstride=25, linewidth=0.3,
                     edgecolor='k')

    ### plot tangent plane ###
    ax2.plot_surface(w1tan_vals, w2tan_vals, h_vals, alpha=0.4, color='lime',
                     zorder=1, rstride=50, cstride=50, linewidth=1,
                     edgecolor='k')

    # scatter tangency
    ax2.scatter(w_val[0], w_val[1], g_val, s=70, c='lime', edgecolor='k',
                linewidth=2)

    ### clean up plot ###
    # The w_xaxis / w_yaxis / w_zaxis aliases are not available in current
    # Matplotlib releases; xaxis / yaxis / zaxis are the same objects.
    for axis in (ax2.xaxis, ax2.yaxis, ax2.zaxis):
        axis.pane.fill = False
        axis.pane.set_edgecolor('white')
        axis.line.set_lw(0.)
    ax2.set_zticks([])
    ax2.set_xticks([])
    ax2.set_yticks([])

    # set viewing angle
    ax2.view_init(view[0], view[1])

    # set viewing limits
    y = 4
    ax2.set_xlim([-y, y])
    ax2.set_ylim([-y, y])
    ax2.set_zlim([zmin, zmax])

    # label plot
    fontsize = 12
    ax2.set_xlabel(r'$w_1$', fontsize=fontsize, labelpad=-35)
    ax2.set_ylabel(r'$w_2$', fontsize=fontsize, rotation=0, labelpad=-40)

    plt.show()
def gradient_descent(self, g, w, **kwargs):
    """Run gradient descent on ``g`` starting from ``w``.

    The function and weights are first passed through ``flatten_func``; the
    returned ``unflatten`` callable is applied to every weight handed back
    to the caller.

    Args:
        g: the cost function.
        w: the initial weights.

    Keyword Args:
        max_its: number of steps to take (default 100).
        version: ``'normalized'`` divides each gradient by its norm; any
            other value leaves it untouched (default ``'unnormalized'``).
        alpha: fixed steplength (default ``10**-4``).
        steplength_rule: ``'backtracking'`` calls ``self.backtracking``,
            ``'diminishing'`` uses ``1/d`` with ``d`` growing by one every
            ``diminish_num`` steps; anything else keeps ``alpha`` fixed.
        projection: optional callable applied to the weights after each step.
        output: ``'history'`` (default) or ``'best'``.
        diminish_num: see ``steplength_rule`` (default 10).
        verbose: print progress messages (default True).

    Returns:
        For ``output='history'`` a list holding the initial weights followed
        by the weights after every step; for ``output='best'`` the weights
        with the lowest cost seen, the initial point included; ``None`` for
        any other ``output`` value.
    """
    # flatten function
    self.g, unflatten, w = flatten_func(g, w)
    self.grad = compute_grad(self.g)

    # parse optional arguments
    max_its = kwargs.get('max_its', 100)
    version = kwargs.get('version', 'unnormalized')
    alpha = kwargs.get('alpha', 10**-4)
    steplength_rule = kwargs.get('steplength_rule', 'none')
    projection = kwargs.get('projection')
    output = kwargs.get('output', 'history')
    diminish_num = kwargs.get('diminish_num', 10)
    verbose = kwargs.get('verbose', True)

    # containers for the history / best point
    w_hist = []
    w_best = unflatten(copy.deepcopy(w))
    g_best = np.inf
    if output == 'history':
        w_hist.append(unflatten(w))
    if output == 'best':
        # score the starting point so it can win if no step improves on it
        g_best = self.g(w)

    if verbose:
        print('starting optimization...')

    d = 1   # diminish count
    for k in range(max_its):
        # plug in value into derivative
        grad_eval = self.grad(w)
        grad_eval.shape = np.shape(w)

        ### normalized or unnormalized descent step? ###
        if version == 'normalized':
            grad_norm = np.linalg.norm(grad_eval)
            if grad_norm == 0:
                # avoid dividing by zero at a stationary point
                grad_norm += 10**-6 * np.sign(2 * np.random.rand(1) - 1)
            grad_eval /= grad_norm

        # print out progress
        if verbose and np.mod(k, 100) == 0 and k > 0:
            print(str(k) + ' of ' + str(max_its) + ' iterations complete')

        ### decide on steplength parameter alpha ###
        if steplength_rule == 'backtracking':
            alpha = self.backtracking(w, grad_eval)
        if steplength_rule == 'diminishing':
            alpha = 1 / (float(d))
            if np.mod(k, diminish_num) == 0 and k > 0:
                d += 1

        ### take gradient descent step ###
        w = w - alpha * grad_eval

        ### projection? ###
        if projection is not None:
            w = projection(w)

        # record weight for history / best
        if output == 'history':
            w_hist.append(unflatten(w))
        if output == 'best':
            g_current = self.g(w)
            if g_current < g_best:
                g_best = g_current
                w_best = unflatten(w)

    if verbose:
        print('...optimization complete!')
        time.sleep(1.5)
        clear_output()

    if output == 'history':
        return w_hist
    if output == 'best':
        return w_best
def __init__(self, **args):
    """Keep the input function together with its first two derivatives.

    Keyword Args:
        g: the input function.  Required -- a missing key raises KeyError.
    """
    func = args['g']
    self.g = func

    # first derivative, then the derivative of that (used as the hessian)
    first = compute_grad(func)
    self.grad = first
    self.hess = compute_grad(first)

    # set of custom colors used for plotting
    self.colors = [
        [0, 1, 0.25],
        [0, 0.75, 1],
    ]
def __init__(self, **args):
    """Set up q-learning parameters and a function approximator for a gridworld.

    Keyword Args:
        gridworld: the grid object; required.  Its ``width``, ``height``,
            ``training_episodes``, ``validation_episodes``,
            ``training_start_schedule`` and ``validation_start_schedule``
            attributes are read here.
        approximator: ``'linear'`` or ``'cosine'``; required.
        gamma: value stored as ``self.gamma`` (default 1).
        max_steps: value stored as ``self.max_steps``
            (default ``5 * width * height``).
        action_method: value stored as ``self.action_method``
            (default ``'exploit'``).
        exploit_param: value stored as ``self.exploit_param`` (default 0.5);
            supplying it also forces ``action_method`` to ``'exploit'``.
        training_episodes / validation_episodes: number of episodes to take
            from the grid's start schedules (defaults 500 / 50).
        degree: value stored as ``self.deg`` (default 1).
        step_size: value stored as ``self.step_size``.

    If more episodes are requested than the grid provides, a message is
    printed and initialization stops early, leaving the remaining
    attributes unset.
    """
    # get some crucial parameters from the input gridworld
    self.grid = args['gridworld']

    # initialize q-learning params
    self.gamma = 1
    self.max_steps = 5 * self.grid.width * self.grid.height
    self.exploit_param = 0.5
    self.action_method = 'exploit'
    self.training_episodes = 500
    self.validation_episodes = 50
    self.training_start_schedule = []
    self.validation_start_schedule = []

    # swap out for user defined q-learning params if desired
    if 'gamma' in args:
        self.gamma = args['gamma']
    if 'max_steps' in args:
        self.max_steps = args['max_steps']
    if 'action_method' in args:
        self.action_method = args['action_method']
    if 'exploit_param' in args:
        # Previously only self.exploit was written, so the default stored
        # in self.exploit_param was never overridden.  Both names are set
        # in case other code still reads the old attribute.
        self.exploit_param = args['exploit_param']
        self.exploit = self.exploit_param
        self.action_method = 'exploit'

    # NOTE(review): the start schedules are only filled when the episode
    # counts are passed explicitly -- confirm this nesting against the
    # original (indented) source.
    if 'training_episodes' in args:
        self.training_episodes = args['training_episodes']
        # bail out if the number of training episodes is too big
        if self.training_episodes > self.grid.training_episodes:
            print('requesting too many training episodes, the maximum num = '
                  + str(self.grid.training_episodes))
            return
        self.training_start_schedule = (
            self.grid.training_start_schedule[:self.training_episodes])
    if 'validation_episodes' in args:
        self.validation_episodes = args['validation_episodes']
        # bail out if the number of validation episodes is too big
        if self.validation_episodes > self.grid.validation_episodes:
            print('requesting too many validation episodes, the maximum num = '
                  + str(self.grid.validation_episodes))
            return
        self.validation_start_schedule = (
            self.grid.validation_start_schedule[:self.validation_episodes])

    ##### function approximator setup #####
    # initialize function approximation params and weights
    self.deg = 1
    if 'degree' in args:
        self.deg = args['degree']
    self.step_size = 1 / float(
        max(self.grid.height, self.grid.width) * self.deg) * 10**-5
    if 'step_size' in args:
        self.step_size = args['step_size']

    # switch for choosing various nonlinear approximators
    self.h = 0
    self.W = 0
    self.num_actions = 4
    if args['approximator'] == 'linear':
        self.h = self.linear_approximator
        # weights per function: 1 bias + 2 state-touching weights
        self.W = np.random.randn(
            self.num_actions, self.deg, 1 + 2).astype('float')
    if args['approximator'] == 'cosine':
        self.h = self.cosine_approximator
        # weights per function: 1 bias + 1 inside the cosine
        self.W = np.random.randn(
            self.num_actions, self.deg, 2).astype('float')

    # compute gradient of approximator for later use
    self.h_grad = compute_grad(self.h)
def animate_2d(self, g, w_hist, **kwargs):
    """Animate a recorded gradient descent run on a single-input function.

    Every step of ``w_hist`` gets two frames: one with the tangent line at
    the current point, and (for all but the last steps) one that also marks
    where the next recorded point lands on the tangent line and on the
    function.

    Args:
        g: the input function; stored on ``self.g``.
        w_hist: the weight history to replay; stored on ``self.w_hist``.

    Keyword Args:
        wmin: left edge of the plotting range (default -3.1).
        wmax: right edge of the plotting range (default 3.1).

    Returns:
        The ``animation.FuncAnimation`` object driving the figure.
    """
    self.g = g
    self.w_hist = w_hist
    self.grad = compute_grad(self.g)
    self.w_init = self.w_hist[0]

    wmin = kwargs.get('wmin', -3.1)
    wmax = kwargs.get('wmax', 3.1)

    # figure with a wide centre panel flanked by two blank panels
    fig = plt.figure(figsize=(9, 4))
    artist = fig
    gs = gridspec.GridSpec(1, 3, width_ratios=[1, 4, 1])
    left_pad = plt.subplot(gs[0])
    left_pad.axis('off')
    right_pad = plt.subplot(gs[2])
    right_pad.axis('off')
    ax = plt.subplot(gs[1])

    # samples of the function, reused on every frame
    w_plot = np.linspace(wmin, wmax, 200)
    g_plot = self.g(w_plot)
    ggap = (max(g_plot) - min(g_plot)) * 0.1
    width = 30

    # point colors: green at the start, yellow midway, red at the end
    half = round(len(self.w_hist) / 2)
    rising = np.linspace(0, 1, len(self.w_hist[:half]))[:, np.newaxis]
    flat = np.ones(len(self.w_hist[half:]))[:, np.newaxis]
    ramp = np.vstack((rising, flat))
    self.colorspec = np.concatenate(
        (ramp, np.flipud(ramp), np.zeros((len(ramp), 1))), 1)

    def edge_width(i):
        """Marker outline width, thinning out for later points."""
        return 0.5 * ((1 / (float(i) + 1)))**(0.4)

    num_frames = 2 * len(self.w_hist) + 2
    print('starting animation rendering...')

    def animate(t):
        ax.cla()

        # two frames per step of the history
        k = (t + 1) // 2

        # progress report
        frame_no = t + 1
        if np.mod(frame_no, 25) == 0:
            print('rendering animation frame ' + str(frame_no) + ' of '
                  + str(num_frames))
        if t == num_frames - 1:
            print('animation rendering complete!')
            time.sleep(1.5)
            clear_output()

        # the function itself
        ax.plot(w_plot, g_plot, color='k', zorder=2)

        if k == 0:
            # starting point on the function and on the axis
            w_val = self.w_init
            g_val = self.g(w_val)
            outline = edge_width(k)
            ax.scatter(w_val, g_val, s=90, c=self.colorspec[k],
                       edgecolor='k', linewidth=outline, zorder=3,
                       marker='X')
            ax.scatter(w_val, 0, s=90, facecolor=self.colorspec[k],
                       edgecolor='k', linewidth=outline, zorder=3)

            # dashed connector between the axis and the function
            drop = np.linspace(0, g_val)
            ax.plot(np.ones((len(drop))) * w_val, drop, 'k--', linewidth=1)

        if k > 0:
            # every point visited before the current one
            shown = min(k - 1, len(self.w_hist))
            for j in range(shown):
                w_val = self.w_hist[j]
                g_val = self.g(w_val)
                outline = edge_width(j)
                ax.scatter(w_val, g_val, s=90, c=self.colorspec[j],
                           edgecolor='k', linewidth=outline, zorder=3,
                           marker='X')
                ax.scatter(w_val, 0, s=90, facecolor=self.colorspec[j],
                           edgecolor='k', linewidth=outline, zorder=2)

        if 0 < k < len(self.w_hist) + 1:
            # current point and its tangent line
            w = self.w_hist[k - 1]
            g_eval = self.g(w)
            grad_eval = float(self.grad(w))

            # clip the tangent so its drawn length does not depend on slope
            reach = math.sqrt(width / float(1 + grad_eval**2))
            wrange = np.linspace(w - reach, w + reach, 100)
            tangent = g_eval + grad_eval * (wrange - w)

            ax.plot(wrange, tangent, color=self.colorspec[k - 1],
                    linewidth=2, zorder=1)
            ax.scatter(w, g_eval, s=100, c='m', edgecolor='k',
                       linewidth=0.7, zorder=3, marker='X')

            # on even frames also show the next recorded point
            if np.mod(t, 2) == 0 and k < len(self.w_hist) - 1:
                w_zero = self.w_hist[k]
                g_zero = self.g(w_zero)
                h_zero = g_eval + grad_eval * (w_zero - w)

                # dashed line spanning the axis, the tangent and the function
                ordered = np.sort([0, h_zero, g_zero])
                span = np.linspace(ordered[0], ordered[2])
                ax.plot(np.ones((len(span))) * w_zero, span, 'k--',
                        linewidth=1)

                ax.scatter(w_zero, h_zero, s=100, c='k', zorder=3,
                           marker='X')
                ax.scatter(w_zero, 0, s=100, c='m', edgecolor='k',
                           linewidth=0.7, zorder=3)
                ax.scatter(w_zero, g_zero, s=100, c='m', edgecolor='k',
                           linewidth=0.7, zorder=3, marker='X')

        # viewing limits, horizontal axis and labels
        ax.set_xlim([wmin - 0.1, wmax + 0.1])
        ax.set_ylim([min(g_plot) - ggap, max(g_plot) + ggap])
        ax.axhline(y=0, color='k', zorder=0, linewidth=0.5)
        ax.set_xlabel(r'$w$', fontsize=14)
        ax.set_ylabel(r'$g(w)$', fontsize=14, rotation=0, labelpad=25)

        return artist,

    anim = animation.FuncAnimation(fig, animate, frames=num_frames,
                                   interval=num_frames, blit=True)

    return anim
def gradient_descent(self, g, w, **kwargs):
    """Run gradient descent on ``g`` from ``w`` and return every iterate.

    Args:
        g: the cost function; stored on ``self.g``.
        w: the initial weights.

    Keyword Args:
        max_its: number of steps to take (default 100).
        version: ``'normalized'`` divides each gradient by its norm
            (default ``'unnormalized'``).
        alpha: fixed steplength (default ``10**-4``).
        steplength_rule: ``'backtracking'`` calls ``self.backtracking``,
            ``'diminishing'`` uses ``1/(k+1)`` at step ``k``; anything else
            keeps ``alpha`` fixed.
        projection: accepted but not used by this routine.
        verbose: print start/finish messages (default False).

    Returns:
        A list holding ``w`` followed by the weights after every step.
    """
    # create gradient function
    self.g = g
    self.grad = compute_grad(self.g)

    # optional arguments
    max_its = kwargs.get('max_its', 100)
    version = kwargs.get('version', 'unnormalized')
    alpha = kwargs.get('alpha', 10**-4)
    steplength_rule = kwargs.get('steplength_rule', 'none')
    chatty = kwargs.get('verbose', False) == True

    # weight history starts with the initial point
    w_hist = [w]

    if chatty:
        print('starting optimization...')

    for k in range(max_its):
        # gradient at the current point, shaped like the weights
        direction = self.grad(w)
        direction.shape = np.shape(w)

        # optionally rescale the gradient to unit length
        if version == 'normalized':
            magnitude = np.linalg.norm(direction)
            if magnitude == 0:
                # nudge away from zero so the division below is defined
                magnitude += 10**-6 * np.sign(2 * np.random.rand(1) - 1)
            direction /= magnitude

        # choose the steplength for this step
        if steplength_rule == 'backtracking':
            alpha = self.backtracking(w, direction)
        elif steplength_rule == 'diminishing':
            alpha = 1 / (float(k + 1))

        # take the step and record it
        w = w - alpha * direction
        w_hist.append(w)

    if chatty:
        print('...optimization complete!')
        time.sleep(1.5)
        clear_output()

    return w_hist
def draw_2d(self, **kwargs):
    """Plot a single-input function and the descent path from each start.

    Builds a 9x4 figure with two side-by-side panels. Both panels show the
    curve ``g`` over ``[wmin, wmax]``. For every entry of ``self.w_inits``
    the method sets ``self.w_init``, clears ``self.w_hist``, calls
    ``self.run_gradient_descent()`` and then marks every point of
    ``self.w_hist`` in the right panel: an 'X' on the curve and a dot on
    the horizontal axis. Marker colors run from green (first point) through
    yellow to red (last point).

    Args:
        **kwargs:
            g (required): function to plot; also passed to ``compute_grad``.
            wmin, wmax (defaults -3.1, 3.1): plotting range.
            w_inits: iterable of initial points, stored as floats on
                ``self.w_inits``. When omitted, ``self.w_inits`` must
                already exist because the loop below reads it.
            steplength: stored on ``self.steplength``.
            max_its: stored on ``self.max_its`` as a float (default 20).
            version (default 'unnormalized'): stored on ``self.version``.

    Returns:
        None. The figure is created through ``plt`` but not shown here.
    """
    self.g = kwargs['g']                # input function
    self.grad = compute_grad(self.g)    # gradient of input function
    self.w_init = float(-2)             # default initial point
    self.alpha = 10**-4                 # default step length
    self.max_its = 20                   # max iterations for each run
    self.w_hist = []                    # container for algorithm path

    wmin = kwargs.get('wmin', -3.1)
    wmax = kwargs.get('wmax', 3.1)

    # get new initial points if desired
    if 'w_inits' in kwargs:
        self.w_inits = [float(s) for s in kwargs['w_inits']]

    # take in user defined step length
    # NOTE(review): the default above is stored as self.alpha while the
    # user value lands in self.steplength -- confirm which one
    # run_gradient_descent reads.
    if 'steplength' in kwargs:
        self.steplength = kwargs['steplength']

    # take in user defined maximum number of iterations (kept as float,
    # as before; presumably converted by run_gradient_descent -- verify)
    if 'max_its' in kwargs:
        self.max_its = float(kwargs['max_its'])

    # version of gradient descent to use (normalized or unnormalized)
    self.version = kwargs.get('version', 'unnormalized')

    # initialize figure with 2 panels
    plt.figure(figsize=(9, 4))
    gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])
    ax1 = plt.subplot(gs[0])
    ax2 = plt.subplot(gs[1])

    # sample the function over the plotting range
    w_plot = np.linspace(wmin, wmax, 500)
    g_plot = self.g(w_plot)

    # The curve, axis lines and labels do not depend on the initialization,
    # so draw them once per panel instead of once per run.
    for ax in (ax1, ax2):
        ax.plot(w_plot, g_plot, color='k', zorder=2)  # plot function
        ax.axhline(y=0, color='k', zorder=1, linewidth=0.25)
        ax.axvline(x=0, color='k', zorder=1, linewidth=0.25)
        ax.set_xlabel(r'$w$', fontsize=13)
        ax.set_ylabel(r'$g(w)$', fontsize=13, rotation=0, labelpad=25)

    #### loop over all initializations, run gradient descent for each and plot results ###
    for w_init in self.w_inits:
        # run grad descent for this init
        self.w_init = w_init
        self.w_hist = []
        self.run_gradient_descent()

        # colors for points --> green as the algorithm begins, yellow as it
        # converges, red at final point. The red channel ramps 0 -> 1 over
        # the first half of the path and stays at 1; the green channel is
        # the same column reversed; blue is always 0.
        num_pts = len(self.w_hist)
        half = round(num_pts / 2)
        ramp = np.concatenate((np.linspace(0, 1, half), np.ones(num_pts - half)))
        ramp = ramp[:, np.newaxis]
        self.colorspec = np.concatenate(
            (ramp, np.flipud(ramp), np.zeros((num_pts, 1))), 1)

        ### plot all gradient descent points ###
        for k, w_val in enumerate(self.w_hist):
            g_val = self.g(w_val)
            # marker outline gets thinner for later iterations
            edge_width = 0.5 * ((1 / (float(k) + 1)))**(0.4)

            # evaluation on function
            ax2.scatter(w_val, g_val, s=90, c=self.colorspec[k], edgecolor='k',
                        linewidth=edge_width, zorder=3, marker='X')

            # matching point on the horizontal axis
            ax2.scatter(w_val, 0, s=90, facecolor=self.colorspec[k], edgecolor='k',
                        linewidth=edge_width, zorder=3)
def draw_it(func, **kwargs):
    """Plot a two-input function with its first and second partial derivatives.

    The function is sampled on a 100x100 grid over [-5, 5] x [-5, 5] and six
    3-d wireframe-style surfaces are laid out on a 3x6 subplot grid:

    * row 0: ``g`` (column 3)
    * row 1: the two gradient components (columns 2 and 4)
    * row 2: Hessian entries [0, 0], [1, 0] and [1, 1] (columns 1, 3, 5)

    Args:
        func: Function of a two-row input. It is called once with the whole
            grid as a ``(2, 10000)`` array, and its gradient
            (``compute_grad``) and Hessian (``hessian``) are evaluated one
            grid point at a time.
        **kwargs:
            view (default [10, 150]): forwarded unchanged to ``cleanup``
                for every panel.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    view = kwargs.get('view', [10, 150])

    # generate input space for plotting
    w_in = np.linspace(-5, 5, 100)
    grid_shape = (len(w_in), len(w_in))
    w1_vals, w2_vals = np.meshgrid(w_in, w_in)

    # one column per grid point: shape (2, len(w_in)**2)
    w_vals = np.concatenate(
        (w1_vals.reshape(-1, 1), w2_vals.reshape(-1, 1)), axis=1).T

    # compute grad vals, one row per grid point
    grad = compute_grad(func)
    grad_vals = np.asarray([grad(s) for s in w_vals.T])

    # compute hessian at every grid point
    hess = hessian(func)
    hess_vals = [hess(s) for s in w_vals.T]

    # define figure
    plt.figure(figsize=(9, 6))

    def new_panel(row, col):
        # create one 3-d axis in the 3x6 layout
        return plt.subplot2grid((3, 6), (row, col), colspan=1, projection='3d')

    def draw_surface(ax, vals, title):
        # reshape the flat samples onto the grid, draw them, tidy the axis
        surface = np.reshape(vals, grid_shape)
        ax.plot_surface(w1_vals, w2_vals, surface, alpha=0.1, color='w',
                        zorder=1, rstride=15, cstride=15, linewidth=0.5,
                        edgecolor='k')
        ax.set_title(title, fontsize=10)
        cleanup(surface, view, ax)

    def hess_entry(i, j):
        # gather entry (i, j) of the Hessian over all grid points
        return np.asarray([s[i, j] for s in hess_vals])

    ### plot original function ###
    ax1 = new_panel(0, 3)
    draw_surface(ax1, func(w_vals), r'$g(w_1,w_2)$')

    ### plot first derivative functions ###
    ax2 = new_panel(1, 2)
    ax3 = new_panel(1, 4)
    draw_surface(ax2, grad_vals[:, 0],
                 r'$\frac{\partial}{\partial w_1}g(w_1,w_2)$')
    draw_surface(ax3, grad_vals[:, 1],
                 r'$\frac{\partial}{\partial w_2}g(w_1,w_2)$')

    ### plot second derivatives ###
    ax4 = new_panel(2, 1)
    ax5 = new_panel(2, 3)
    ax6 = new_panel(2, 5)

    # pure second derivative in w_1
    draw_surface(
        ax4, hess_entry(0, 0),
        r'$\frac{\partial}{\partial w_1}\frac{\partial}{\partial w_1}g(w_1,w_2)$')

    # mixed second derivative
    draw_surface(
        ax5, hess_entry(1, 0),
        r'$\frac{\partial}{\partial w_1}\frac{\partial}{\partial w_2}g(w_1,w_2)=\frac{\partial}{\partial w_2}\frac{\partial}{\partial w_1}g(w_1,w_2)$')

    # pure second derivative in w_2
    draw_surface(
        ax6, hess_entry(1, 1),
        r'$\frac{\partial}{\partial w_2}\frac{\partial}{\partial w_2}g(w_1,w_2)$')

    plt.show()