def show_stationary_v2(func1,func2,func3,**kwargs):
    '''
    Plot three functions, one per column, with their first and second derivatives stacked
    underneath, and mark every stationary point found on the sampled input grid.

    A grid sample is treated as stationary when the first derivative (built with
    compute_grad) is exactly zero there, is smaller than 1e-5 in magnitude, or changes
    sign before the next sample.  Each such sample is drawn as a lime marker on all three
    panels of its column, and a horizontal tangent segment of half-width 1 is drawn
    through it on the function panel.

    Inputs:
    func1, func2, func3 - functions to draw; each is called on the whole input grid and
                          differentiated twice with compute_grad
    w (optional keyword) - input grid; defaults to 5000 evenly spaced points on [-3,3]
    '''
    # input grid shared by every column
    w = kwargs.get('w', np.linspace(-3,3,5000))
    wgap = (max(w) - min(w))*0.1

    # 3 x 3 layout: row 0 = function, row 1 = first derivative, row 2 = second derivative
    fig = plt.figure(figsize = (7,5))
    fig.subplots_adjust(wspace=0.2,hspace=0.8)
    gs = gridspec.GridSpec(3, 3, width_ratios=[1,1,1])

    # appearance shared by every stationary point marker
    marker_style = dict(s = 40,c = 'lime',edgecolor = 'k',linewidth = 2,zorder = 3)

    for col, func in enumerate((func1, func2, func3)):
        ax_g = plt.subplot(gs[col])
        ax_d1 = plt.subplot(gs[col+3],sharex=ax_g)
        ax_d2 = plt.subplot(gs[col+6],sharex=ax_g)

        # evaluate the function and its derivatives over the grid
        g_plot = func(w)
        grad = compute_grad(func)
        hess = compute_grad(grad)
        grad_plot = np.array([grad(s) for s in w])
        hess_plot = np.array([hess(s) for s in w])

        # draw each curve in its own panel with a 10% margin above and below
        panels = (
            (ax_g, g_plot, r'$g(w)$'),
            (ax_d1, grad_plot, r'$\frac{\mathrm{d}}{\mathrm{d}w}g(w)$'),
            (ax_d2, hess_plot, r'$\frac{\mathrm{d}^2}{\mathrm{d}w^2}g(w)$'),
        )
        for panel, curve, title in panels:
            lowest, highest = min(curve), max(curve)
            margin = (highest - lowest)*0.1
            panel.plot(w,curve,color = 'k',zorder = 1,linewidth = 2)
            if panel is not ax_g:
                # dashed horizontal axis under the derivative curves
                panel.plot(w,curve*0,color = 'k',zorder = 1,linewidth = 1,linestyle = '--')
            panel.set_title(title,fontsize = 12)
            if panel is ax_g:
                # the derivative panels share this x-axis
                panel.set_xlim([min(w)-wgap,max(w)+wgap])
            panel.set_ylim([lowest - margin, highest + margin])
            panel.tick_params(labelsize=6)

        # flag samples whose derivative is zero, numerically tiny, or about to change sign
        slope_sign = np.sign(grad_plot)
        ind = []
        if len(slope_sign) > 1:
            here, after = slope_sign[:-1], slope_sign[1:]
            flagged = (here == 0) | (np.abs(grad_plot[:-1]) < 10**-5) | (here*after < 0)
            ind = list(np.flatnonzero(flagged))
            # the final sample has no successor, so it only qualifies by being exactly zero
            if slope_sign[-1] == 0:
                ind.append(len(slope_sign) - 1)
        ind = np.unique(ind)

        # mark each stationary sample on all three panels and draw its flat tangent
        for pt in ind:
            w_val = w[pt]
            heights = (func(w_val), grad(w_val), hess(w_val))
            for panel, height in zip((ax_g, ax_d1, ax_d2), heights):
                panel.scatter(w_val,height,**marker_style)

            # tangent at a stationary point has zero slope
            span = np.linspace(w_val - 1,w_val + 1, 100)
            level = heights[0] + 0*(span - w_val)
            ax_g.plot(span,level,color = 'lime',alpha = 0.5,linewidth = 1.5,zorder = 2)
    plt.show()
def show_stationary_1func(func,**kwargs):
    '''
    Plot one function twice, side by side: on its own in the left panel, and with its
    stationary points marked in lime in the right panel.

    A grid sample is treated as stationary when the derivative (built with compute_grad)
    is exactly zero there, is smaller than 1e-5 in magnitude, or changes sign before the
    next sample.

    Inputs:
    func - function to draw; it is called on the whole input grid and differentiated
           with compute_grad
    wmax (optional keyword) - the input grid is 5000 evenly spaced points running from
           -wmax to wmax.  The default is -3, so by default the grid runs from 3 down to -3.
    '''
    # input grid
    wmax = kwargs.get('wmax', -3)
    w = np.linspace(-wmax,wmax,5000)

    # two equally wide panels sharing a vertical axis
    fig = plt.figure(figsize = (6,3))
    fig.subplots_adjust(wspace=0.3,hspace=0.4)
    gs = gridspec.GridSpec(1, 2, width_ratios=[1,1])
    left = plt.subplot(gs[0])
    right = plt.subplot(gs[1],sharey=left)

    # evaluate the function and its derivative over the grid
    g_plot = func(w)
    grad = compute_grad(func)
    grad_plot = np.array([grad(s) for s in w])

    # 10% margins around the plotted data
    wgap = (max(w) - min(w))*0.1
    ggap = (max(g_plot) - min(g_plot))*0.1
    y_limits = [min(g_plot) - ggap, max(g_plot) + ggap]

    # the same curve goes in both panels; only the left one gets explicit x limits
    for panel in (left, right):
        panel.plot(w,g_plot,color = 'k',zorder = 1,linewidth = 2)
        panel.set_title(r'$g(w)$',fontsize = 12)
        if panel is left:
            panel.set_xlim([min(w)-wgap,max(w)+wgap])
        panel.set_ylim(y_limits)
        panel.tick_params(labelsize=6)

    # flag samples whose derivative is zero, numerically tiny, or about to change sign
    slope_sign = np.sign(grad_plot)
    ind = []
    if len(slope_sign) > 1:
        here, after = slope_sign[:-1], slope_sign[1:]
        flagged = (here == 0) | (np.abs(grad_plot[:-1]) < 10**-5) | (here*after < 0)
        ind = list(np.flatnonzero(flagged))
        # the final sample has no successor, so it only qualifies by being exactly zero
        if slope_sign[-1] == 0:
            ind.append(len(slope_sign) - 1)
    ind = np.unique(ind)

    # mark the stationary samples on the right-hand copy of the curve
    for pt in ind:
        w_val = w[pt]
        right.scatter(w_val,func(w_val),s = 40,c = 'lime',edgecolor = 'k',linewidth = 2,zorder = 3)
    plt.show()
def compare_2d3d(func1,func2,**kwargs):
    '''
    Draw a first order (tangent) approximation in two settings side by side: the tangent
    line to the single-input function func1 at pt1 in the left panel, and the tangent
    plane to the two-input function func2 at pt2 in the right (3d) panel.

    Inputs:
    func1 - single-input function; evaluated on the whole grid w and differentiated with
            compute_grad
    func2 - two-input function; evaluated on a 2 x N array holding one grid point per
            column and on a single 2 x 1 point, and differentiated with compute_grad

    Optional keywords:
    view - [elevation, azimuth] passed to the 3d panel's view_init (default [20,-65])
    w    - 1d input grid used along every input axis (default 200 points on [-3,3])
    pt1  - tangency point for func1 (default 0)
    pt2  - tangency point for func2 (default [0,0])
    '''
    # input arguments
    view = kwargs.get('view', [20,-65])
    w = kwargs.get('w', np.linspace(-3,3,200))
    pt1 = float(kwargs.get('pt1', 0))
    pt2 = kwargs.get('pt2', [0,0])

    # construct figure with no outer whitespace
    fig = plt.figure(figsize = (6,3))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    fig.subplots_adjust(wspace=0.01,hspace=0.01)

    # two panels: 2d curve on the left, 3d surface (twice as wide) on the right
    gs = gridspec.GridSpec(1, 2, width_ratios=[1,2])

    ### draw 2d version ###
    ax1 = plt.subplot(gs[0])
    grad = compute_grad(func1)

    # evaluate the function over the grid and at the tangency point
    g_plot = func1(w)
    ggap = (max(g_plot) - min(g_plot))*0.2          # vertical margin for the final view
    g_val = func1(pt1)

    # plot original function and the point of tangency
    ax1.plot(w,g_plot,color = 'k',zorder = 1,linewidth=2)
    ax1.scatter(pt1,g_val,s = 60,c = 'lime',edgecolor = 'k',linewidth = 2,zorder = 3)

    # first order approximation h(w) = g(pt1) + g'(pt1)(w - pt1), drawn 3 units either side of pt1
    g_grad_val = grad(pt1)
    wrange = np.linspace(pt1 - 3,pt1 + 3, 100)
    h = g_val + g_grad_val*(wrange - pt1)
    ax1.plot(wrange,h,color = 'lime',alpha = 0.5,linewidth = 3,zorder = 2)

    # make new x-axis
    ax1.plot(w,g_plot*0,linewidth=3,color = 'k')

    # fix viewing limits on panel
    ax1.set_xlim([min(w),max(w)])
    ax1.set_ylim([min(min(g_plot) - ggap,-4),max(max(g_plot) + ggap,0.5)])

    # label axes
    ax1.set_xlabel('$w$',fontsize = 12,labelpad = -50)
    ax1.set_ylabel('$g(w)$',fontsize = 25,rotation = 0,labelpad = 50)

    # strip the frame so only the hand-drawn x-axis remains
    ax1.grid(False)
    ax1.yaxis.set_visible(False)
    ax1.spines['right'].set_visible(False)
    ax1.spines['top'].set_visible(False)
    ax1.spines['left'].set_visible(False)

    ### draw 3d version ###
    ax2 = plt.subplot(gs[1],projection='3d')
    grad = compute_grad(func2)
    n = len(w)

    # grid of input points, one per column of a 2 x n^2 array, and the surface over it
    w1_vals, w2_vals = np.meshgrid(w,w)
    w_vals = np.vstack((w1_vals.reshape(1,-1),w2_vals.reshape(1,-1)))
    g_vals = np.reshape(func2(w_vals),(n,n))

    # tangency point, its height and the gradient there
    w_val = np.array([float(pt2[0]),float(pt2[1])]).reshape(2,1)
    g_val = func2(w_val)
    grad_val = np.reshape(grad(w_val),(2,1))

    # tangent plane h(w) = g(pt2) + grad^T (w - pt2), evaluated on the grid re-centred at pt2
    wtan_vals = w_vals + w_val
    h_vals = g_val + (wtan_vals[0]-w_val[0])*grad_val[0] + (wtan_vals[1]-w_val[1])*grad_val[1]
    w1tan_vals = wtan_vals[0].reshape(n,n)
    w2tan_vals = wtan_vals[1].reshape(n,n)
    h_vals = np.reshape(h_vals,(n,n))

    ### plot function ###
    ax2.plot_surface(w1_vals, w2_vals, g_vals, alpha = 0.5,color = 'w',rstride=25, cstride=25,linewidth=1,edgecolor = 'k',zorder = 2)

    ### plot z=0 plane ###
    ax2.plot_surface(w1_vals, w2_vals, g_vals*0, alpha = 0.1,color = 'w',zorder = 1,rstride=25, cstride=25,linewidth=0.3,edgecolor = 'k')

    ### plot tangent plane ###
    ax2.plot_surface(w1tan_vals, w2tan_vals, h_vals, alpha = 0.4,color = 'lime',zorder = 1,rstride=50, cstride=50,linewidth=1,edgecolor = 'k')

    # scatter tangency
    ax2.scatter(w_val[0],w_val[1],g_val,s = 70,c = 'lime',edgecolor = 'k',linewidth = 2)

    ### clean up plot ###
    # make the background panes transparent (the x pane keeps its default edge colour)
    ax2.xaxis.pane.fill = False
    ax2.yaxis.pane.fill = False
    ax2.zaxis.pane.fill = False
    ax2.yaxis.pane.set_edgecolor('white')
    ax2.zaxis.pane.set_edgecolor('white')

    # remove axes lines and tickmarks; the 3d axis objects are reached through
    # xaxis/yaxis/zaxis because the old w_xaxis/w_yaxis/w_zaxis aliases no longer exist
    # in current matplotlib
    ax2.zaxis.line.set_lw(0.)
    ax2.set_zticks([])
    ax2.xaxis.line.set_lw(0.)
    ax2.set_xticks([])
    ax2.yaxis.line.set_lw(0.)
    ax2.set_yticks([])

    # set viewing angle
    ax2.view_init(view[0],view[1])

    # set viewing limits: symmetric box 40% wider than the grid, z range always covering [-0.5,0.5]
    wgap = (max(w) - min(w))*0.4
    y = max(w) + wgap
    ax2.set_xlim([-y,y])
    ax2.set_ylim([-y,y])

    zmin = min(np.min(g_vals),-0.5)
    zmax = max(np.max(g_vals),+0.5)
    ax2.set_zlim([zmin,zmax])

    # label plot
    fontsize = 12
    ax2.set_xlabel(r'$w_1$',fontsize = fontsize,labelpad = -30)
    ax2.set_ylabel(r'$w_2$',fontsize = fontsize,rotation = 0,labelpad=-30)

    plt.show()
Esempio n. 4
0
    def run(self, g, w_init, steplength_vals, max_its, **kwargs):
        '''
        Run gradient descent from w_init once per step length and draw each run in its
        own figure: the path on the function in the left panel and the cost value per
        iteration in the right panel.

        The descent itself is delegated to self.run_gradient_descent(), which is expected
        to fill self.w_hist with the visited points (NOTE(review): that method is not
        visible here; confirm it reads self.steplength / self.max_its / self.version).

        Inputs:
        g               - function to minimise; differentiated with compute_grad
        w_init          - starting point; two entries select the contour (two-input)
                          display, anything else is converted with float() and drawn as a curve
        steplength_vals - iterable of step lengths, one figure is produced per entry
        max_its         - iteration budget stored on self.max_its

        Optional keywords:
        pts          - 'on' to draw the visited points (contour display only; the curve
                       display always draws them).  Default 'off'.
        linewidth    - width of the path drawn on the contour plot (default 2.5)
        axes         - True to add dashed axes through the origin of the left panel
        num_contours - contour level budget for the contour display (default 15)
        version      - stored on self.version (default 'unnormalized')
        xmin, xmax, ymin, ymax - viewing window; by default it is the range of the visited
                       points (including w_init) padded by 10% per side
        Other keywords (for example 'view' or 'plot_final') are accepted and ignored.
        '''
        ### input arguments ###
        self.g = g
        self.max_its = max_its
        self.grad = compute_grad(self.g)  # gradient of input function

        pts = kwargs.get('pts', 'off')
        linewidth = kwargs.get('linewidth', 2.5)
        axes = kwargs.get('axes', False)
        num_contours = kwargs.get('num_contours', 15)

        # version of gradient descent to use (normalized or unnormalized)
        self.version = kwargs.get('version', 'unnormalized')

        # get initial point
        if np.size(w_init) == 2:
            self.w_init = np.asarray([float(s) for s in w_init])
        else:
            self.w_init = float(w_init)
        two_inputs = np.size(self.w_init) == 2

        ##### construct one figure per steplength #####
        for step in steplength_vals:
            # a bare figure: the only axes wanted are the two gridspec panels below
            plt.figure(figsize=(9, 4))
            gs = gridspec.GridSpec(1, 2, width_ratios=[2, 1])
            ax = plt.subplot(gs[0], aspect='equal')
            ax2 = plt.subplot(gs[1])

            #### run gradient descent with this steplength ####
            self.w_hist = []
            self.steplength = step
            self.run_gradient_descent()

            # cost at every visited point, evaluated once and reused below
            g_hist = [self.g(v) for v in self.w_hist]

            # colors for points: RGB rows whose red channel ramps 0 -> 1 over the first
            # half of the run and whose green channel mirrors it; blue stays 0
            num_pts = len(self.w_hist)
            half = round(num_pts / 2)
            ramp = np.concatenate((np.linspace(0, 1, half),
                                   np.ones(num_pts - half)))[:, np.newaxis]
            colorspec = np.hstack((ramp, np.flipud(ramp), np.zeros((num_pts, 1))))

            if two_inputs:  # function is multi-input, plot contours
                # viewing window: range of visited points (and w_init) plus 10% per side
                xvals = [v[0] for v in self.w_hist] + [self.w_init[0]]
                yvals = [v[1] for v in self.w_hist] + [self.w_init[1]]
                xmin, xmax = min(xvals), max(xvals)
                ymin, ymax = min(yvals), max(yvals)
                xgap = (xmax - xmin) * 0.1
                ygap = (ymax - ymin) * 0.1
                xmin = kwargs.get('xmin', xmin - xgap)
                xmax = kwargs.get('xmax', xmax + xgap)
                ymin = kwargs.get('ymin', ymin - ygap)
                ymax = kwargs.get('ymax', ymax + ygap)

                # evaluate the function point by point over a 400 x 400 grid
                w1 = np.linspace(xmin, xmax, 400)
                w2 = np.linspace(ymin, ymax, 400)
                w1_vals, w2_vals = np.meshgrid(w1, w2)
                grid_pts = np.column_stack((w1_vals.ravel(), w2_vals.ravel()))
                func_vals = np.asarray([self.g(p) for p in grid_pts])
                func_vals = func_vals.reshape(w1_vals.shape)

                ### choose contour levels ###
                # A few levels span the upper part of the range; the rest are packed
                # repeatedly into the lowest band.  The budget is a per-figure copy so
                # every steplength gets the full num_contours.
                remaining = num_contours
                levelmin = func_vals.min()
                levelmax = func_vals.max()
                numper = 3
                cutoff = (levelmax - levelmin) * 0.5
                levels1 = np.linspace(cutoff, levelmax, numper)
                remaining -= numper

                # clamp at 0 so a very small budget cannot ask linspace for < 0 samples
                levels2 = np.linspace(levelmin, cutoff,
                                      max(min(remaining, numper), 0))
                levels = np.unique(np.append(levels1, levels2))
                remaining -= numper
                while remaining > 0:
                    cutoff = levels[1]
                    levels2 = np.linspace(levelmin, cutoff,
                                          min(remaining, numper))
                    levels = np.unique(np.append(levels2, levels))
                    remaining -= numper

                ax.contour(w1_vals, w2_vals, func_vals, levels=levels,
                           colors='k')
                ax.contourf(w1_vals, w2_vals, func_vals, levels=levels,
                            cmap='Blues')

                # draw the descent path on the contours and the cost history
                for j, w_val in enumerate(self.w_hist):
                    g_val = g_hist[j]

                    if pts == 'on':
                        ax.scatter(w_val[0],
                                   w_val[1],
                                   s=30,
                                   c=colorspec[j],
                                   edgecolor='k',
                                   linewidth=1.5 * math.sqrt(
                                       (1 / (float(j) + 1))),
                                   zorder=3)

                        ax2.scatter(j,
                                    g_val,
                                    s=30,
                                    c=colorspec[j],
                                    edgecolor='k',
                                    linewidth=0.7,
                                    zorder=3)

                    # connect consecutive points: a coloured line over a slightly
                    # wider black one, which acts as an outline
                    if j > 0:
                        w_old = self.w_hist[j - 1]
                        g_old = g_hist[j - 1]

                        ax.plot([w_old[0], w_val[0]], [w_old[1], w_val[1]],
                                color=colorspec[j],
                                linewidth=linewidth,
                                alpha=1,
                                zorder=2)
                        ax.plot([w_old[0], w_val[0]], [w_old[1], w_val[1]],
                                color='k',
                                linewidth=linewidth + 0.4,
                                alpha=1,
                                zorder=1)
                        ax2.plot([j - 1, j], [g_old, g_val],
                                 color=colorspec[j],
                                 linewidth=2,
                                 alpha=1,
                                 zorder=2)
                        ax2.plot([j - 1, j], [g_old, g_val],
                                 color='k',
                                 linewidth=2.5,
                                 alpha=1,
                                 zorder=1)

                # clean up panel
                ax.set_xlabel('$w_1$', fontsize=12)
                ax.set_ylabel('$w_2$', fontsize=12, rotation=0)
                ax.axhline(y=0, color='k', zorder=0, linewidth=0.5)
                ax.axvline(x=0, color='k', zorder=0, linewidth=0.5)
                ax.set_xlim([xmin, xmax])
                ax.set_ylim([ymin, ymax])

            else:  # function is single input, plot curve
                # default plotting range: visited points (and w_init) plus 10% per
                # side, or one unit per side when the run never moved
                visited = [float(np.squeeze(v)) for v in self.w_hist]
                visited.append(self.w_init)
                lowest, highest = min(visited), max(visited)
                xgap = (highest - lowest) * 0.1
                if xgap == 0:
                    xgap = 1.0
                xmin = kwargs.get('xmin', lowest - xgap)
                xmax = kwargs.get('xmax', highest + xgap)

                w_plot = np.linspace(xmin, xmax, 500)
                g_plot = self.g(w_plot)
                ax.plot(w_plot, g_plot, color='k', linewidth=2, zorder=2)

                # set viewing limits
                ymin = min(g_plot)
                ymax = max(g_plot)
                ygap = (ymax - ymin) * 0.2
                ymin -= ygap
                ymax += ygap
                ax.set_ylim([ymin, ymax])

                # clean up panel
                ax.axhline(y=0, color='k', zorder=1, linewidth=0.25)
                ax.axvline(x=0, color='k', zorder=1, linewidth=0.25)
                ax.set_xlabel(r'$w$', fontsize=13)
                ax.set_ylabel(r'$g(w)$', fontsize=13, rotation=0, labelpad=25)

                # mark every visited input on the axis and its evaluation on the curve
                for j, w_val in enumerate(self.w_hist):
                    g_val = g_hist[j]
                    edge = 0.5 * ((1 / (float(j) + 1)))**(0.4)

                    ax.scatter(w_val,
                               g_val,
                               s=90,
                               c=colorspec[j],
                               edgecolor='k',
                               linewidth=edge,
                               zorder=3,
                               marker='X')  # evaluation on function
                    ax.scatter(w_val,
                               0,
                               s=90,
                               facecolor=colorspec[j],
                               edgecolor='k',
                               linewidth=edge,
                               zorder=3)  # input on the horizontal axis

                    ax2.scatter(j,
                                g_val,
                                s=30,
                                c=colorspec[j],
                                edgecolor='k',
                                linewidth=0.7,
                                zorder=3)

                    # connect consecutive cost values (coloured line over black outline)
                    if j > 0:
                        g_old = g_hist[j - 1]
                        ax2.plot([j - 1, j], [g_old, g_val],
                                 color=colorspec[j],
                                 linewidth=2,
                                 alpha=1,
                                 zorder=2)
                        ax2.plot([j - 1, j], [g_old, g_val],
                                 color='k',
                                 linewidth=2.5,
                                 alpha=1,
                                 zorder=1)

            if axes == True:
                ax.axhline(linestyle='--', color='k', linewidth=1)
                ax.axvline(linestyle='--', color='k', linewidth=1)

            # title: numeric steplengths are shown as "alpha = value", anything else as is
            title = self.steplength
            if isinstance(self.steplength, (int, float)):
                title = r'$\alpha = $' + str(self.steplength)
            ax.set_title(title, fontsize=12)

            ax2.axhline(y=0, color='k', zorder=0, linewidth=0.5)
            ax2.set_xlabel('iteration', fontsize=12)
            ax2.set_ylabel(r'$g(w)$', fontsize=12, rotation=0, labelpad=25)

            # scale the history panel's aspect from the left panel's on-figure box
            ax.set(aspect='equal')
            box = ax.get_position()
            yr = box.y1 - box.y0
            xr = box.x1 - box.x0
            aspectratio = 1.25 * xr / yr
            ratio_default = (ax2.get_xlim()[1] - ax2.get_xlim()[0]) / (
                ax2.get_ylim()[1] - ax2.get_ylim()[0])
            ax2.set_aspect(ratio_default * aspectratio)

            # plot
            plt.show()
    def animate_it(self, **args):
        '''
        Build a matplotlib animation with one frame per step length in the alpha range,
        preceded by a frame showing only the function surface and the initial point.

        Every later frame reruns self.run_gradient_descent(alpha=...) and draws the
        visited points on the surface (left, 3d panel, drawn by self.plot_function)
        together with the cost value per iteration (right panel).

        Keywords:
        g           - (required) function to minimise; its first and second derivatives
                      are built with compute_grad and stored on self.grad / self.hess
        alpha_range - step lengths to animate (default 20 values from 1e-4 to 1)
        max_its     - stored on self.max_its (default 20)
        w_init      - two-entry starting point, stored as a 2 x 1 float array; when it is
                      omitted self.w_init is left as it was
        view        - [elevation, azimuth] for the 3d panel (default [10, 50])

        Returns the animation.FuncAnimation object.
        '''
        # function and its derivatives
        self.g = args['g']
        self.grad = compute_grad(self.g)
        self.hess = compute_grad(self.grad)

        # run settings, each overridable by keyword
        self.alpha_range = args.get('alpha_range', np.linspace(10**-4, 1, 20))
        self.max_its = args.get('max_its', 20)
        if 'w_init' in args:
            start = [float(a) for a in args['w_init']]
            self.w_init = np.asarray(start).reshape(2, 1)
        view = args.get('view', [10, 50])

        # figure with a wide 3d panel and a narrow history panel
        fig = plt.figure(figsize=(9, 5))
        artist = fig
        gs = gridspec.GridSpec(1, 2, width_ratios=[3, 1])
        ax1 = plt.subplot(gs[0], projection='3d')
        ax2 = plt.subplot(gs[1])

        print('starting animation rendering...')
        num_frames = len(self.alpha_range) + 1

        def animate(k):
            # every frame is drawn from scratch
            ax1.cla()
            ax2.cla()

            # progress messages
            frame_no = k + 1
            if frame_no % 25 == 0:
                print('rendering animation frame ' + str(frame_no) + ' of ' +
                      str(num_frames))
            if frame_no == num_frames:
                print('animation rendering complete!')
                time.sleep(1.5)
                clear_output()

            if k == 0:
                # opening frame: the initial point ...
                w_val = self.w_init
                ax1.scatter(w_val[0],
                            w_val[1],
                            self.g(w_val),
                            s=100,
                            c='m',
                            edgecolor='k',
                            linewidth=0.7,
                            zorder=2)

                # ... and the function surface over [-3, 3] x [-3, 3]
                r = np.linspace(-3, 3, 100)
                side = len(r)
                w1_vals, w2_vals = np.meshgrid(r, r)
                g_vals = self.g(
                    [w1_vals.reshape(side**2, 1),
                     w2_vals.reshape(side**2, 1)])
                g_vals = np.reshape(g_vals, (side, side))

                ax1.plot_surface(w1_vals,
                                 w2_vals,
                                 g_vals,
                                 alpha=0.1,
                                 color='k',
                                 rstride=15,
                                 cstride=15,
                                 linewidth=1,
                                 edgecolor='k')

            elif k > 0:
                # frame k shows the run for the (k-1)-th step length
                alpha = self.alpha_range[k - 1]

                # setup axes
                ax1.set_title(r'$\alpha = $' + r'{:.2f}'.format(alpha),
                              fontsize=14)
                ax2.set_xlabel('iteration', fontsize=13)
                ax2.set_ylabel('cost function value', fontsize=13)

                # run gradient descent method, then draw the function
                self.w_hist = []
                self.run_gradient_descent(alpha=alpha)
                self.plot_function(ax1)

                # colors for points: red ramps 0 -> 1 over the first half of the run,
                # green mirrors it, blue stays 0
                num_pts = len(self.w_hist)
                half = round(num_pts / 2)
                ramp = np.concatenate(
                    (np.linspace(0, 1, half),
                     np.ones(num_pts - half)))[:, np.newaxis]
                self.colorspec = np.hstack(
                    (ramp, np.flipud(ramp), np.zeros((num_pts, 1))))

                # cost at every visited point
                costs = [self.g(v) for v in self.w_hist]

                for j, (w_val, g_val) in enumerate(zip(self.w_hist, costs)):
                    shade = self.colorspec[j]

                    # point on the surface
                    ax1.scatter(w_val[0],
                                w_val[1],
                                g_val,
                                s=90,
                                c=shade,
                                edgecolor='k',
                                linewidth=0.7,
                                zorder=3)

                    # same point on the cost function decrease plot
                    ax2.scatter(j,
                                g_val,
                                s=90,
                                c=shade,
                                edgecolor='k',
                                linewidth=0.7,
                                zorder=3)

                    # one tick per iteration
                    ax2.set_xticks(np.arange(num_pts))

                    # connect consecutive cost values
                    if j > 0:
                        ax2.plot([j - 1, j], [costs[j - 1], g_val],
                                 color=shade,
                                 linewidth=2,
                                 alpha=0.4,
                                 zorder=1)

            # clean up plot
            ax1.view_init(view[0], view[1])
            ax1.set_axis_off()

            return artist,

        anim = animation.FuncAnimation(fig,
                                       animate,
                                       frames=num_frames,
                                       interval=num_frames,
                                       blit=True)

        return (anim)
    def compare_versions_3d(self, g, w_init, steplength, max_its, **kwargs):
        """Plot normalized and unnormalized gradient descent runs side by side.

        For each of the two versions the method sets ``self.version``, clears
        ``self.w_hist``, calls ``self.run_gradient_descent()`` and then draws
        the points found in ``self.w_hist`` on a 3D surface panel (at height
        zero) and on a contour panel. Paths with fewer than 10 points are
        additionally connected with arrows. The figure is shown with
        ``plt.show()``; nothing is returned.

        Args:
            g: function of a two-element input; it is evaluated point by
                point on a 200 x 200 grid to draw the surface and contours.
            w_init: iterable of numbers, stored as a float array in
                ``self.w_init``.
            steplength: stored in ``self.steplength``.
            max_its: stored in ``self.max_its``.
            **kwargs: optional settings
                wmax: half-width of the plotted square; 0.5 is added to a
                    supplied value (the default of 1 is used unpadded).
                view: ``[elev, azim]`` handed to ``view_init``
                    (default ``[20, -50]``).
                axes: when equal to ``True`` dashed lines through the origin
                    are drawn on the contour panels (default ``False``).
                num_contours: number of contour levels (default 10).
                ``plot_final`` is accepted but has no effect.
        """
        ### input arguments ###
        self.g = g
        self.steplength = steplength
        self.max_its = max_its
        self.grad = compute_grad(self.g)  # gradient of input function

        # optional settings (only a user-supplied wmax receives the padding)
        wmax = kwargs['wmax'] + 0.5 if 'wmax' in kwargs else 1
        view = kwargs.get('view', [20, -50])
        axes = kwargs.get('axes', False)
        num_contours = kwargs.get('num_contours', 10)

        # get initial point
        self.w_init = np.asarray([float(s) for s in w_init])

        ##### construct figure with panels #####
        fig = plt.figure(figsize=(12, 6))

        # 2 x 3 grid: the narrow first column is left empty, each row holds a
        # 3D panel and a contour panel
        gs = gridspec.GridSpec(2, 3, width_ratios=[1, 5, 10])
        ax3 = plt.subplot(gs[1], projection='3d')
        ax4 = plt.subplot(gs[2], aspect='equal')
        ax5 = plt.subplot(gs[4], projection='3d')
        ax6 = plt.subplot(gs[5], aspect='equal')

        # remove whitespace from figure
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)

        #### define input space for function and evaluate ####
        w = np.linspace(-wmax, wmax, 200)
        w1_vals, w2_vals = np.meshgrid(w, w)
        grid_points = np.concatenate(
            (w1_vals.reshape(-1, 1), w2_vals.reshape(-1, 1)), axis=1)
        func_vals = np.asarray([g(pt) for pt in grid_points])
        func_vals.shape = (len(w), len(w))

        #### run both versions of gradient descent ####
        panels = (
            ('normalized', ax3, ax4, 'normalized gradient descent'),
            ('unnormalized', ax5, ax6, 'unnormalized gradient descent'),
        )
        for version, ax, ax2, title in panels:
            # switch normalized / unnormalized
            self.version = version

            # plot function
            ax.plot_surface(w1_vals,
                            w2_vals,
                            func_vals,
                            alpha=0.1,
                            color='w',
                            rstride=25,
                            cstride=25,
                            linewidth=1,
                            edgecolor='k',
                            zorder=2)

            # plot z=0 plane
            ax.plot_surface(w1_vals,
                            w2_vals,
                            func_vals * 0,
                            alpha=0.1,
                            color='w',
                            zorder=1,
                            rstride=25,
                            cstride=25,
                            linewidth=0.3,
                            edgecolor='k')

            ### make contour right plot - as well as horizontal and vertical axes ###
            ax2.contour(w1_vals, w2_vals, func_vals, num_contours, colors='k')
            if axes == True:
                ax2.axhline(linestyle='--', color='k', linewidth=1)
                ax2.axvline(linestyle='--', color='k', linewidth=1)

            self.w_hist = []
            self.run_gradient_descent()

            # colors for points: the first half of the path ramps
            # (0, 1, 0) -> (1, 1, 0), the second half ramps on to (1, 0, 0)
            num_pts = len(self.w_hist)
            half = round(num_pts / 2)
            ramp = np.vstack((np.linspace(0, 1, half)[:, np.newaxis],
                              np.ones((num_pts - half, 1))))
            colorspec = np.concatenate(
                (ramp, np.flipud(ramp), np.zeros((num_pts, 1))), 1)

            #### scatter path points ####
            for k, w_now in enumerate(self.w_hist):
                # marker outlines get thinner as the run progresses
                edge_scale = math.sqrt((1 / (float(k) + 1)))
                ax.scatter(w_now[0],
                           w_now[1],
                           0,
                           s=60,
                           c=colorspec[k],
                           edgecolor='k',
                           linewidth=0.5 * edge_scale,
                           zorder=3)

                ax2.scatter(w_now[0],
                            w_now[1],
                            s=60,
                            c=colorspec[k],
                            edgecolor='k',
                            linewidth=1.5 * edge_scale,
                            zorder=3)

            #### connect points with arrows (short paths only) ####
            if len(self.w_hist) < 10:
                for pt1, pt2 in zip(self.w_hist[:-1], self.w_hist[1:]):
                    # draw arrow in left plot
                    a = Arrow3D([pt1[0], pt2[0]], [pt1[1], pt2[1]], [0, 0],
                                mutation_scale=10,
                                lw=2,
                                arrowstyle="-|>",
                                color="k")
                    ax.add_artist(a)

                    # draw 2d arrow in right plot; it covers 78% of the step
                    ax2.arrow(pt1[0],
                              pt1[1], (pt2[0] - pt1[0]) * 0.78,
                              (pt2[1] - pt1[1]) * 0.78,
                              head_width=0.1,
                              head_length=0.1,
                              fc='k',
                              ec='k',
                              linewidth=3,
                              zorder=2,
                              length_includes_head=True)

            ### cleanup panels ###
            ax.set_xlabel('$w_1$', fontsize=12)
            ax.set_ylabel('$w_2$', fontsize=12, rotation=0)
            ax.set_title(title, fontsize=12)
            ax.view_init(view[0], view[1])

            ax2.set_xlabel('$w_1$', fontsize=12)
            ax2.set_ylabel('$w_2$', fontsize=12, rotation=0)
            ax2.axhline(y=0, color='k', zorder=0, linewidth=0.5)
            ax2.axvline(x=0, color='k', zorder=0, linewidth=0.5)

            # clean up axis: transparent panes, white pane edges, invisible grid
            for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
                axis.pane.fill = False
            for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
                axis.pane.set_edgecolor('white')
            for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
                axis._axinfo["grid"]['color'] = (1, 1, 1, 0)

        # plot
        plt.show()
Esempio n. 7
0
    def newtons_method(self, g, win, **kwargs):
        """Run Newton's method on ``g`` starting from the weights ``win``.

        ``g`` and ``win`` are passed through ``flatten_func`` so the loop
        works on a single flat weight vector; weights are converted back with
        the returned ``unflatten`` before being stored or returned.

        Args:
            g: cost function to minimize.
            win: initial weights.
            **kwargs: optional settings
                max_its: number of Newton steps (default 20).
                epsilon: value added to the Hessian diagonal before solving
                    (default ``10**-10``); stored in ``self.epsilon``.
                verbose: when equal to ``True`` start/finish messages are
                    printed (default ``True``).
                output: ``'history'`` (default) or ``'best'``.
                counter: function used to score weights in ``'best'`` mode;
                    defaults to a deep copy of the flattened ``g``.

        Returns:
            With ``output == 'history'`` the list of weights visited; with
            ``output == 'best'`` the weights with the lowest ``counter``
            value (also kept in ``self.w_best``); ``None`` for any other
            ``output`` value.

            The run stops early and returns the result gathered so far when,
            for a step index ``k > 2``, the cost is larger than at the last
            accepted step.
        """
        # flatten gradient for simpler-written descent loop
        self.g, unflatten, w = flatten_func(g, win)

        self.grad = compute_grad(self.g)
        self.hess = compute_hess(self.g)

        # parse optional arguments
        max_its = kwargs.get('max_its', 20)
        self.epsilon = kwargs.get('epsilon', 10**-10)
        verbose = kwargs.get('verbose', True)
        output = kwargs.get('output', 'history')
        self.counter = copy.deepcopy(self.g)
        if 'counter' in kwargs:
            self.counter, unflatten, w = flatten_func(kwargs['counter'], win)

        # start newton's method loop
        if verbose == True:
            print('starting optimization...')
        geval_old = self.g(w)

        # best weights seen so far, scored with the counter function
        self.w_best = unflatten(copy.deepcopy(w))
        g_best = self.counter(w)

        # container for weight history
        w_hist = []
        if output == 'history':
            w_hist.append(unflatten(w))

        # loop
        for k in range(max_its):
            # compute gradient and hessian
            grad_val = self.grad(w)
            hess_val = self.hess(w)
            hess_val.shape = (np.size(w), np.size(w))

            # solve linear system for weights
            C = hess_val + self.epsilon * np.eye(np.size(w))
            w = np.linalg.solve(C, np.dot(C, w) - grad_val)

            # eject from process if reaching singular system
            geval_new = self.g(w)
            if k > 2 and geval_new > geval_old:
                print('singular system reached')
                time.sleep(1.5)
                clear_output()
                if output == 'history':
                    return w_hist
                elif output == 'best':
                    return self.w_best
            else:
                geval_old = geval_new

            # record current weights; candidates are compared on the same
            # counter scale that g_best is stored in
            if output == 'best':
                count_val = self.counter(w)
                if count_val < g_best:
                    g_best = count_val
                    self.w_best = copy.deepcopy(unflatten(w))

            w_hist.append(unflatten(w))

        if verbose == True:
            print('...optimization complete!')
            time.sleep(1.5)
            clear_output()
        if output == 'best':
            return self.w_best
        elif output == 'history':
            return w_hist
Esempio n. 8
0
    def animate_it(self,savepath,**kwargs):
        """Render and save an animation of gradient descent over several step lengths.

        Frame 0 shows the input function with the initial point. Every later
        frame takes the next value from ``self.steplength_range``, calls
        ``self.run_gradient_descent(alpha=...)`` and plots the resulting
        ``self.w_hist`` on the function (left panel) and the cost value per
        iteration (right panel). The animation is written to ``savepath``
        with ``anim.save`` using the ``libx264`` codec.

        Args:
            savepath: destination handed to ``anim.save``.
            **kwargs: settings
                g: input function (required).
                steplength_range: step lengths to animate over
                    (default ``np.linspace(10**-4, 1, 20)``).
                wmin, wmax: plotted input range (defaults -3.1 and 3.1).
                w_init: initial point (default -2.0).
                steplength: stored in ``self.steplength`` when given.
                max_its: stored in ``self.max_its`` as a float (default 20).
                version: stored in ``self.version``
                    (default ``'unnormalized'``).
                tracers: ``'on'`` draws arrows between consecutive points on
                    the function panel (default ``'off'``).
                fps: frames per second of the saved file (default 50).
        """
        # presets
        self.g = kwargs['g']                            # input function
        self.grad = compute_grad(self.g)                # gradient of input function
        self.w_init = float(-2)                         # default initial point
        self.max_its = 20                               # default max iterations
        self.w_hist = []                                # container for algorithm path
        self.steplength_range = np.linspace(10**-4, 1, 20)   # default step lengths to try

        # adjust range of step values to illustrate as well as viewing range
        if 'steplength_range' in kwargs:
            self.steplength_range = kwargs['steplength_range']
        wmin = kwargs.get('wmin', -3.1)
        wmax = kwargs.get('wmax', 3.1)

        # get new initial point if desired
        if 'w_init' in kwargs:
            self.w_init = kwargs['w_init']

        # take in user defined step length
        if 'steplength' in kwargs:
            self.steplength = kwargs['steplength']

        # take in user defined maximum number of iterations
        # NOTE(review): stored as a float; confirm run_gradient_descent copes with that
        if 'max_its' in kwargs:
            self.max_its = float(kwargs['max_its'])

        # version of gradient descent to use (normalized or unnormalized)
        self.version = kwargs.get('version', 'unnormalized')

        # draw arrows between consecutive points when set to 'on'
        tracers = kwargs.get('tracers', 'off')

        # initialize figure
        fig = plt.figure(figsize=(9, 4))
        artist = fig

        # create subplot with 2 panels sharing the vertical axis
        gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])
        ax1 = plt.subplot(gs[0])
        ax2 = plt.subplot(gs[1], sharey=ax1)
        gs.update(wspace=0.5, hspace=0.1)

        # evaluate the function once for the first frame
        w_plot = np.linspace(wmin, wmax, 500)
        g_plot = self.g(w_plot)

        # marker style shared by all path points
        point_style = dict(s=90, edgecolor='k', linewidth=0.7, zorder=3)

        # animation sub-function
        num_frames = len(self.steplength_range) + 1
        print ('starting animation rendering...')

        def animate(k):
            ax1.cla()
            ax2.cla()

            # print rendering update
            if np.mod(k + 1, 25) == 0:
                print ('rendering animation frame ' + str(k+1) + ' of ' + str(num_frames))
            if k == num_frames - 1:
                print ('animation rendering complete!')
                time.sleep(1.5)
                clear_output()

            # first frame: function alone along with the initial point
            if k == 0:
                w_val = self.w_init
                g_val = self.g(w_val)
                ax1.scatter(w_val, g_val, s=100, c='m', edgecolor='k', linewidth=0.7, zorder=2)
                ax1.plot(w_plot, g_plot, color='k', zorder=0)

            # later frames: one gradient descent run per step length
            if k > 0:
                alpha = self.steplength_range[k - 1]

                # run gradient descent method
                self.w_hist = []
                self.run_gradient_descent(alpha=alpha)

                # plot function
                self.plot_function(ax1)

                # colors for points: first half of the run ramps
                # (0, 1, 0) -> (1, 1, 0), second half ramps on to (1, 0, 0)
                num_pts = len(self.w_hist)
                half = round(num_pts / 2)
                ramp = np.vstack((np.linspace(0, 1, half)[:, np.newaxis],
                                  np.ones((num_pts - half, 1))))
                self.colorspec = np.concatenate(
                    (ramp, np.flipud(ramp), np.zeros((num_pts, 1))), 1)

                # evaluate the cost once per point and reuse it below
                g_hist = [self.g(w) for w in self.w_hist]

                # plot everything for each iteration
                for j, (w_val, g_val) in enumerate(zip(self.w_hist, g_hist)):
                    color = self.colorspec[j]
                    ax1.scatter(w_val, g_val, c=color, **point_style)

                    # tracers connecting consecutive points on the cost
                    if tracers == 'on' and j > 0:
                        w_old = self.w_hist[j - 1]
                        g_old = g_hist[j - 1]
                        ax1.quiver(w_old, g_old, w_val - w_old, g_val - g_old,
                                   scale_units='xy', angles='xy', scale=1,
                                   color=color, linewidth=1.5, alpha=0.2,
                                   linestyle='-', headwidth=4.5, edgecolor='k',
                                   headlength=10, headaxislength=7)

                    ### plot all on cost function decrease plot
                    ax2.scatter(j, g_val, c=color, **point_style)

                    # connector between consecutive cost values
                    if j > 0:
                        ax2.plot([j - 1, j], [g_hist[j - 1], g_val],
                                 color=color, linewidth=2, alpha=0.4, zorder=1)

                # clean up second axis, set title on first
                if num_pts > 0:
                    ax2.set_xticks(np.arange(num_pts))
                    ax1.set_title(r'$\alpha = $' + r'{:.2f}'.format(alpha), fontsize=14)

            ### clean up function plot ###
            # draw axes and labels
            ax1.set_xlabel(r'$w$', fontsize=13)
            ax1.set_ylabel(r'$g(w)$', fontsize=13, rotation=0, labelpad=25)

            ax2.set_xlabel('iteration', fontsize=13)
            ax2.set_ylabel(r'$g(w)$', fontsize=13, rotation=0, labelpad=25)
            ax1.axhline(y=0, color='k', zorder=0, linewidth=0.5)
            ax2.axhline(y=0, color='k', zorder=0, linewidth=0.5)

            return artist,

        anim = animation.FuncAnimation(fig, animate, frames=num_frames, interval=num_frames, blit=True)

        # produce animation and save
        fps = kwargs.get('fps', 50)
        anim.save(savepath, fps=fps, extra_args=['-vcodec', 'libx264'])

        clear_output()
Esempio n. 9
0
 def __init__(self,**args):
     """Store the input function given as keyword ``g`` and reset the run state."""
     cost = args['g']
     self.g = cost                     # input function
     self.grad = compute_grad(cost)    # gradient of input function
     self.w_init = -3.0                # input initial point
     self.w_hist = []
     # container for colors --> when algorithm begins, colored green,
     # as it ends color turns yellow, then red
     self.colorspec = []
Esempio n. 10
0
def _draw_derivative_arrow(ax, delta, label, color, pos_shift, neg_shift):
    """Draw a horizontal arrow of signed length ``delta`` from the origin and label it.

    The label is placed at ``1.3 * delta`` minus ``pos_shift`` (arrow points
    right or has zero length) or ``neg_shift`` (arrow points left).
    """
    # a NaN derivative fails this comparison; there is nothing to draw then
    if not abs(delta) >= 0:
        return

    # head sizes scale with the arrow length and are kept non-negative
    ax.arrow(0,
             0,
             delta,
             0,
             head_width=0.08 * abs(delta),
             head_length=0.2 * abs(delta),
             fc=color,
             ec=color,
             linewidth=2.5,
             zorder=3)

    shift = pos_shift if delta >= 0 else neg_shift
    ax.annotate(label, xy=(2, 1), xytext=(1.3 * delta - shift, 0), fontsize=22)


def animate_visualize2d(**kwargs):
    """Animate the tangent line and derivative of a single-input function.

    The tangency point sweeps evenly over ``[-3, 3]``. Each frame shows the
    function, the tangency point, a tangent segment of length 2 and the
    derivative value drawn as a horizontal arrow starting at the origin.

    Args:
        **kwargs: settings
            g: input function (required); must accept an array of inputs.
            num_frames: number of tangency points / frames (default 300).
            plot_descent: when equal to ``True`` the negative derivative is
                drawn as a second, red arrow (default ``False``).

    Returns:
        The ``animation.FuncAnimation`` object.
    """
    g = kwargs['g']  # input function
    grad = compute_grad(g)  # gradient of input function

    # number of slides to create - the input range [-3,3] is divided evenly by this number
    num_frames = kwargs.get('num_frames', 300)
    plot_descent = kwargs.get('plot_descent', False)

    # initialize figure
    fig = plt.figure(figsize=(16, 8))
    artist = fig

    # create subplot with 3 panels, plot input function in center plot
    gs = gridspec.GridSpec(1, 3, width_ratios=[1, 4, 1])
    ax1 = plt.subplot(gs[0])
    ax1.axis('off')
    ax3 = plt.subplot(gs[2])
    ax3.axis('off')
    ax = plt.subplot(gs[1])

    # generate a range of values over which to plot input function, and derivatives
    w_plot = np.linspace(-3, 3, 200)  # input range for original function
    g_plot = g(w_plot)
    ggap = (max(g_plot) - min(g_plot)) * 0.2  # vertical padding of the panel
    w_vals = np.linspace(-3, 3, num_frames)  # tangency points, one per frame

    # vertical viewing limits are the same for every frame
    y_limits = [min(min(g_plot) - ggap, -0.5), max(max(g_plot) + ggap, 0.5)]

    # animation sub-function
    print('starting animation rendering...')

    def animate(k):
        # clear the panel
        ax.cla()

        # print rendering update
        if np.mod(k + 1, 25) == 0:
            print('rendering animation frame ' + str(k + 1) + ' of ' +
                  str(num_frames))
        if k == num_frames - 1:
            print('animation rendering complete!')
            time.sleep(1.5)
            clear_output()

        # grab the next input/output tangency pair
        w_val = w_vals[k]
        g_val = g(w_val)

        # plot original function
        ax.plot(w_plot, g_plot, color='k', zorder=1, linewidth=4)

        # plot the input/output tangency point
        ax.scatter(w_val,
                   g_val,
                   s=200,
                   c='lime',
                   edgecolor='k',
                   linewidth=2,
                   zorder=3)

        #### plot first order approximation ####
        g_grad_val = grad(w_val)

        # horizontal half-extent chosen so the tangent segment has length 2
        half_span = math.sqrt(1 / float(1 + g_grad_val**2))
        wrange = np.linspace(w_val - half_span, w_val + half_span, 100)
        h = g_val + g_grad_val * (wrange - w_val)
        ax.plot(wrange, h, color='lime', alpha=0.5, linewidth=6, zorder=2)

        #### plot derivative as vector ####
        name = r'$\frac{\mathrm{d}}{\mathrm{d}w}g(' + r'{:.2f}'.format(
            w_val) + r')$'
        _draw_derivative_arrow(ax, g_grad_val, name, 'k', 0, 1.3)

        #### plot negative derivative as vector ####
        if plot_descent == True:
            ax.scatter(0,
                       0,
                       c='k',
                       edgecolor='w',
                       s=100,
                       linewidth=0.5,
                       zorder=4)

            name = r'$-\frac{\mathrm{d}}{\mathrm{d}w}g(' + r'{:.2f}'.format(
                w_val) + r')$'
            _draw_derivative_arrow(ax, -g_grad_val, name, 'r', 0.2, 1.6)

        #### clean up panel ####
        # fix viewing limits on panel
        ax.set_xlim([-5, 5])
        ax.set_ylim(y_limits)

        # label axes
        ax.set_xlabel('$w$', fontsize=25)
        ax.set_ylabel('$g(w)$', fontsize=25, rotation=0, labelpad=50)
        ax.grid(False)
        ax.yaxis.set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.spines['left'].set_visible(False)

        # Tick.label no longer exists in current Matplotlib; tick_params
        # sets the size of the major x tick labels instead
        ax.tick_params(axis='x', labelsize=18)

        return artist,

    anim = animation.FuncAnimation(fig,
                                   animate,
                                   frames=len(w_vals),
                                   interval=len(w_vals),
                                   blit=True)

    return (anim)
    def newtons_method(self, g, w_hist, **kwargs):
        """Animate a recorded run of Newton's method on a single-input function.

        Frame 0 shows the first weight in ``w_hist``. After that every weight
        gets two frames: an odd frame showing the second order approximation
        of ``g`` at that weight, and an even frame that additionally marks
        the point ``w - g'(w) / (g''(w) + 10**-5)`` on the approximation, on
        the function and on the horizontal axis. Weights from earlier steps
        stay visible; the last frame shows the weights only.

        Args:
            g: input function; must accept an array of inputs.
            w_hist: sequence of scalar weights to replay.
            **kwargs: optional settings
                wmax: right end of the plotted range (default 3).
                wmin: left end of the plotted range (default ``-wmax``).

        Returns:
            The ``animation.FuncAnimation`` object.
        """
        # compute gradient and hessian of input
        grad = compute_grad(g)  # gradient of input function
        hess = compute_hess(g)  # hessian of input function

        # set viewing range
        wmax = kwargs.get('wmax', 3)
        wmin = kwargs.get('wmin', -wmax)

        # initialize figure
        fig = plt.figure(figsize=(9, 4))
        artist = fig

        # create subplot with 3 panels, plot input function in center plot
        gs = gridspec.GridSpec(1, 3, width_ratios=[1, 4, 1])

        ax1 = plt.subplot(gs[0])
        ax1.axis('off')
        ax3 = plt.subplot(gs[2])
        ax3.axis('off')
        ax = plt.subplot(gs[1])

        # generate function for plotting on each slide
        w_plot = np.linspace(wmin, wmax, 1000)
        g_plot = g(w_plot)
        ggap = (max(g_plot) - min(g_plot)) * 0.1

        # viewing limits and tickmarks are the same for every frame
        y_limits = [min(-0.3, min(g_plot) - ggap), max(max(g_plot) + ggap, 0.3)]
        x_ticks = np.arange(round(wmin), round(wmax) + 1, 1.0)
        y_ticks = np.arange(round(min(g_plot) - ggap),
                            round(max(g_plot) + ggap) + 1, 1.0)

        # make color spectrum for points
        colorspec = self.make_colorspec(w_hist)

        # animation sub-function
        print('starting animation rendering...')
        num_frames = 2 * len(w_hist) + 2

        def animate(t):
            ax.cla()

            # step index: frames 2k-1 and 2k both belong to step k
            k = math.floor((t + 1) / float(2))

            # print rendering update (counted in frames, like num_frames)
            if np.mod(t + 1, 25) == 0:
                print('rendering animation frame ' + str(t + 1) + ' of ' +
                      str(num_frames))
            if t == num_frames - 1:
                print('animation rendering complete!')
                time.sleep(1.5)
                clear_output()

            # plot function
            ax.plot(w_plot, g_plot, color='k', zorder=1)

            # plot initial point and evaluation
            if k == 0:
                w_val = w_hist[0]
                g_val = g(w_val)
                ax.scatter(w_val,
                           g_val,
                           s=100,
                           c=colorspec[k],
                           edgecolor='k',
                           linewidth=0.7,
                           marker='X',
                           zorder=2)  # point on the function
                ax.scatter(w_val,
                           0,
                           s=100,
                           c=colorspec[k],
                           edgecolor='k',
                           linewidth=0.7,
                           zorder=2)  # point on the horizontal axis

                # draw dashed line connecting w axis to point on cost function
                heights = np.linspace(0, g_val)
                ax.plot(np.ones((len(heights))) * w_val,
                        heights,
                        'k--',
                        linewidth=1,
                        zorder=0)

            # plot all input/output pairs generated by algorithm thus far
            if k > 0:
                for j in range(min(k - 1, len(w_hist))):
                    w_val = w_hist[j]
                    g_val = g(w_val)
                    ax.scatter(w_val,
                               g_val,
                               s=90,
                               c=colorspec[j],
                               edgecolor='k',
                               marker='X',
                               linewidth=0.7,
                               zorder=3)
                    ax.scatter(w_val,
                               0,
                               s=90,
                               facecolor=colorspec[j],
                               edgecolor='k',
                               linewidth=0.7,
                               zorder=2)

            # plot surrogate function and travel-to point
            if 0 < k < len(w_hist) + 1:
                # grab historical weight, compute function and derivative evaluations
                w_eval = float(w_hist[k - 1])
                g_eval = g(w_eval)
                g_grad_eval = grad(w_eval)
                g_hess_eval = hess(w_eval)

                # the quadratic is drawn between the two inputs where it sits
                # `width` above (convex) or below (concave) its value at w_eval
                width = 0.5
                if g_hess_eval < 0:
                    width = -width

                # setup quadratic formula params
                a = 0.5 * g_hess_eval
                b = g_grad_eval - 2 * 0.5 * g_hess_eval * w_eval
                c = 0.5 * g_hess_eval * w_eval**2 - g_grad_eval * w_eval - width

                # solve for zero points (small offset guards a zero second derivative)
                root = math.sqrt(b**2 - 4 * a * c)
                w1 = (-b + root) / float(2 * a + 0.00001)
                w2 = (-b - root) / float(2 * a + 0.00001)

                # compute second order approximation
                wrange = np.linspace(w1, w2, 100)
                h = g_eval + g_grad_eval * (
                    wrange - w_eval) + 0.5 * g_hess_eval * (wrange - w_eval)**2

                # plot tangent curve
                ax.plot(wrange,
                        h,
                        color=colorspec[k - 1],
                        linewidth=2,
                        zorder=2)

                # plot tangent point
                ax.scatter(w_eval,
                           g_eval,
                           s=100,
                           c='m',
                           edgecolor='k',
                           marker='X',
                           linewidth=0.7,
                           zorder=3)

                # plot next point learned from surrogate (even frames only)
                if np.mod(t, 2) == 0:
                    # create next point information
                    w_zero = w_eval - g_grad_eval / (g_hess_eval + 10**-5)
                    g_zero = g(w_zero)
                    h_zero = g_eval + g_grad_eval * (
                        w_zero - w_eval) + 0.5 * g_hess_eval * (w_zero -
                                                                w_eval)**2

                    # draw dashed line spanning the axis, the surrogate value
                    # and the function value at w_zero
                    vals = np.sort([0, h_zero, g_zero])
                    heights = np.linspace(vals[0], vals[2])
                    ax.plot(np.ones((len(heights))) * w_zero,
                            heights,
                            'k--',
                            linewidth=1)

                    # point on the surrogate, on the axis and on the cost function
                    ax.scatter(w_zero,
                               h_zero,
                               s=100,
                               c='b',
                               linewidth=0.7,
                               marker='X',
                               edgecolor='k',
                               zorder=3)
                    ax.scatter(w_zero,
                               0,
                               s=100,
                               c='m',
                               edgecolor='k',
                               linewidth=0.7,
                               zorder=3)
                    ax.scatter(w_zero,
                               g_zero,
                               s=100,
                               c='m',
                               edgecolor='k',
                               linewidth=0.7,
                               marker='X',
                               zorder=3)

            # fix viewing limits on panel
            ax.set_xlim([wmin, wmax])
            ax.set_ylim(y_limits)

            # add horizontal axis
            ax.axhline(y=0, color='k', zorder=0, linewidth=0.5)

            # label axes
            ax.set_xlabel(r'$w$', fontsize=14)
            ax.set_ylabel(r'$g(w)$', fontsize=14, rotation=0, labelpad=25)

            # set tickmarks
            ax.set_xticks(x_ticks)
            ax.set_yticks(y_ticks)

            return artist,

        anim = animation.FuncAnimation(fig,
                                       animate,
                                       frames=num_frames,
                                       interval=num_frames,
                                       blit=True)

        return (anim)
Esempio n. 12
0
def visualize3d(func, **kwargs):
    """Plot a two-input function with its tangent plane and gradient arrows.

    The surface of ``func`` is drawn over [-2, 2] x [-2, 2] together with the
    z = 0 plane and the first-order (tangent) approximation at one point.
    In the input plane, arrows starting at the origin show each partial
    derivative direction and the full gradient (and optionally its negative).
    The figure is displayed with ``plt.show()``; nothing is returned.

    Args:
        func: callable evaluated on a (2, N) array of inputs and on the
            (2, 1) tangency point; it is also handed to ``compute_grad``.
        **kwargs: optional settings
            view: ``[elevation, azimuth]`` passed to ``view_init``
                (default ``[20, -50]``).
            plot_descent: when truthy, also draw the negative-gradient
                arrow (default ``False``).
            pt: two-element point of tangency (default ``[0, 0]``).
            Any other key (e.g. ``num_frames``, ``pt2``) is accepted but
            has no effect.
    """
    grad = compute_grad(func)  # gradient of input function

    # parse optional arguments
    view = kwargs.get('view', [20, -50])
    plot_descent = kwargs.get('plot_descent', False)
    pt = kwargs.get('pt', [0, 0])

    # construct figure and strip whitespace around the single 3d panel
    fig = plt.figure(figsize=(9, 6))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    fig.subplots_adjust(wspace=0.01, hspace=0.01)
    gs = gridspec.GridSpec(1, 1)
    ax = plt.subplot(gs[0], projection='3d')

    # define input space and evaluate the function over it
    w_in = np.linspace(-2, 2, 200)
    n_in = len(w_in)
    w1_vals, w2_vals = np.meshgrid(w_in, w_in)
    w1_vals = w1_vals.reshape(n_in**2, 1)
    w2_vals = w2_vals.reshape(n_in**2, 1)
    w_vals = np.concatenate((w1_vals, w2_vals), axis=1).T
    g_vals = func(w_vals)

    # evaluation point, function value and gradient there
    w_val = np.array([float(pt[0]), float(pt[1])]).reshape(2, 1)
    g_val = func(w_val)
    grad_val = np.reshape(grad(w_val), (2, 1))

    # grid (centered on zero) over which the tangent hyperplane is drawn
    w_tan = np.linspace(-1, 1, 200)
    n_tan = len(w_tan)
    w1tan_vals, w2tan_vals = np.meshgrid(w_tan, w_tan)
    w1tan_vals = w1tan_vals.reshape(n_tan**2, 1)
    w2tan_vals = w2tan_vals.reshape(n_tan**2, 1)
    wtan_vals = np.concatenate((w1tan_vals, w2tan_vals), axis=1).T

    def h(weh):
        # first-order Taylor approximation of func around w_val
        return (g_val + (weh[0] - w_val[0]) * grad_val[0] +
                (weh[1] - w_val[1]) * grad_val[1])

    def tangent_rise(dw1, dw2):
        # change of the tangent plane between the origin and (dw1, dw2)
        return h([dw1, dw2]) - h([0, 0])

    h_vals = h(wtan_vals + w_val)
    zmin = min(np.min(h_vals), -0.5)
    zmax = max(np.max(h_vals), 0.5)

    # reshape everything into the 2d grids plot_surface expects; the tangent
    # grid is shifted so that it is centered on the point of tangency
    w1_vals = w1_vals.reshape(n_in, n_in)
    w2_vals = w2_vals.reshape(n_in, n_in)
    g_vals = np.reshape(g_vals, (n_in, n_in))
    w1tan_vals = (w1tan_vals + w_val[0]).reshape(n_tan, n_tan)
    w2tan_vals = (w2tan_vals + w_val[1]).reshape(n_tan, n_tan)
    h_vals = np.reshape(h_vals, (n_tan, n_tan))

    ### plot function ###
    ax.plot_surface(w1_vals,
                    w2_vals,
                    g_vals,
                    alpha=0.1,
                    color='w',
                    rstride=25,
                    cstride=25,
                    linewidth=1,
                    edgecolor='k',
                    zorder=2)

    ### plot z=0 plane ###
    ax.plot_surface(w1_vals,
                    w2_vals,
                    g_vals * 0,
                    alpha=0.1,
                    color='w',
                    zorder=1,
                    rstride=25,
                    cstride=25,
                    linewidth=0.3,
                    edgecolor='k')

    ### plot tangent plane ###
    ax.plot_surface(w1tan_vals,
                    w2tan_vals,
                    h_vals,
                    alpha=0.1,
                    color='lime',
                    zorder=1,
                    rstride=50,
                    cstride=50,
                    linewidth=1,
                    edgecolor='k')

    ### plot particular points - origin and tangency ###
    ax.scatter(0, 0, 0, s=60, c='k', edgecolor='w', linewidth=2)
    ax.scatter(w_val[0],
               w_val[1],
               g_val,
               s=70,
               c='lime',
               edgecolor='k',
               linewidth=2)

    ##### arrows and annotations for the ascent directions #####
    def draw_arrow(dx, dy, color):
        # arrow lying in the z = 0 plane, from the origin to (dx, dy)
        ax.add_artist(
            Arrow3D([0, dx], [0, dy], [0, 0],
                    mutation_scale=20,
                    lw=2,
                    arrowstyle="-|>",
                    color=color))

    def label_arrow(text, xyz):
        annotate3D(ax,
                   s=text,
                   xyz=xyz,
                   fontsize=12,
                   xytext=(-3, 3),
                   textcoords='offset points',
                   ha='center',
                   va='center')

    cutoff_val = 0.1  # components smaller than this are not drawn
    an = 1.7  # labels sit this many arrow-lengths along each arrow
    pname = 'g(' + str(pt[0]) + ',' + str(pt[1]) + ')'
    slope_w1 = tangent_rise(1, 0)
    slope_w2 = tangent_rise(0, 1)

    # partial derivative along w_1
    if abs(slope_w1) > cutoff_val:
        draw_arrow(slope_w1, 0, "b")
        name = r'$\left(\frac{\mathrm{d}}{\mathrm{d}w_1}' + pname + r',0\right)$'
        label_arrow(name, [tangent_rise(an, 0), 0, 0])

    # partial derivative along w_2
    if abs(slope_w2) > cutoff_val:
        draw_arrow(0, slope_w2, "b")
        name = r'$\left(0,\frac{\mathrm{d}}{\mathrm{d}w_2}' + pname + r'\right)$'
        label_arrow(name, [0, tangent_rise(0, an), 0])

    # the full (negative) gradient is drawn only when both components show
    both_visible = bool(abs(slope_w1) > cutoff_val) and bool(
        abs(slope_w2) > cutoff_val)
    label_w1 = tangent_rise(an + 0.2, 0)
    label_w2 = tangent_rise(0, an + 0.2)

    if both_visible:
        draw_arrow(slope_w1, slope_w2, "k")
        name = r'$\left(\frac{\mathrm{d}}{\mathrm{d}w_1}' + pname + r',\frac{\mathrm{d}}{\mathrm{d}w_2}' + pname + r'\right)$'
        label_arrow(name, [label_w1, label_w2, 0])

    ###### arrow and text for the steepest descent direction #####
    if plot_descent and both_visible:
        draw_arrow(-slope_w1, -slope_w2, "r")
        name = r'$\left(-\frac{\mathrm{d}}{\mathrm{d}w_1}' + pname + r',-\frac{\mathrm{d}}{\mathrm{d}w_2}' + pname + r'\right)$'
        label_arrow(name, [-label_w1, -label_w2, 0])

    ### clean up plot ###
    for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
        axis.pane.fill = False
    for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
        axis.pane.set_edgecolor('white')

    # remove axes lines and tickmarks; the w_xaxis/w_yaxis/w_zaxis aliases
    # are gone from newer Matplotlib releases, so use xaxis/yaxis/zaxis
    ax.zaxis.line.set_lw(0.)
    ax.set_zticks([])
    ax.xaxis.line.set_lw(0.)
    ax.set_xticks([])
    ax.yaxis.line.set_lw(0.)
    ax.set_yticks([])

    # set viewing angle
    ax.view_init(view[0], view[1])

    # set viewing limits
    y = 4.5
    ax.set_xlim([-y, y])
    ax.set_ylim([-y, y])
    ax.set_zlim([zmin, zmax])

    # label plot
    fontsize = 14
    ax.set_xlabel(r'$w_1$', fontsize=fontsize, labelpad=-20)
    ax.set_ylabel(r'$w_2$', fontsize=fontsize, rotation=0, labelpad=-30)

    # plot
    plt.show()
Esempio n. 13
0
def compare_2d3d(func1, func2, **kwargs):
    """Show first-order approximations in 2d and 3d side by side.

    The middle panel draws the single-input function ``func1`` with its
    tangent line at w = 0; the right panel draws the two-input function
    ``func2`` with its tangent plane at (0, 0).  The figure is displayed
    with ``plt.show()``; nothing is returned.

    Args:
        func1: callable of one input, evaluated on a 1-d array and on the
            float 0.0; also handed to ``compute_grad``.
        func2: callable evaluated on a (2, N) array and on a (2, 1) point;
            also handed to ``compute_grad``.
        **kwargs: optional settings
            view: ``[elevation, azimuth]`` for the 3d panel.  When omitted
                the panel uses elevation 20 and azimuth -65.
    """
    # Honor an explicitly supplied viewing angle.  Previously the option was
    # parsed but the 3d panel always used (20, -65); that remains the default.
    view = kwargs.get('view', [20, -65])

    # construct figure and remove whitespace
    fig = plt.figure(figsize=(12, 4))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    fig.subplots_adjust(wspace=0.01, hspace=0.01)

    # three columns: the first is left empty, 2d plot in the second, 3d in
    # the third
    gs = gridspec.GridSpec(1, 3, width_ratios=[1, 2, 4])

    ### draw 2d version ###
    ax1 = plt.subplot(gs[1])
    grad_2d = compute_grad(func1)

    # range of values over which to plot the input function
    w_plot = np.linspace(-3, 3, 200)
    g_plot = func1(w_plot)

    # point of tangency
    w_val = float(0)
    g_val = func1(w_val)

    # plot original function and the horizontal axis
    ax1.plot(w_plot, g_plot, color='k', zorder=1, linewidth=2)
    ax1.plot(w_plot, g_plot * 0, color='k', zorder=1, linewidth=1)

    # plot the input/output tangency point
    ax1.scatter(w_val,
                g_val,
                s=80,
                c='lime',
                edgecolor='k',
                linewidth=2,
                zorder=3)

    #### first order approximation ####
    g_grad_val = grad_2d(w_val)

    # choose the horizontal extent so the drawn segment has a fixed length
    # regardless of slope
    width = 4
    div = float(1 + g_grad_val**2)
    half_span = math.sqrt(width / div)
    wrange = np.linspace(w_val - half_span, w_val + half_span, 100)
    tangent_line = g_val + g_grad_val * (wrange - w_val)
    ax1.plot(wrange, tangent_line, color='lime', alpha=0.5, linewidth=3,
             zorder=2)

    #### clean up panel ####
    v = 5
    ax1.set_xlim([-v, v])
    ax1.set_ylim([-1 - 0.3, v - 0.3])
    ax1.set_xlabel('$w$', fontsize=12, labelpad=-60)
    ax1.set_ylabel('$g(w)$', fontsize=25, rotation=0, labelpad=50)
    ax1.grid(False)
    ax1.yaxis.set_visible(False)
    ax1.spines['right'].set_visible(False)
    ax1.spines['top'].set_visible(False)
    ax1.spines['left'].set_visible(False)

    ### draw 3d version ###
    ax2 = plt.subplot(gs[2], projection='3d')
    grad_3d = compute_grad(func2)

    # define input space and evaluate the function over it
    w_in = np.linspace(-2, 2, 200)
    n_in = len(w_in)
    w1_vals, w2_vals = np.meshgrid(w_in, w_in)
    w1_vals = w1_vals.reshape(n_in**2, 1)
    w2_vals = w2_vals.reshape(n_in**2, 1)
    w_vals = np.concatenate((w1_vals, w2_vals), axis=1).T
    g_vals = func2(w_vals)

    # evaluation point (the origin), function value and gradient there
    pt = np.array([float(0), float(0)]).reshape(2, 1)
    g_pt = func2(pt)
    grad_pt = np.reshape(grad_3d(pt), (2, 1))

    # grid (centered on zero) over which the tangent hyperplane is drawn
    w_tan = np.linspace(-1, 1, 200)
    n_tan = len(w_tan)
    w1tan_vals, w2tan_vals = np.meshgrid(w_tan, w_tan)
    w1tan_vals = w1tan_vals.reshape(n_tan**2, 1)
    w2tan_vals = w2tan_vals.reshape(n_tan**2, 1)
    wtan_vals = np.concatenate((w1tan_vals, w2tan_vals), axis=1).T

    def h(weh):
        # first-order Taylor approximation of func2 around pt
        return (g_pt + (weh[0] - pt[0]) * grad_pt[0] +
                (weh[1] - pt[1]) * grad_pt[1])

    h_vals = h(wtan_vals + pt)
    zmin = min(np.min(h_vals), -0.5)
    zmax = max(np.max(h_vals), 0.5)

    # reshape everything into the 2d grids plot_surface expects
    w1_vals = w1_vals.reshape(n_in, n_in)
    w2_vals = w2_vals.reshape(n_in, n_in)
    g_vals = np.reshape(g_vals, (n_in, n_in))
    w1tan_vals = (w1tan_vals + pt[0]).reshape(n_tan, n_tan)
    w2tan_vals = (w2tan_vals + pt[1]).reshape(n_tan, n_tan)
    h_vals = np.reshape(h_vals, (n_tan, n_tan))

    ### plot function ###
    ax2.plot_surface(w1_vals,
                     w2_vals,
                     g_vals,
                     alpha=0.5,
                     color='w',
                     rstride=25,
                     cstride=25,
                     linewidth=1,
                     edgecolor='k',
                     zorder=2)

    ### plot z=0 plane ###
    ax2.plot_surface(w1_vals,
                     w2_vals,
                     g_vals * 0,
                     alpha=0.1,
                     color='w',
                     zorder=1,
                     rstride=25,
                     cstride=25,
                     linewidth=0.3,
                     edgecolor='k')

    ### plot tangent plane ###
    ax2.plot_surface(w1tan_vals,
                     w2tan_vals,
                     h_vals,
                     alpha=0.4,
                     color='lime',
                     zorder=1,
                     rstride=50,
                     cstride=50,
                     linewidth=1,
                     edgecolor='k')

    # scatter tangency
    ax2.scatter(pt[0],
                pt[1],
                g_pt,
                s=70,
                c='lime',
                edgecolor='k',
                linewidth=2)

    ### clean up plot ###
    for axis in (ax2.xaxis, ax2.yaxis, ax2.zaxis):
        axis.pane.fill = False
    for axis in (ax2.xaxis, ax2.yaxis, ax2.zaxis):
        axis.pane.set_edgecolor('white')

    # remove axes lines and tickmarks; the w_xaxis/w_yaxis/w_zaxis aliases
    # are gone from newer Matplotlib releases, so use xaxis/yaxis/zaxis
    ax2.zaxis.line.set_lw(0.)
    ax2.set_zticks([])
    ax2.xaxis.line.set_lw(0.)
    ax2.set_xticks([])
    ax2.yaxis.line.set_lw(0.)
    ax2.set_yticks([])

    # set viewing angle
    ax2.view_init(view[0], view[1])

    # set viewing limits
    y = 4
    ax2.set_xlim([-y, y])
    ax2.set_ylim([-y, y])
    ax2.set_zlim([zmin, zmax])

    # label plot
    fontsize = 12
    ax2.set_xlabel(r'$w_1$', fontsize=fontsize, labelpad=-35)
    ax2.set_ylabel(r'$w_2$', fontsize=fontsize, rotation=0, labelpad=-40)

    plt.show()
Esempio n. 14
0
    def gradient_descent(self, g, w, **kwargs):
        """Run gradient descent on ``g`` starting from ``w``.

        ``g`` and ``w`` are first passed through ``flatten_func``; the
        flattened cost is stored on ``self.g`` and its gradient (from
        ``compute_grad``) on ``self.grad``.

        Args:
            g: cost function to minimize.
            w: initial weights.
            **kwargs: optional settings
                max_its: number of steps to take (default 100).
                version: ``'normalized'`` divides each gradient by its norm;
                    anything else leaves it unchanged
                    (default ``'unnormalized'``).
                alpha: fixed steplength (default 1e-4).
                steplength_rule: ``'backtracking'`` calls
                    ``self.backtracking``; ``'diminishing'`` uses ``1/d``
                    where ``d`` grows by one every ``diminish_num``
                    iterations; anything else keeps ``alpha`` fixed.
                projection: callable applied to the weights after each step.
                output: ``'history'`` (default) or ``'best'``.
                diminish_num: see ``steplength_rule`` (default 10).
                verbose: print start/progress/finish messages (default True).

        Returns:
            For ``output='history'`` a list holding the unflattened initial
            weights followed by the weights after every step.  For
            ``output='best'`` the unflattened weights with the lowest cost
            among the initial point and all iterates.  ``None`` for any
            other ``output`` value.
        """
        # flatten function
        self.g, unflatten, w = flatten_func(g, w)
        self.grad = compute_grad(self.g)

        # parse optional arguments
        max_its = kwargs.get('max_its', 100)
        version = kwargs.get('version', 'unnormalized')
        alpha = kwargs.get('alpha', 10**-4)
        steplength_rule = kwargs.get('steplength_rule', 'none')
        projection = kwargs.get('projection')
        output = kwargs.get('output', 'history')
        diminish_num = kwargs.get('diminish_num', 10)
        verbose = kwargs.get('verbose', True)

        # containers for the weight history / best point seen so far
        w_hist = []
        w_best = unflatten(copy.deepcopy(w))
        g_best = np.inf
        if output == 'history':
            w_hist.append(unflatten(w))
        elif output == 'best':
            # score the starting point so that it is only replaced by an
            # iterate that actually has a lower cost
            g_best = self.g(w)

        # start gradient descent loop
        if verbose:
            print('starting optimization...')
        d = 1  # diminish count
        for k in range(max_its):
            # plug in value into derivative
            grad_eval = np.reshape(self.grad(w), np.shape(w))

            ### normalized or unnormalized descent step? ###
            if version == 'normalized':
                grad_norm = np.linalg.norm(grad_eval)
                if grad_norm == 0:
                    # avoid dividing by zero at a stationary point
                    grad_norm += 10**-6 * np.sign(2 * np.random.rand(1) - 1)
                grad_eval /= grad_norm

            # print out progress (only when messages were requested)
            if verbose and k > 0 and np.mod(k, 100) == 0:
                print(str(k) + ' of ' + str(max_its) + ' iterations complete')

            ### decide on steplength parameter alpha ###
            # use backtracking line search?
            if steplength_rule == 'backtracking':
                alpha = self.backtracking(w, grad_eval)

            # use a pre-set diminishing steplength parameter?
            if steplength_rule == 'diminishing':
                alpha = 1 / (float(d))
                if np.mod(k, diminish_num) == 0 and k > 0:
                    d += 1

            ### take gradient descent step ###
            w = w - alpha * grad_eval

            ### projection? ###
            if projection is not None:
                w = projection(w)

            # record weight for history / best
            if output == 'history':
                w_hist.append(unflatten(w))
            elif output == 'best':
                g_current = self.g(w)  # evaluate the cost only once
                if g_current < g_best:
                    g_best = g_current
                    w_best = unflatten(w)

        if verbose:
            print('...optimization complete!')
            time.sleep(1.5)
            clear_output()

        # return
        if output == 'history':
            return w_hist
        if output == 'best':
            return w_best
 def __init__(self, **args):
     """Store the input function and its first two derivative functions.

     ``args['g']`` is required; ``compute_grad`` is applied once to obtain
     ``self.grad`` and once more to that result to obtain ``self.hess``.
     """
     target = args['g']
     self.g = target  # input function
     self.grad = compute_grad(target)  # derivative of the input function
     self.hess = compute_grad(self.grad)  # derivative of the derivative
     # custom colors used for plotting
     self.colors = [[0, 1, 0.25], [0, 0.75, 1]]
Esempio n. 16
0
    def __init__(self, **args):
        """Configure the q-learner with a function approximator.

        Required keys in ``args``:
            gridworld: object providing ``width``, ``height``,
                ``training_episodes``, ``validation_episodes``,
                ``training_start_schedule`` and ``validation_start_schedule``.
            approximator: ``'linear'`` or ``'cosine'``.

        Optional keys (defaults in parentheses):
            gamma (1), max_steps (5 * width * height),
            action_method ('exploit'), exploit_param (0.5),
            training_episodes (500), validation_episodes (50), degree (1),
            step_size (1e-5 / (max(height, width) * degree)).

        If more training or validation episodes are requested than the
        gridworld provides, a message is printed and initialization stops
        early, leaving the remaining attributes unset.
        """
        # get some crucial parameters from the input gridworld
        self.grid = args['gridworld']

        # initialize q-learning params
        self.gamma = 1
        self.max_steps = 5 * self.grid.width * self.grid.height
        self.exploit_param = 0.5
        self.action_method = 'exploit'
        self.training_episodes = 500
        self.validation_episodes = 50
        self.training_start_schedule = []
        self.validation_start_schedule = []

        # swap out for user defined q-learning params if desired
        if "gamma" in args:
            self.gamma = args['gamma']
        if 'max_steps' in args:
            self.max_steps = args['max_steps']
        if 'action_method' in args:
            self.action_method = args['action_method']
        if 'exploit_param' in args:
            # store the override under the same attribute as the default;
            # it used to land in ``self.exploit`` only, so it never replaced
            # ``self.exploit_param``
            self.exploit_param = args['exploit_param']
            self.exploit = self.exploit_param  # kept as a legacy alias
            self.action_method = 'exploit'
        if 'training_episodes' in args:
            self.training_episodes = args['training_episodes']

        # bail out if number of training episodes is too big
        if self.training_episodes > self.grid.training_episodes:
            print('requesting too many training episodes, the maximum num = ' +
                  str(self.grid.training_episodes))
            return
        self.training_start_schedule = self.grid.training_start_schedule[
            :self.training_episodes]

        if 'validation_episodes' in args:
            self.validation_episodes = args['validation_episodes']

        # bail out if number of validation episodes is too big
        if self.validation_episodes > self.grid.validation_episodes:
            print('requesting too many validation episodes, the maximum num = '
                  + str(self.grid.validation_episodes))
            return
        self.validation_start_schedule = self.grid.validation_start_schedule[
            :self.validation_episodes]

        ##### function approximator settings #####
        # initialize function approximation params and weights
        self.deg = 1
        if 'degree' in args:
            self.deg = args['degree']

        self.step_size = 1 / float(
            max(self.grid.height, self.grid.width) * self.deg) * 10**-5
        if 'step_size' in args:
            self.step_size = args['step_size']

        # switch for choosing various nonlinear approximators
        self.h = 0
        self.W = 0
        self.num_actions = 4
        if args['approximator'] == 'linear':
            self.h = self.linear_approximator

            # weights per function --> 1 bias, 2 state touching weights
            # (one per state dim)
            self.W = np.random.randn(self.num_actions, self.deg, 1 + 2)

        if args['approximator'] == 'cosine':
            self.h = self.cosine_approximator

            # two weights per function for the cosine approximator
            self.W = np.random.randn(self.num_actions, self.deg, 2)
        self.W = self.W.astype('float')

        # compute gradient of approximator for later use
        self.h_grad = compute_grad(self.h)
    def animate_2d(self, g, w_hist, **kwargs):
        """Build an animation replaying a descent run on a one-input function.

        Every frame redraws ``g`` over ``[wmin, wmax]``, the points visited
        so far, the tangent line at the current point and, on every other
        frame, the next point of the run.

        Args:
            g: function to plot; stored on ``self.g`` and handed to
                ``compute_grad``.
            w_hist: sequence of visited inputs; stored on ``self.w_hist``.
            **kwargs: optional ``wmin`` / ``wmax`` plotting limits
                (defaults -3.1 and 3.1).

        Returns:
            The ``animation.FuncAnimation`` object.
        """
        # keep the inputs on the instance
        self.g = g
        self.w_hist = w_hist
        self.grad = compute_grad(self.g)
        self.w_init = self.w_hist[0]

        # horizontal plotting range
        wmin = kwargs.get('wmin', -3.1)
        wmax = kwargs.get('wmax', 3.1)

        # figure with three columns; only the wide middle one is drawn on
        fig = plt.figure(figsize=(9, 4))
        artist = fig
        gs = gridspec.GridSpec(1, 3, width_ratios=[1, 4, 1])
        left_pad = plt.subplot(gs[0])
        left_pad.axis('off')
        right_pad = plt.subplot(gs[2])
        right_pad.axis('off')
        ax = plt.subplot(gs[1])

        # function values reused on every frame
        w_plot = np.linspace(wmin, wmax, 200)
        g_plot = self.g(w_plot)
        ggap = (max(g_plot) - min(g_plot)) * 0.1
        width = 30

        # one RGB row per history entry: the first channel ramps from 0 to 1
        # over the first half and stays at 1, the second channel is the first
        # one flipped, the third is zero
        half = round(len(self.w_hist) / 2)
        ramp = np.linspace(0, 1, len(self.w_hist[:half]))[:, np.newaxis]
        plateau = np.ones(len(self.w_hist[half:]))[:, np.newaxis]
        first_channel = np.vstack((ramp, plateau))
        self.colorspec = np.concatenate(
            (first_channel, np.flipud(first_channel),
             np.zeros((len(first_channel), 1))), 1)

        def edge_width(idx):
            # marker outline gets thinner for later points
            return 0.5 * ((1 / (float(idx) + 1)))**(0.4)

        def dashed_vertical(w_at, low, high):
            # dashed vertical segment at w_at between the two heights
            heights = np.linspace(low, high)
            ax.plot(np.ones((len(heights))) * w_at,
                    heights,
                    'k--',
                    linewidth=1)

        def mark_visited(idx, w_val, axis_zorder):
            # 'X' marker on the curve plus a round marker on the w axis
            g_val = self.g(w_val)
            ax.scatter(w_val,
                       g_val,
                       s=90,
                       c=self.colorspec[idx],
                       edgecolor='k',
                       linewidth=edge_width(idx),
                       zorder=3,
                       marker='X')
            ax.scatter(w_val,
                       0,
                       s=90,
                       facecolor=self.colorspec[idx],
                       edgecolor='k',
                       linewidth=edge_width(idx),
                       zorder=axis_zorder)
            return g_val

        # two frames per history entry plus two extra
        num_frames = 2 * len(self.w_hist) + 2
        print('starting animation rendering...')

        def animate(frame):
            ax.cla()
            k = math.floor((frame + 1) / float(2))

            # print rendering update
            if np.mod(frame + 1, 25) == 0:
                print('rendering animation frame ' + str(frame + 1) + ' of ' +
                      str(num_frames))
            if frame == num_frames - 1:
                print('animation rendering complete!')
                time.sleep(1.5)
                clear_output()

            # plot function
            ax.plot(w_plot, g_plot, color='k', zorder=2)

            if k == 0:
                # initial point, its evaluation, and a dashed line joining
                # the w axis to the point on the curve
                start = self.w_init
                g_start = mark_visited(k, start, 3)
                dashed_vertical(start, 0, g_start)
            elif k > 0:
                # all input/output pairs generated before the current one
                for j in range(min(k - 1, len(self.w_hist))):
                    mark_visited(j, self.w_hist[j], 2)

            # tangent line at the current point and the travel-to point
            if 0 < k < len(self.w_hist) + 1:
                w = self.w_hist[k - 1]
                g_eval = self.g(w)
                grad_eval = float(self.grad(w))

                # horizontal extent chosen from `width` and the slope
                div = float(1 + grad_eval**2)
                reach = math.sqrt(width / div)

                # point-slope form of the tangent line
                wrange = np.linspace(w - reach, w + reach, 100)
                h = g_eval + grad_eval * (wrange - w)
                ax.plot(wrange,
                        h,
                        color=self.colorspec[k - 1],
                        linewidth=2,
                        zorder=1)

                # point of tangency
                ax.scatter(w,
                           g_eval,
                           s=100,
                           c='m',
                           edgecolor='k',
                           linewidth=0.7,
                           zorder=3,
                           marker='X')

                # on even frames also show the next point of the run
                if frame % 2 == 0 and k < len(self.w_hist) - 1:
                    w_zero = self.w_hist[k]
                    g_zero = self.g(w_zero)
                    h_zero = g_eval + grad_eval * (w_zero - w)

                    # dashed line spanning zero, the tangent value and the
                    # function value at the next point
                    vals = np.sort([0, h_zero, g_zero])
                    dashed_vertical(w_zero, vals[0], vals[2])

                    # value on the tangent line, marker on the axis and the
                    # point on the curve that is hopped back to
                    ax.scatter(w_zero,
                               h_zero,
                               s=100,
                               c='k',
                               zorder=3,
                               marker='X')
                    ax.scatter(w_zero,
                               0,
                               s=100,
                               c='m',
                               edgecolor='k',
                               linewidth=0.7,
                               zorder=3)
                    ax.scatter(w_zero,
                               g_zero,
                               s=100,
                               c='m',
                               edgecolor='k',
                               linewidth=0.7,
                               zorder=3,
                               marker='X')

            # fix viewing limits and draw the horizontal axis
            ax.set_xlim([wmin - 0.1, wmax + 0.1])
            ax.set_ylim([min(g_plot) - ggap, max(g_plot) + ggap])
            ax.axhline(y=0, color='k', zorder=0, linewidth=0.5)

            # axis labels
            ax.set_xlabel(r'$w$', fontsize=14)
            ax.set_ylabel(r'$g(w)$', fontsize=14, rotation=0, labelpad=25)

            return artist,

        anim = animation.FuncAnimation(fig,
                                       animate,
                                       frames=num_frames,
                                       interval=num_frames,
                                       blit=True)
        return anim
    def gradient_descent(self, g, w, **kwargs):
        """Run gradient descent on ``g`` from ``w`` and return every iterate.

        ``g`` is stored on ``self.g`` and its gradient (from
        ``compute_grad``) on ``self.grad``.

        Args:
            g: cost function to minimize.
            w: initial point.
            **kwargs: optional settings
                max_its: number of steps to take (default 100).
                version: ``'normalized'`` divides each gradient by its norm;
                    anything else leaves it unchanged
                    (default ``'unnormalized'``).
                alpha: fixed steplength (default 1e-4).
                steplength_rule: ``'backtracking'`` calls
                    ``self.backtracking``; ``'diminishing'`` uses
                    ``1 / (k + 1)`` at step ``k``; anything else keeps
                    ``alpha`` fixed.
                projection: callable applied to the point after each step.
                verbose: print start/finish messages (default False).

        Returns:
            List with the initial point followed by the point reached after
            each step (``max_its + 1`` entries).
        """
        # create gradient function
        self.g = g
        self.grad = compute_grad(self.g)

        # parse optional arguments
        max_its = kwargs.get('max_its', 100)
        version = kwargs.get('version', 'unnormalized')
        alpha = kwargs.get('alpha', 10**-4)
        steplength_rule = kwargs.get('steplength_rule', 'none')
        projection = kwargs.get('projection')
        verbose = kwargs.get('verbose', False)

        # create container for weight history
        w_hist = [w]

        # start gradient descent loop
        if verbose:
            print('starting optimization...')
        for k in range(max_its):
            # plug in value into derivative
            grad_eval = np.reshape(self.grad(w), np.shape(w))

            ### normalized or unnormalized descent step? ###
            if version == 'normalized':
                grad_norm = np.linalg.norm(grad_eval)
                if grad_norm == 0:
                    # avoid dividing by zero at a stationary point
                    grad_norm += 10**-6 * np.sign(2 * np.random.rand(1) - 1)
                grad_eval /= grad_norm

            # use backtracking line search?
            if steplength_rule == 'backtracking':
                alpha = self.backtracking(w, grad_eval)

            # use a pre-set diminishing steplength parameter?
            if steplength_rule == 'diminishing':
                alpha = 1 / (float(k + 1))

            ### take gradient descent step ###
            w = w - alpha * grad_eval

            ### projection? ###
            # the option used to be parsed but never applied
            if projection is not None:
                w = projection(w)

            # record
            w_hist.append(w)

        if verbose:
            print('...optimization complete!')
            time.sleep(1.5)
            clear_output()

        return w_hist
    def draw_2d(self, **kwargs):
        """Plot gradient descent runs on a one-input function in two panels.

        The function ``kwargs['g']`` is drawn on both panels of a 1x2 figure.
        For every initial point in ``self.w_inits`` the method calls
        ``self.run_gradient_descent()`` and then marks each entry of
        ``self.w_hist`` on the right panel: an 'X' on the curve and a dot on
        the horizontal axis, colored from green (start) through yellow to red
        (end).

        Keyword arguments:
            g: function to minimize (required).
            wmin, wmax: plotting range (defaults -3.1 and 3.1).
            w_inits: iterable of initial points, each converted to float.
                When omitted, a previously set ``self.w_inits`` is reused,
                otherwise the single default ``self.w_init`` is used.
            steplength: stored on ``self.steplength``.
            max_its: stored on ``self.max_its`` as a float (default 20).
            version: stored on ``self.version`` (default 'unnormalized').

        NOTE(review): how ``run_gradient_descent`` consumes ``self.alpha``,
        ``self.steplength`` and the float-valued ``self.max_its`` is not
        visible here -- confirm against that method.
        """
        self.g = kwargs['g']  # input function
        self.grad = compute_grad(self.g)  # gradient of input function
        self.w_init = float(-2)  # default initial point
        self.alpha = 10**-4  # default step length
        self.max_its = 20  # max iterations to run for each algorithm
        self.w_hist = []  # container for algorithm path

        wmin = kwargs.get('wmin', -3.1)
        wmax = kwargs.get('wmax', 3.1)

        # initial points: user supplied, else whatever is already on the
        # instance, else fall back to the single default so the loop below
        # never hits a missing attribute
        if 'w_inits' in kwargs:
            self.w_inits = [float(s) for s in kwargs['w_inits']]
        elif not hasattr(self, 'w_inits'):
            self.w_inits = [self.w_init]

        # take in user defined step length
        if 'steplength' in kwargs:
            self.steplength = kwargs['steplength']

        # take in user defined maximum number of iterations
        if 'max_its' in kwargs:
            self.max_its = float(kwargs['max_its'])

        # version of gradient descent to use (normalized or unnormalized)
        self.version = kwargs.get('version', 'unnormalized')

        # initialize figure with two equally wide panels
        plt.figure(figsize=(9, 4))
        gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])
        ax1 = plt.subplot(gs[0])
        ax2 = plt.subplot(gs[1])

        # the curve and axis decorations do not depend on the initialization,
        # so draw them once per panel
        w_plot = np.linspace(wmin, wmax, 500)
        g_plot = self.g(w_plot)
        for ax in (ax1, ax2):
            ax.plot(w_plot, g_plot, color='k', zorder=2)  # plot function
            ax.axhline(y=0, color='k', zorder=1, linewidth=0.25)
            ax.axvline(x=0, color='k', zorder=1, linewidth=0.25)
            ax.set_xlabel(r'$w$', fontsize=13)
            ax.set_ylabel(r'$g(w)$', fontsize=13, rotation=0, labelpad=25)

        #### loop over all initializations, run gradient descent for each and plot results ###
        for w_start in self.w_inits:
            # run grad descent for this init
            self.w_init = w_start
            self.w_hist = []
            self.run_gradient_descent()

            # colors for points --> green as the algorithm begins, yellow as
            # it converges, red at final point.  The first half of the path
            # ramps 0 -> 1, the second half stays at 1; red uses the ramp,
            # green the reversed ramp, blue is always 0.
            num_pts = len(self.w_hist)
            half = round(num_pts / 2)
            ramp = np.concatenate(
                (np.linspace(0, 1, half), np.ones(num_pts - half)))
            ramp = ramp[:, np.newaxis]
            self.colorspec = np.concatenate(
                (ramp, np.flipud(ramp), np.zeros((num_pts, 1))), 1)

            ### plot all gradient descent points ###
            for k, w_val in enumerate(self.w_hist):
                g_val = self.g(w_val)
                edge_width = 0.5 * ((1 / (float(k) + 1)))**(0.4)

                # evaluation on function; the RGB triple goes in as
                # ``facecolor`` (as for the axis marker below) so it cannot
                # be mistaken for values to colormap
                ax2.scatter(w_val,
                            g_val,
                            s=90,
                            facecolor=self.colorspec[k],
                            edgecolor='k',
                            linewidth=edge_width,
                            zorder=3,
                            marker='X')
                # matching marker on the horizontal axis
                ax2.scatter(w_val,
                            0,
                            s=90,
                            facecolor=self.colorspec[k],
                            edgecolor='k',
                            linewidth=edge_width,
                            zorder=3)
# Example no. 20
# 0
def draw_it(func, **kwargs):
    """Plot a two-input function with its first and second partial derivatives.

    ``func`` is evaluated on a 100 x 100 grid over [-5, 5] x [-5, 5]; it is
    called once with a (2, 10000) array of inputs.  Its gradient (via
    ``compute_grad``) and Hessian (via ``hessian``) are evaluated point by
    point.  Six 3-D wireframe-style surfaces are laid out on a 3 x 6 grid:

        row 0: g
        row 1: dg/dw1, dg/dw2
        row 2: Hessian entries [0, 0], [1, 0], [1, 1]

    Each panel is passed to ``cleanup(values, view, ax)`` and the figure is
    shown with ``plt.show()``.

    Keyword arguments:
        view: passed through to ``cleanup`` for every panel
            (default ``[10, 150]``).
    """
    view = kwargs.get('view', [10, 150])

    # generate input space for plotting: the meshgrid stays (n, n) for the
    # surfaces, and a (2, n*n) stack of the same points feeds the function
    w_in = np.linspace(-5, 5, 100)
    n = len(w_in)
    w1_vals, w2_vals = np.meshgrid(w_in, w_in)
    w_vals = np.concatenate(
        (w1_vals.reshape(n**2, 1), w2_vals.reshape(n**2, 1)), axis=1).T

    # compute grad vals, one row per grid point
    grad = compute_grad(func)
    grad_vals = np.asarray([grad(s) for s in w_vals.T])

    # compute hessian, one matrix per grid point
    hess = hessian(func)
    hess_vals = [hess(s) for s in w_vals.T]

    def draw_panel(ax, vals, title):
        # reshape flat values onto the grid, draw the surface, title and tidy
        vals = np.reshape(vals, (n, n))
        ax.plot_surface(w1_vals,
                        w2_vals,
                        vals,
                        alpha=0.1,
                        color='w',
                        zorder=1,
                        rstride=15,
                        cstride=15,
                        linewidth=0.5,
                        edgecolor='k')
        ax.set_title(title, fontsize=10)
        cleanup(vals, view, ax)

    # define figure
    plt.figure(figsize=(9, 6))

    ###  plot original function ###
    ax1 = plt.subplot2grid((3, 6), (0, 3), colspan=1, projection='3d')
    draw_panel(ax1, func(w_vals), r'$g(w_1,w_2)$')

    ### plot first derivative functions ###
    ax2 = plt.subplot2grid((3, 6), (1, 2), colspan=1, projection='3d')
    ax3 = plt.subplot2grid((3, 6), (1, 4), colspan=1, projection='3d')
    draw_panel(ax2, grad_vals[:, 0],
               r'$\frac{\partial}{\partial w_1}g(w_1,w_2)$')
    draw_panel(ax3, grad_vals[:, 1],
               r'$\frac{\partial}{\partial w_2}g(w_1,w_2)$')

    ### plot second derivatives ###
    ax4 = plt.subplot2grid((3, 6), (2, 1), colspan=1, projection='3d')
    ax5 = plt.subplot2grid((3, 6), (2, 3), colspan=1, projection='3d')
    ax6 = plt.subplot2grid((3, 6), (2, 5), colspan=1, projection='3d')
    second_derivative_panels = (
        (ax4, (0, 0),
         r'$\frac{\partial}{\partial w_1}\frac{\partial}{\partial w_1}g(w_1,w_2)$'),
        (ax5, (1, 0),
         r'$\frac{\partial}{\partial w_1}\frac{\partial}{\partial w_2}g(w_1,w_2)=\frac{\partial}{\partial w_2}\frac{\partial}{\partial w_1}g(w_1,w_2)$'),
        (ax6, (1, 1),
         r'$\frac{\partial}{\partial w_2}\frac{\partial}{\partial w_2}g(w_1,w_2)$'),
    )
    for ax, (row, col), title in second_derivative_panels:
        entry_vals = np.asarray([h[row, col] for h in hess_vals])
        draw_panel(ax, entry_vals, title)

    plt.show()