    def run_algo(self, algo, **kwargs):
        # Get function and compute gradient
        self.g = self.linear_least_squares
        self.grad = compute_grad(self.g)

        # choose algorithm
        self.algo = algo
        if self.algo == 'gradient_descent':
            self.alpha = 10**-3
            if 'alpha' in kwargs:
                self.alpha = kwargs['alpha']

        self.max_its = 10
        if 'max_its' in kwargs:
            self.max_its = kwargs['max_its']

        self.w_init = np.random.randn(2)
        if 'w_init' in kwargs:
            self.w_init = kwargs['w_init']
            self.w_init = np.asarray([float(s) for s in self.w_init])
            self.w_init.shape = (np.size(self.w_init), 1)

        # run algorithm of choice
        if self.algo == 'gradient_descent':
            self.w_hist = []
            self.gradient_descent()
        if self.algo == 'newtons_method':
            self.hess = compute_hess(self.g)  # hessian of input function
            self.beta = 0
            if 'beta' in kwargs:
                self.beta = kwargs['beta']
            self.w_hist = []
            self.newtons_method()
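
For context, a minimal sketch of the pieces run_algo wires together, assuming compute_grad wraps autograd's grad; the data and cost below are hypothetical stand-ins for self.linear_least_squares.

import autograd.numpy as np
from autograd import grad as compute_grad

# hypothetical data for a least-squares cost
x = np.linspace(0, 1, 20)
y = 3 * x + 1

def linear_least_squares(w):
    # squared error of the line w[0] + w[1]*x against the targets y
    return np.sum((w[0] + w[1] * x - y)**2)

grad = compute_grad(linear_least_squares)
w_init = np.random.randn(2)
print(grad(w_init))    # gradient at a random initial point, shape (2,)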
Code example #2
    def newtons_method(self, g, w, **kwargs):
        # create gradient and hessian functions
        self.g = g

        # flatten gradient for simpler-written descent loop
        flat_g, unflatten, w = flatten_func(self.g, w)

        self.grad = compute_grad(flat_g)
        self.hess = compute_hess(flat_g)

        # parse optional arguments
        max_its = 20
        if 'max_its' in kwargs:
            max_its = kwargs['max_its']
        self.epsilon = 10**(-5)
        if 'epsilon' in kwargs:
            self.epsilon = kwargs['epsilon']
        verbose = False
        if 'verbose' in kwargs:
            verbose = kwargs['verbose']

        # create container for weight history
        w_hist = []
        w_hist.append(unflatten(w))

        # start newton's method loop
        if verbose:
            print('starting optimization...')

        geval_old = flat_g(w)
        for k in range(max_its):
            # compute gradient and hessian
            grad_val = self.grad(w)
            hess_val = self.hess(w)
            hess_val.shape = (np.size(w), np.size(w))

            # solve linear system for weights
            w = w - np.dot(
                np.linalg.pinv(hess_val + self.epsilon * np.eye(np.size(w))),
                grad_val)

            # eject from process if reaching singular system
            geval_new = flat_g(w)
            if k > 2 and geval_new > geval_old:
                print('singular system reached')
                time.sleep(1.5)
                clear_output()
                return w_hist
            else:
                geval_old = geval_new

            # record current weights
            w_hist.append(unflatten(w))

        if verbose:
            print('...optimization complete!')
            time.sleep(1.5)
            clear_output()

        return w_hist
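
The regularized Newton step used above, pulled out as a self-contained sketch (autograd assumed as the source of the derivative helpers; the quadratic g below is a toy stand-in):

import autograd.numpy as np
from autograd import grad as compute_grad, hessian as compute_hess

g = lambda w: w[0]**2 + 10 * w[1]**2    # toy cost with minimum at the origin
grad = compute_grad(g)
hess = compute_hess(g)

w = np.array([1.0, 1.0])
epsilon = 10**(-5)
for k in range(3):
    grad_val = grad(w)
    hess_val = hess(w).reshape(np.size(w), np.size(w))
    # regularized Newton step: subtract (H + eps*I)^{-1} grad
    w = w - np.dot(np.linalg.pinv(hess_val + epsilon * np.eye(np.size(w))), grad_val)
print(w, g(w))    # lands essentially at the origin after the first step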
Code example #3
    def __init__(self, **args):
        self.g = args['g']                          # input function
        self.grad = compute_grad(self.g)            # gradient of input function
        self.hess = compute_hess(self.g)            # hessian of input function
        self.w_init = float(-2)                     # user-defined initial point (adjustable when calling each algorithm)
        self.alpha = 10**-4                         # user-defined step length for gradient descent (adjustable when calling gradient descent)
        self.max_its = 20                           # max iterations to run for each algorithm
        self.w_hist = []                            # container for algorithm path
        self.colors = [[0, 1, 0.25], [0, 0.75, 1]]  # set of custom colors used for plotting
        self.beta = 0
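
In isolation, what this constructor sets up for a 1-d cost (autograd assumed; the cost below is a hypothetical stand-in for args['g']):

import autograd.numpy as np
from autograd import grad as compute_grad

g = lambda w: np.sin(3 * w) + 0.1 * w**2    # hypothetical input function
grad = compute_grad(g)                      # gradient of input function
hess = compute_grad(grad)                   # 1-d hessian as grad-of-grad
w_init = float(-2)                          # the default initial point above
print(g(w_init), grad(w_init), hess(w_init))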
Code example #4
import autograd.numpy as np
from autograd import grad as compute_grad      # autograd assumed as the
from autograd import hessian as compute_hess   # source of these helpers
from autograd.misc.flatten import flatten_func
import time
from IPython.display import clear_output


def newtons_method(g, w, x, y, beta, max_its):
    # flatten gradient for simpler-written descent loop
    flat_g, unflatten, w = flatten_func(g, w)

    grad = compute_grad(flat_g)
    hess = compute_hess(flat_g)  

    # create container for weight history 
    w_hist = []
    w_hist.append(unflatten(w))
    
    g_hist = []
    geval_old = flat_g(w,x,y,beta)
    g_hist.append(geval_old)

    # main loop
    epsilon = 10**(-7)
    for k in range(max_its):
        # compute gradient and hessian
        grad_val = grad(w,x,y,beta)
        hess_val = hess(w,x,y,beta)
        hess_val.shape = (np.size(w),np.size(w))

        # solve linear system for weights
        w = w - np.dot(np.linalg.pinv(hess_val + epsilon*np.eye(np.size(w))),grad_val)

        # eject from process if reaching singular system
        geval_new = flat_g(w,x,y,beta)
        if k > 2 and geval_new > geval_old:
            print('singular system reached')
            time.sleep(1.5)
            clear_output()
            return w_hist, g_hist
        else:
            geval_old = geval_new

        # record current weights
        w_hist.append(unflatten(w))
        g_hist.append(geval_new)

    return w_hist, g_hist
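
A hedged usage sketch for this standalone version, assuming the extra arguments (x, y, beta) parameterize a regularized least-squares cost; the data below are hypothetical:

import autograd.numpy as np

# hypothetical 1-d regression data
x = np.linspace(0, 1, 30)
y = 2 * x + 0.5

def g(w, x, y, beta):
    # L2-regularized least-squares cost
    return np.sum((w[0] + w[1] * x - y)**2) + beta * np.sum(w**2)

w0 = np.random.randn(2, 1)
w_hist, g_hist = newtons_method(g, w0, x, y, beta=0.1, max_its=5)
print(g_hist[-1])    # the cost is quadratic, so Newton settles in one step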
Code example #5
    def newtons_method(self, g, w_hist, savepath, **kwargs):
        # compute gradient and hessian of input
        grad = compute_grad(g)  # gradient of input function
        hess = compute_hess(g)  # hessian of input function

        # set viewing range
        wmax = 3
        if 'wmax' in kwargs:
            wmax = kwargs['wmax']
        wmin = -wmax
        if 'wmin' in kwargs:
            wmin = kwargs['wmin']

        # initialize figure
        fig = plt.figure(figsize=(9, 4))
        artist = fig

        # create subplot with 3 panels, plot input function in center plot
        gs = gridspec.GridSpec(1, 3, width_ratios=[1, 4, 1])

        ax1 = plt.subplot(gs[0])
        ax1.axis('off')
        ax3 = plt.subplot(gs[2])
        ax3.axis('off')
        ax = plt.subplot(gs[1])

        # generate function for plotting on each slide
        w_plot = np.linspace(wmin, wmax, 1000)
        g_plot = g(w_plot)
        g_range = max(g_plot) - min(g_plot)
        ggap = g_range * 0.1
        w_vals = np.linspace(-2.5, 2.5, 50)
        width = 1

        # make color spectrum for points
        colorspec = self.make_colorspec(w_hist)

        # animation sub-function
        print('starting animation rendering...')
        num_frames = 2 * len(w_hist) + 2

        def animate(t):
            ax.cla()
            k = math.floor((t + 1) / float(2))

            # print rendering update
            if np.mod(k + 1, 25) == 0:
                print('rendering animation frame ' + str(k + 1) + ' of ' +
                      str(num_frames))
            if t == num_frames - 1:
                print('animation rendering complete!')
                time.sleep(1.5)
                clear_output()

            # plot function
            ax.plot(w_plot, g_plot, color='k', zorder=1)  # plot function

            # plot initial point and evaluation
            if k == 0:
                w_val = w_hist[0]
                g_val = g(w_val)
                ax.scatter(w_val,
                           g_val,
                           s=100,
                           c=colorspec[k],
                           edgecolor='k',
                           linewidth=0.7,
                           marker='X',
                           zorder=2)  # plot point of tangency
                ax.scatter(w_val,
                           0,
                           s=100,
                           c=colorspec[k],
                           edgecolor='k',
                           linewidth=0.7,
                           zorder=2)
                # draw dashed line connecting w axis to point on cost function
                s = np.linspace(0, g_val)
                o = np.ones((len(s)))
                ax.plot(o * w_val, s, 'k--', linewidth=1, zorder=0)

            # plot all input/output pairs generated by algorithm thus far
            if k > 0:
                # plot all points up to this point
                for j in range(min(k - 1, len(w_hist))):
                    w_val = w_hist[j]
                    g_val = g(w_val)
                    ax.scatter(w_val,
                               g_val,
                               s=90,
                               c=colorspec[j],
                               edgecolor='k',
                               marker='X',
                               linewidth=0.7,
                               zorder=3)  # plot point of tangency
                    ax.scatter(w_val,
                               0,
                               s=90,
                               facecolor=colorspec[j],
                               edgecolor='k',
                               linewidth=0.7,
                               zorder=2)

            # plot surrogate function and travel-to point
            if k > 0 and k < len(w_hist) + 1:
                # grab historical weight, compute function and derivative evaluations
                w_eval = w_hist[k - 1]
                if type(w_eval) != float:
                    w_eval = float(w_eval)

                # plug in value into func and derivative
                g_eval = g(w_eval)
                g_grad_eval = grad(w_eval)
                g_hess_eval = hess(w_eval)

                # determine width of plotting area for second order approximator
                width = 0.5
                if g_hess_eval < 0:
                    width = -width

                # setup quadratic formula params
                a = 0.5 * g_hess_eval
                b = g_grad_eval - 2 * 0.5 * g_hess_eval * w_eval
                c = 0.5 * g_hess_eval * w_eval**2 - g_grad_eval * w_eval - width

                # solve for zero points
                w1 = (-b + math.sqrt(b**2 - 4 * a * c)) / float(2 * a +
                                                                0.00001)
                w2 = (-b - math.sqrt(b**2 - 4 * a * c)) / float(2 * a +
                                                                0.00001)

                # compute second order approximation
                wrange = np.linspace(w1, w2, 100)
                h = g_eval + g_grad_eval * (
                    wrange - w_eval) + 0.5 * g_hess_eval * (wrange - w_eval)**2

                # plot tangent curve
                ax.plot(wrange,
                        h,
                        color=colorspec[k - 1],
                        linewidth=2,
                        zorder=2)  # plot approx

                # plot tangent point
                ax.scatter(w_eval,
                           g_eval,
                           s=100,
                           c='m',
                           edgecolor='k',
                           marker='X',
                           linewidth=0.7,
                           zorder=3)  # plot point of tangency

                # plot next point learned from surrogate
                if np.mod(t, 2) == 0:
                    # create next point information
                    w_zero = w_eval - g_grad_eval / (g_hess_eval + 10**-5)
                    g_zero = g(w_zero)
                    h_zero = g_eval + g_grad_eval * (
                        w_zero - w_eval) + 0.5 * g_hess_eval * (w_zero -
                                                                w_eval)**2

                    # draw dashed line connecting the three
                    vals = [0, h_zero, g_zero]
                    vals = np.sort(vals)

                    s = np.linspace(vals[0], vals[2])
                    o = np.ones((len(s)))
                    ax.plot(o * w_zero, s, 'k--', linewidth=1)

                    # draw intersection at zero and associated point on cost function you hop back to
                    ax.scatter(w_zero,
                               h_zero,
                               s=100,
                               c='b',
                               linewidth=0.7,
                               marker='X',
                               edgecolor='k',
                               zorder=3)
                    ax.scatter(w_zero,
                               0,
                               s=100,
                               c='m',
                               edgecolor='k',
                               linewidth=0.7,
                               zorder=3)
                    ax.scatter(w_zero,
                               g_zero,
                               s=100,
                               c='m',
                               edgecolor='k',
                               linewidth=0.7,
                               marker='X',
                               zorder=3)  # plot point of tangency

            # fix viewing limits on panel
            ax.set_xlim([wmin, wmax])
            ax.set_ylim(
                [min(-0.3,
                     min(g_plot) - ggap),
                 max(max(g_plot) + ggap, 0.3)])

            # add horizontal axis
            ax.axhline(y=0, color='k', zorder=0, linewidth=0.5)

            # label axes
            ax.set_xlabel(r'$w$', fontsize=14)
            ax.set_ylabel(r'$g(w)$', fontsize=14, rotation=0, labelpad=25)

            # set tickmarks
            ax.set_xticks(np.arange(round(wmin), round(wmax) + 1, 1.0))
            ax.set_yticks(
                np.arange(round(min(g_plot) - ggap),
                          round(max(g_plot) + ggap) + 1, 1.0))

            return artist,

        anim = animation.FuncAnimation(fig,
                                       animate,
                                       frames=num_frames,
                                       interval=num_frames,
                                       blit=True)

        # produce animation and save
        fps = 50
        if 'fps' in kwargs:
            fps = kwargs['fps']
        anim.save(savepath, fps=fps, extra_args=['-vcodec', 'libx264'])
        clear_output()
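
The curve each frame draws is the second-order Taylor surrogate of g at w_eval; a self-contained check of that formula (autograd assumed, toy cost hypothetical):

import autograd.numpy as np
import matplotlib.pyplot as plt
from autograd import grad as compute_grad

g = lambda w: np.cos(2 * w) + w**2    # toy 1-d cost
grad = compute_grad(g)
hess = compute_grad(grad)             # 1-d hessian as grad-of-grad

w_eval = 0.8
wrange = np.linspace(w_eval - 0.5, w_eval + 0.5, 100)
h = (g(w_eval) + grad(w_eval) * (wrange - w_eval)
     + 0.5 * hess(w_eval) * (wrange - w_eval)**2)

plt.plot(wrange, g(wrange), color='k')    # the cost itself
plt.plot(wrange, h, color='m')            # its quadratic surrogate at w_eval
plt.savefig('surrogate_check.png')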
Code example #6
    def newtons_method(self, g, win, **kwargs):
        # flatten gradient for simpler-written descent loop
        self.g, unflatten, w = flatten_func(g, win)

        self.grad = compute_grad(self.g)
        self.hess = compute_hess(self.g)

        # parse optional arguments
        max_its = 20
        if 'max_its' in kwargs:
            max_its = kwargs['max_its']
        self.epsilon = 10**-10
        if 'epsilon' in kwargs:
            self.epsilon = kwargs['epsilon']
        verbose = True
        if 'verbose' in kwargs:
            verbose = kwargs['verbose']
        output = 'history'
        if 'output' in kwargs:
            output = kwargs['output']
        self.counter = copy.deepcopy(self.g)
        if 'counter' in kwargs:
            counter = kwargs['counter']
            self.counter, unflatten, w = flatten_func(counter, win)

        # start newton's method loop
        if verbose:
            print('starting optimization...')
        geval_old = self.g(w)

        # track best weights seen so far
        self.w_best = unflatten(copy.deepcopy(w))
        g_best = self.counter(w)

        # create container for weight history
        w_hist = []
        if output == 'history':
            w_hist.append(unflatten(w))

        # loop
        for k in range(max_its):
            # compute gradient and hessian
            grad_val = self.grad(w)
            hess_val = self.hess(w)
            hess_val.shape = (np.size(w), np.size(w))

            # solve linear system for weights
            C = hess_val + self.epsilon * np.eye(np.size(w))
            w = np.linalg.solve(C, np.dot(C, w) - grad_val)

            # eject from process if reaching singular system
            geval_new = self.g(w)
            if k > 2 and geval_new > geval_old:
                print('singular system reached')
                time.sleep(1.5)
                clear_output()
                if output == 'history':
                    return w_hist
                elif output == 'best':
                    return self.w_best
            else:
                geval_old = geval_new

            # record current weights
            if output == 'best':
                # compare on the counting cost, not the raw objective
                if self.counter(w) < g_best:
                    g_best = self.counter(w)
                    self.w_best = copy.deepcopy(unflatten(w))

            w_hist.append(unflatten(w))

        if verbose:
            print('...optimization complete!')
            time.sleep(1.5)
            clear_output()
        if output == 'best':
            return self.w_best
        elif output == 'history':
            return w_hist
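
Unlike the earlier snippets, this variant forms the step via np.linalg.solve: solving C*w_new = C*w - grad for w_new gives w_new = w - inv(C)*grad, the same regularized Newton step. A quick numeric check of that equivalence:

import numpy as np

C = np.array([[3.0, 1.0], [1.0, 2.0]])    # stand-in for hess + epsilon*I
w = np.array([1.0, -1.0])
grad_val = np.array([0.5, 0.25])

w_solve = np.linalg.solve(C, np.dot(C, w) - grad_val)
w_direct = w - np.dot(np.linalg.inv(C), grad_val)
print(np.allclose(w_solve, w_direct))     # True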