Code Example #1
def sgd(path, init, lr, lmda):
    #runs stochastic gradient descent with momentum on func defined below
    #starting at the initial guess of the params provided as an argument
    #it also assumes the data you want to use is train_sgd, test_sgd
    #logs the objective value and learning rate once per epoch to a file
    #the line below can be used to sanity check the gradient if needed
    #print(check_grad(func, func_prime, guess, random.choice(data), lmda))

    print("Reading Train Data...")
    data = read_data.read_train_sgd()
    print("Reading Test Data...")
    test_data = read_data.read_test_sgd()

    guess = np.copy(init)
    W, T = guess[:26*129].reshape((26, 129)),\
     guess[26*129:].reshape((26, 26))

    #variables for printing to file
    i, f = 0, open(path + f"/sgd-{lr}-{lmda}.txt", "w")

    #momentum variable
    m = np.zeros(129 * 26 + 26 * 26, dtype=np.longdouble)

    #run descent until the objective stops improving
    print(f"Starting SGD with Momentum: lr:{lr} lambda:{lmda}")
    print(f"Starting SGD with Momentum: lr:{lr} lambda:{lmda}", file=f)

    prev = 0.0
    while True:
        #decay the learning rate over epochs
        temp_lr = lr / (1 + 0.5 * i)

        #evaluate the objective; print and return once it has converged
        current = func(guess, data, lmda)

        print(f"{i}:{current}:{temp_lr}", file=f)
        print(f"{i}\t{current}\t{temp_lr}")

        if abs(current - prev) < 1e-3:
            print("Convergence")
            return
        else:
            prev = current

        #one pass over the data: m = 0.9*m + temp_lr*grad, then guess -= m
        for j in range(len(data)):

            log_grad = func_prime(guess, data[j], lmda)
            np.multiply(0.9, m, out=m)
            np.multiply(temp_lr, log_grad, out=log_grad)
            np.add(m, log_grad, out=m)
            np.subtract(guess, m, out=guess)

        i += 1
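
A minimal driver sketch for the function above, assuming a zero initial guess; the output directory and hyperparameter values here are hypothetical:

#hypothetical invocation: zero init sized for W (26x129) plus T (26x26)
init = np.zeros(26 * 129 + 26 * 26, dtype=np.longdouble)
sgd("results", init, lr=0.01, lmda=1e-2)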
Code Example #2
def adam_gd():
    #runs ADAM over the training data with per-example updates
    n_params = 26 * 129 + 26 * 26
    m_grad = np.zeros(n_params)
    v_grad = np.zeros(n_params)
    m_hat = np.zeros(n_params)
    v_hat = np.zeros(n_params)
    temp = np.zeros(n_params)
    t = 0
    alpha = 0.001
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-10
    max_iter = 3000
    tol = 1e-6
    #initial guess of 0
    guess = np.zeros(n_params)
    data = read_data.read_train_sgd()
    l = 1e-2
    print('Running ADAM')
    epoch = 0
    while True:
        epoch += 1
        if epoch % 5 == 0:
            print(sgd.func(guess, data, l))
        for example in data:
            #t counts individual updates so the bias correction is exact
            t += 1
            #get gradient w.r.t. stochastic objective at timestep t
            log_grad = sgd.func_prime(guess, example, l)
            #update biased first moment estimate
            m_grad = beta1 * m_grad + (1 - beta1) * log_grad
            #update biased second raw moment estimate
            v_grad = beta2 * v_grad + (1 - beta2) * np.square(log_grad)
            #compute bias-corrected first moment estimate
            np.divide(m_grad, 1 - np.power(beta1, t), out=m_hat)
            #compute bias-corrected second raw moment estimate
            np.divide(v_grad, 1 - np.power(beta2, t), out=v_hat)
            #apply the update in place: guess -= alpha*m_hat/(sqrt(v_hat)+eps)
            np.multiply(m_hat, -alpha, out=temp)
            np.divide(temp, np.sqrt(v_hat) + epsilon, out=temp)
            np.add(guess, temp, out=guess)

        #stop once the last update is tiny or the epoch budget is spent
        if np.mean(np.absolute(temp)) < tol:
            break
        if epoch > max_iter:
            break
    return guess
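
A minimal usage sketch for adam_gd; the returned flat vector can be split back into W and T with the same layout used throughout these examples:

weights = adam_gd()
W = weights[:26*129].reshape((26, 129))
T = weights[26*129:].reshape((26, 26))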
Code Example #3
import numpy as np, read_data, prob_grad, random
from scipy.optimize import check_grad

l = 10
data = read_data.read_train_sgd()


def func(params, *args):
    #computes the regularized negative log-likelihood for one example

    W, T = params[:26*129].reshape((26, 129)),\
     params[26*129:].reshape((26, 26))
    x, y = args[0]
    l = args[1]

    log_p = prob_grad.compute_log_p(x, y, W, T)

    return -1*log_p + 0.5*l*(\
     np.sum(np.square(W)) +\
     np.sum(np.square(T)))


def func_prime(params, *args):
    #computes the gradient of func for a single example
    #the gradient layout mirrors params: the W block first, then the T block
    #NOTE: the original snippet is truncated here; the prob_grad helper
    #names below are assumptions, so substitute the real gradient routines

    W, T = params[:26*129].reshape((26, 129)),\
     params[26*129:].reshape((26, 26))
    x, y = args[0]
    l = args[1]

    log_grad = np.zeros(26 * 129 + 26 * 26)
    log_grad[:26*129] = (-1*prob_grad.compute_grad_w(x, y, W, T) + l*W).flatten()
    log_grad[26*129:] = (-1*prob_grad.compute_grad_t(x, y, W, T) + l*T).flatten()
    return log_grad
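
The check_grad import at the top of this file suggests a finite-difference sanity check of func_prime against func, mirroring the commented-out line in Example #1; a minimal sketch, assuming the module-level data has been loaded as above:

#check_grad returns the norm of the difference between the analytic
#gradient and a finite-difference estimate; it should be near zero
guess = np.zeros(26 * 129 + 26 * 26)
print(check_grad(func, func_prime, guess, random.choice(data), l))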
Code Example #4
def adam_mcmc(path, init, lr, lmda, epsilon, s):
    #runs the adam optimizer with frequency-based example sampling,
    #inspired by ashwani

    print("Reading Train Data...")
    data = read_data.read_train_sgd()
    print("Reading Test Data...")
    test_data = read_data.read_test_sgd()

    print("Computing the frequencies")
    table = compute_freq(data)

    guess = np.copy(init)
    W, T = guess[:26*129].reshape((26, 129)),\
     guess[26*129:].reshape((26, 26))

    #adam parameters
    t, b1, b2 = 0, 0.9, 0.999
    m = np.zeros(26 * 129 + 26 * 26, dtype=np.longdouble)
    v = np.zeros(26 * 129 + 26 * 26, dtype=np.longdouble)
    i, f = 0, open(path + f"/adam-{lr}-{lmda}.txt", "w")

    print(f"Running Adam: lr:{lr} lambda:{lmda} epsilon:{epsilon}")
    print(f"Running Adam: lr:{lr} lambda:{lmda} epsilon:{epsilon}", file=f)

    prev = 0.0
    while True:

        if t % 30 == 0:
            current = func(guess, data, lmda)
            error = compute_test_error(f, test_data, W, T)
            print(f"{i}:{current}:{error}", file=f)
            print(f"{i}:{current}:{error}")
            if abs(current - prev) < 1e-3:
                print("Convergence")
                return
            else:
                prev = current

            i += 1
        t += 1

        #decay the learning rate as the evaluation counter grows
        temp_lr = lr / (1 + 0.5 * i)

        example = sample(data, table, s)
        log_grad = func_prime(guess, example, lmda)

        #update biased first moment estimate
        np.multiply(b1, m, out=m)
        np.add(m, np.multiply((1 - b1), log_grad), out=m)

        #update biased second raw moment estimate
        np.multiply(b2, v, out=v)
        np.square(log_grad, out=log_grad)
        np.multiply((1 - b2), log_grad, out=log_grad)
        np.add(v, log_grad, out=v)

        #bias-correct into temporaries so the running moments survive
        m_hat = m / (1 - np.power(b1, t))
        v_hat = v / (1 - np.power(b2, t))

        #apply the update: guess -= temp_lr * m_hat / (sqrt(v_hat) + epsilon)
        np.multiply(-1 * temp_lr, m_hat, out=m_hat)
        np.sqrt(v_hat, out=v_hat)
        np.add(v_hat, epsilon, out=v_hat)
        np.divide(m_hat, v_hat, out=m_hat)
        np.add(guess, m_hat, out=guess)
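
A minimal driver sketch mirroring the sgd example above; the directory, hyperparameters, and sample size s are hypothetical values:

init = np.zeros(26 * 129 + 26 * 26, dtype=np.longdouble)
adam_mcmc("results", init, lr=0.01, lmda=1e-2, epsilon=1e-8, s=100)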