Example #1
import SimpleNN2

def trainSGD(netConfig, x, y, lmb):
    th1, th2 = SimpleNN2.initRandomThetas(netConfig)

    alpha = 0.1
    costs = []

    numSamples = x.shape[0]
    miniBatchSize = 200

    # One pass over the data in mini-batches (any partial final batch is skipped)
    for i in range(numSamples // miniBatchSize):

        fr = i*miniBatchSize
        to = (i+1)*miniBatchSize

        xi = x[fr:to,:]
        yi = y[fr:to]

        # Reuse the last accepted cost when available (it was measured on the
        # previous mini-batch, so the acceptance test below is approximate);
        # otherwise compute the cost on the current mini-batch.
        if costs:
            costBefore = costs[-1]
        else:
            costBefore = SimpleNN2.computeCost(netConfig, th1, th2, xi, yi, lmb)

        grad1, grad2 = SimpleNN2.computeGrad(netConfig, th1, th2, xi, yi, lmb)

        # Line search for a good step size in [alpha/2, 2*alpha]
        alpha = findOptimalAlpha(netConfig, th1, th2, xi, yi, lmb, grad1, grad2, alpha/2, alpha*2)

        # Tentative gradient step
        th1p = th1 - alpha*grad1
        th2p = th2 - alpha*grad2

        costAfter = SimpleNN2.computeCost(netConfig, th1p, th2p, xi, yi, lmb)

        # Accept the step only if it did not increase the cost
        if costAfter <= costBefore:
            costs.append(costAfter)
            th1 = th1p
            th2 = th2p
        #else:
        #    # Find optimal alpha in a wide range
        #    alpha = findOptimalAlpha(netConfig, th1, th2, xi, yi, lmb, grad1, grad2, alpha/50, alpha)
        #    th1p = th1 - alpha*grad1
        #    th2p = th2 - alpha*grad2

        #    costAfter = SimpleNN2.computeCost(netConfig, th1p, th2p, xi, yi, lmb)

        #    if costAfter <= costBefore:
        #        costs.append(costAfter)
        #        th1 = th1p
        #        th2 = th2p

        # len(costs) counts accepted mini-batch steps, not full epochs
        if len(costs) > 0 and len(costs) % 10 == 0:
            print('Step', len(costs), 'with cost', costs[-1], 'and alpha', alpha)

    return th1, th2
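
The findOptimalAlpha helper called above is not included in this example. Below is a minimal sketch of one plausible implementation, assuming a simple grid search over candidate step sizes and the same SimpleNN2.computeCost interface used above; the project's actual helper may well differ.

import numpy as np
import SimpleNN2

def findOptimalAlpha(netConfig, th1, th2, x, y, lmb, grad1, grad2, lo, hi):
    # Hypothetical sketch: evaluate the cost at a handful of step sizes
    # between lo and hi and return the one with the lowest cost.
    bestAlpha, bestCost = lo, float('inf')
    for alpha in np.linspace(lo, hi, 10):
        cost = SimpleNN2.computeCost(netConfig, th1 - alpha*grad1,
                                     th2 - alpha*grad2, x, y, lmb)
        if cost < bestCost:
            bestAlpha, bestCost = alpha, cost
    return bestAlpha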
Example #2
import SimpleNN2
from scipy.optimize import minimize

def trainSciPy2(netConfig, x, y, lmb):

    th1, th2 = SimpleNN2.initRandomThetas(netConfig)

    combinedTheta = SimpleNN2.combineThetas(th1, th2)

    # Minimize the cost over the flattened parameter vector with L-BFGS-B,
    # supplying the analytic gradient through jac.
    optimizedTheta = minimize(
        fun=lambda p: SimpleNN2.computeCostComb(netConfig, p, x, y, lmb),
        x0=combinedTheta,
        method='L-BFGS-B',
        jac=lambda p: SimpleNN2.computeGradComb(netConfig, p, x, y, lmb),
        #callback=lambda xk: print("Iteration complete!"),
        options={'disp': False})  # other options: 'maxiter': 5, 'eps': 1e-10, 'gtol': 1e-10

    # Unflatten the optimized vector back into the two weight matrices
    return SimpleNN2.splitThetas(netConfig, optimizedTheta.x)
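
The combineThetas and splitThetas helpers are not shown either. Below is a minimal sketch of the usual flatten/unflatten pattern they likely follow, assuming hypothetical netConfig fields inputSize, hiddenSize, and outputSize with one bias column per layer; the real module may store shapes differently.

import numpy as np

def combineThetas(th1, th2):
    # Flatten both weight matrices into the single 1-D vector
    # that scipy.optimize.minimize expects.
    return np.concatenate([th1.ravel(), th2.ravel()])

def splitThetas(netConfig, combined):
    # Hypothetical shapes: th1 is (hidden, input+1), th2 is (output, hidden+1);
    # the netConfig field names here are assumptions.
    n1 = netConfig.hiddenSize * (netConfig.inputSize + 1)
    th1 = combined[:n1].reshape(netConfig.hiddenSize, netConfig.inputSize + 1)
    th2 = combined[n1:].reshape(netConfig.outputSize, netConfig.hiddenSize + 1)
    return th1, th2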
Example #3
import SimpleNN2

def trainGradientDescent2(netConfig, x, y, lmb):

    th1, th2 = SimpleNN2.initRandomThetas(netConfig)

    alpha = 2.0
    costs = []

    # Full-batch gradient descent with an adaptive step size
    while True:

        costBefore = SimpleNN2.computeCost(netConfig, th1, th2, x, y, lmb)
        grad1, grad2 = SimpleNN2.computeGrad(netConfig, th1, th2, x, y, lmb)

        # Tentative full-batch gradient step
        th1p = th1 - alpha*grad1
        th2p = th2 - alpha*grad2

        costAfter = SimpleNN2.computeCost(netConfig, th1p, th2p, x, y, lmb)

        # If the cost went up, the step overshot: shrink alpha slightly and retry
        skipUpdate = False
        if costAfter > costBefore:
            alpha = alpha / 1.01
            skipUpdate = True
            print("Decreasing alpha due to cyclic behaviour")

        if not skipUpdate:
            costs.append(costAfter)
            th1 = th1p
            th2 = th2p

        if len(costs) > 0 and len(costs) % 10 == 0:
            print('Epoch', len(costs), 'with cost', costs[-1], 'and alpha', alpha)

        # Treat near-equal successive costs as convergence; if alpha is still
        # large, shrink it first so the final steps are fine-grained
        if len(costs) > 2 and abs(costs[-2] - costs[-1]) < 0.00001:
            if alpha < 0.02:
                break
            else:
                print("Decreasing alpha due to close costs")
                alpha = alpha / 1.5

    return th1, th2
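
The adaptive step-size scheme above (shrink alpha by 1.01 on an overshoot, by 1.5 once successive costs are nearly equal) can be seen in isolation with a self-contained toy problem. The demo below applies the same control logic to the one-dimensional quadratic f(t) = t^2; it is an illustration of the scheme, not part of the original project.

def demoAdaptiveDescent():
    # Same control flow as trainGradientDescent2, applied to f(t) = t^2
    t, alpha, costs = 5.0, 2.0, []
    while True:
        costBefore = t*t
        grad = 2.0*t
        tp = t - alpha*grad
        costAfter = tp*tp
        if costAfter > costBefore:
            alpha = alpha / 1.01   # overshoot: shrink the step slightly
        else:
            costs.append(costAfter)
            t = tp
        if len(costs) > 2 and abs(costs[-2] - costs[-1]) < 0.00001:
            if alpha < 0.02:
                break
            alpha = alpha / 1.5    # near convergence: take finer steps

    return t, alpha, len(costs)

print(demoAdaptiveDescent())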