Example #1
0
# Excerpt of a training script for a single softmax layer.
# `gpu`, `w`, `m`, `X`, `t` and `batch_size` are defined outside this excerpt.
# Bias `b` and its velocity buffer `mb` both start as 1x10 arrays of zeros.
b = gpu.zeros((1,10))
mb = gpu.zeros((1,10))  

# Hyperparameters: step size, momentum coefficient, and the selector for the
# update rule used in the loop (1 -> Nesterov branch, 2 -> classic branch).
alpha = 0.1
momentum = 0.5
momentum_type = 1

# NOTE(review): both loops bind `i`. The inner loop rebinds it, so inside the
# body `i` is always the inner index (used for X[i] / t[i]) and the outer
# counter is not readable. Presumably the outer loop is an epoch counter and
# X[i] / t[i] are one mini-batch and its targets -- confirm.
for i in xrange(200):
    for i in xrange(X.shape[0]):        
        
        if momentum_type == 1:
            '''Use nesterov momentum to train the weights
            '''
            # Look-ahead parameters: current values plus momentum * velocity.
            n = w + (m*momentum)
            nb = b + (mb*momentum)
            # Forward pass evaluated at the look-ahead parameters.
            out = gpu.softmax(gpu.dot(X[i],n)+nb)
            # Left-multiplying by a 1 x batch_size row of ones sums the rows
            # of (out - t[i]), giving the bias gradient.
            gradb = gpu.dot(gpu.ones((1,batch_size)),out - t[i]) 
            grad = gpu.dot(X[i].T,out - t[i])
            
            # Velocity update: decay the old velocity, subtract the scaled
            # gradient.
            # NOTE(review): the divisor 128. is hard-coded although
            # `batch_size` is used just above -- presumably they are meant to
            # be the same value; confirm.
            m = m*momentum - (alpha*grad/128.)
            mb = mb*momentum - (alpha*gradb/128.)
            # Add the new velocities to the parameters.
            w += m
            b += mb
        elif momentum_type == 2:            
            '''Use classic momentum to train the weights
            '''
            # Same forward pass and gradients as the Nesterov branch, but
            # evaluated at the current `w` and `b` (no look-ahead step).
            out = gpu.softmax(gpu.dot(X[i],w)+b)
            gradb = gpu.dot(gpu.ones((1,batch_size)),out - t[i]) 
            grad = gpu.dot(X[i].T,out - t[i])
            # (The excerpt ends here; this branch's update step is not shown.)
            
                       
Example #2
0
# Excerpt of a training script for a two-layer network (logistic hidden layer,
# softmax output). `gpu`, `np`, `rng`, `time`, `X`, `t`, `epochs`, `batches`,
# `batch_size`, `alpha`, `momentum`, `L2`, `d02`, `d05`, `time_softmax` and the
# parameters w1/w2/b1/b2 with velocities m1/m2/mb1/mb2 are defined outside
# this excerpt.
# NOTE(review): this empty list is overwritten two lines below and never used
# in between.
cv = []
# float32 arrays holding 0 .. epochs-1; presumably placeholders that are later
# filled with per-epoch train / cross-validation scores -- confirm.
train = np.float32(np.array(range(epochs)))
cv = np.float32(np.array(range(epochs)))
for epoch in range(epochs):

    for i in xrange(batches):
        #nesterov accelerated gradient
        # Look-ahead parameters: current values plus momentum * velocity.
        n1 = w1+(m1*momentum)#nesterov updates 2.2 sec
        n2 = w2+(m2*momentum)
        nb1 = b1+(mb1*momentum)
        nb2 = b2+(mb2*momentum)
  
        # Each activation is multiplied by a randomly chosen entry of d02 /
        # d05 -- presumably precomputed dropout masks; confirm.
        z0 = X[i]*d02[rng.randint(0,75)]
        z1 = (gpu.dot(z0,n1)+nb1).logistic()*d05[rng.randint(0,75)]#dropout and activations 7.1 sec 
        # Accumulate the wall-clock time spent in the softmax call.
        t0 = time.time()            
        feedforward = gpu.softmax(gpu.dot(z1,n2)+nb2)
        time_softmax += time.time() - t0      
        #softmax 0.48 sec
        #gradients
        # Output error: prediction minus target.
        e1 = (feedforward - t[i])
        grad2 = gpu.dot(z1.T,e1) 
        # NOTE(review): the forward pass fed z0 (masked input) into layer 1,
        # but this gradient uses the unmasked X[i] -- confirm which is intended.
        # NOTE(review): the term gpu.dot(e1,n2.T)*z1*(1-z1) is computed twice
        # (here and for gradb1).
        grad1 = gpu.dot(X[i].T,(gpu.dot(e1,n2.T)* z1*(1-z1)))#grads 6 sec
        # Left-multiplying by a 1 x batch_size row of ones sums over rows,
        # giving the bias gradients.
        gradb2 = gpu.dot(gpu.ones((1, batch_size)),e1)
        gradb1= gpu.dot(gpu.ones((1, batch_size)),(gpu.dot(e1,n2.T)* z1*(1-z1)))
        #momentum and weight updates
        # Each velocity is decayed by `momentum`, then reduced by the gradient
        # plus L2 times the look-ahead parameter, scaled by alpha / batch_size.
        # NOTE(review): the L2 term is applied to the biases as well --
        # presumably weight decay; confirm that decaying biases is intended.
        m1 = (momentum*m1) - ((grad1 + n1*L2)*alpha/(batch_size*1.0))#momentum and weight updates 7.4 sec    
        m2 = (momentum*m2) - ((grad2 + n2*L2)*alpha/(batch_size*1.0)) 
        mb1 = (momentum*mb1) - ((gradb1 + nb1*L2)*alpha/(batch_size*1.0))
        mb2 = (momentum*mb2) - ((gradb2 + nb2*L2)*alpha/(batch_size*1.0))
      
        # Apply the new velocity to the first-layer weights.
        # (The excerpt ends here; the remaining parameter updates are not shown.)
        w1 = w1 + m1
Example #3
0
# Excerpt of a training script for a single softmax layer.
# `gpu`, `w`, `m`, `X`, `t` and `batch_size` are defined outside this excerpt.
# Bias `b` and its velocity buffer `mb` both start as 1x10 arrays of zeros.
b = gpu.zeros((1, 10))
mb = gpu.zeros((1, 10))

# Hyperparameters: step size, momentum coefficient, and the selector for the
# update rule used in the loop (1 -> Nesterov branch, 2 -> classic branch).
alpha = 0.1
momentum = 0.5
momentum_type = 1

# NOTE(review): both loops bind `i`. The inner loop rebinds it, so inside the
# body `i` is always the inner index (used for X[i] / t[i]) and the outer
# counter is not readable. Presumably the outer loop is an epoch counter and
# X[i] / t[i] are one mini-batch and its targets -- confirm.
for i in xrange(200):
    for i in xrange(X.shape[0]):

        if momentum_type == 1:
            '''Use nesterov momentum to train the weights
            '''
            # Look-ahead parameters: current values plus momentum * velocity.
            n = w + (m * momentum)
            nb = b + (mb * momentum)
            # Forward pass evaluated at the look-ahead parameters.
            out = gpu.softmax(gpu.dot(X[i], n) + nb)
            # Left-multiplying by a 1 x batch_size row of ones sums the rows
            # of (out - t[i]), giving the bias gradient.
            gradb = gpu.dot(gpu.ones((1, batch_size)), out - t[i])
            grad = gpu.dot(X[i].T, out - t[i])

            # Velocity update: decay the old velocity, subtract the scaled
            # gradient.
            # NOTE(review): the divisor 128. is hard-coded although
            # `batch_size` is used just above -- presumably they are meant to
            # be the same value; confirm.
            m = m * momentum - (alpha * grad / 128.)
            mb = mb * momentum - (alpha * gradb / 128.)
            # Add the new velocities to the parameters.
            w += m
            b += mb
        elif momentum_type == 2:
            '''Use classic momentum to train the weights
            '''
            # Forward pass and gradients evaluated at the current `w` and `b`
            # (no look-ahead step).
            out = gpu.softmax(gpu.dot(X[i], w) + b)
            gradb = gpu.dot(gpu.ones((1, batch_size)), out - t[i])
            grad = gpu.dot(X[i].T, out - t[i])

            # Weight velocity update, same form as in the Nesterov branch.
            # (The excerpt ends here; the rest of this branch is not shown.)
            m = m * momentum - (alpha * grad / 128.)
Example #4
0
# Excerpt of a training script for a two-layer network (logistic hidden layer,
# softmax output). `gpu`, `np`, `rng`, `time`, `X`, `t`, `epochs`, `batches`,
# `batch_size`, `alpha`, `momentum`, `L2`, `d02`, `d05`, `time_softmax` and the
# parameters w1/w2/b1/b2 with velocities m1/m2/mb1/mb2 are defined outside
# this excerpt.
# float32 arrays holding 0 .. epochs-1; presumably placeholders that are later
# filled with per-epoch train / cross-validation scores -- confirm.
train = np.float32(np.array(range(epochs)))
cv = np.float32(np.array(range(epochs)))
for epoch in range(epochs):

    for i in xrange(batches):
        #nesterov accelerated gradient
        # Look-ahead parameters: current values plus momentum * velocity.
        n1 = w1 + (m1 * momentum)  #nesterov updates 2.2 sec
        n2 = w2 + (m2 * momentum)
        nb1 = b1 + (mb1 * momentum)
        nb2 = b2 + (mb2 * momentum)

        # Each activation is multiplied by a randomly chosen entry of d02 /
        # d05 -- presumably precomputed dropout masks; confirm.
        z0 = X[i] * d02[rng.randint(0, 75)]
        z1 = (gpu.dot(z0, n1) + nb1).logistic() * d05[rng.randint(
            0, 75)]  #dropout and activations 7.1 sec
        # Accumulate the wall-clock time spent in the softmax call.
        t0 = time.time()
        feedforward = gpu.softmax(gpu.dot(z1, n2) + nb2)
        time_softmax += time.time() - t0
        #softmax 0.48 sec
        #gradients
        # Output error: prediction minus target.
        e1 = (feedforward - t[i])
        grad2 = gpu.dot(z1.T, e1)
        # NOTE(review): the forward pass fed z0 (masked input) into layer 1,
        # but this gradient uses the unmasked X[i] -- confirm which is intended.
        # NOTE(review): the term gpu.dot(e1, n2.T) * z1 * (1 - z1) is computed
        # twice (here and for gradb1).
        grad1 = gpu.dot(X[i].T,
                        (gpu.dot(e1, n2.T) * z1 * (1 - z1)))  #grads 6 sec
        # Left-multiplying by a 1 x batch_size row of ones sums over rows,
        # giving the bias gradients.
        gradb2 = gpu.dot(gpu.ones((1, batch_size)), e1)
        gradb1 = gpu.dot(gpu.ones((1, batch_size)),
                         (gpu.dot(e1, n2.T) * z1 * (1 - z1)))
        #momentum and weight updates
        # Each velocity is decayed by `momentum`, then reduced by the gradient
        # plus L2 times the look-ahead parameter, scaled by alpha / batch_size.
        # (The excerpt ends after the m2 update; the remaining updates are not
        # shown.)
        m1 = (momentum * m1) - (
            (grad1 + n1 * L2) * alpha /
            (batch_size * 1.0))  #momentum and weight updates 7.4 sec
        m2 = (momentum * m2) - ((grad2 + n2 * L2) * alpha / (batch_size * 1.0))