Example no. 1
  def loss(self, X, y=None):
    """
    Evaluate loss and gradient for the three-layer convolutional network.
    """
    W1 = self.params['W1']
    W2, b2 = self.params['W2'], self.params['b2']
    W3, b3 = self.params['W3'], self.params['b3']

    # pass pool_param to the forward pass for the max-pooling layer
    pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

    scores = None
    # Forward pass: conv -> relu -> 2x2 max pool -> fc -> relu -> fc
    conv, cache1 = layers.conv_forward(X, W1)
    relu1, cache2 = layers.relu_forward(conv)
    maxp, cache3 = layers.max_pool_forward(relu1, pool_param)
    fc1, cache4 = layers.fc_forward(maxp, W2, b2)
    relu2, cache5 = layers.relu_forward(fc1)
    scores, cache6 = layers.fc_forward(relu2, W3, b3)

    if y is None:
      return scores

    loss, grads = 0, {}
    # Backward pass through the same layers in reverse order
    loss, dscores = layers.softmax_loss(scores, y)
    dx3, dW3, db3 = layers.fc_backward(dscores, cache6)
    dRelu2 = layers.relu_backward(dx3, cache5)
    dx2, dW2, db2 = layers.fc_backward(dRelu2, cache4)
    dmaxp = layers.max_pool_backward(dx2.reshape(maxp.shape), cache3)
    dRelu1 = layers.relu_backward(dmaxp, cache2)
    dx, dW1 = layers.conv_backward(dRelu1, cache1)

    grads = {'W1': dW1, 'W2': dW2, 'b2': db2, 'W3': dW3, 'b3': db3}

    return loss, grads
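
The loss above leans on a softmax_loss helper that returns both the scalar loss and the gradient on the scores. The layers module itself is not part of this excerpt; a typical numerically stable implementation looks roughly like the sketch below (illustrative only, the project's actual version may differ):

import numpy as np

def softmax_loss(scores, y):
    # scores: (N, K) class scores, y: (N,) integer class labels.
    N = scores.shape[0]
    shifted = scores - scores.max(axis=1, keepdims=True)   # numerical stability
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    loss = -log_probs[np.arange(N), y].mean()               # mean cross-entropy
    dscores = np.exp(log_probs)                             # softmax probabilities
    dscores[np.arange(N), y] -= 1                           # subtract 1 at the true class
    dscores /= N
    return loss, dscores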
Example no. 2
def forward(data, model):
    """
    Input:
        data  : (N, C, H, W) input images
        model : dict of parameters "w1", "b1", ..., "wo", "bo"
    Output:
        out   : (N, K) class scores (softmax not applied)
    """
    w1 = "w1"
    b1 = "b1"
    w3 = "w3"
    b3 = "b3"
    w5 = "w5"
    b5 = "b5"
    w6 = "w6"
    b6 = "b6"
    wo = "wo"
    bo = "bo"

    #forward pass
    h1_pre = layers.conv_forward(data, model[w1], model[b1])
    h1 = layers.ReLu_forward(h1_pre)
    #print (h1[0][0])

    h2 = layers.max_pool(h1, 2)

    h3_pre = layers.conv_forward(h2, model[w3], model[b3])
    h3 = layers.ReLu_forward(h3_pre)

    h4 = layers.max_pool(h3, 2)

    h5_pre = layers.conv_forward(h4, model[w5], model[b5])
    h5 = layers.ReLu_forward(h5_pre)

    h6 = layers.full_forward(h5, model[w6], model[b6])

    # Softmax is omitted here: it is monotonic, so the argmax of the raw
    # scores gives the same prediction as the argmax of the probabilities.
    out = layers.full_forward(h6, model[wo], model[bo])
    return out
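
A minimal usage sketch for this forward pass; model, test_images and test_labels are hypothetical names for an initialized parameter dict, an (N, C, H, W) image batch and its (N,) label vector. Because softmax preserves ordering, the argmax of the raw scores already gives the predicted class:

import numpy as np

scores = forward(test_images, model)            # (N, K) raw class scores
predictions = np.argmax(scores, axis=1)         # (N,) predicted class indices
accuracy = np.mean(predictions == test_labels)  # fraction of correct predictions
print("test accuracy:", accuracy)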
Example no. 3
    def test_conv_backward(self):
        # Conv backward
        np.random.seed(498)
        x = np.random.randn(3, 2, 7, 7)
        w = np.random.randn(4, 2, 3, 3)

        dout = np.random.randn(3, 4, 5, 5)

        dx_num = eval_numerical_gradient_array(
            lambda x: layers.conv_forward(x, w)[0], x, dout)
        dw_num = eval_numerical_gradient_array(
            lambda w: layers.conv_forward(x, w)[0], w, dout)

        _, cache = layers.conv_forward(x, w)
        dx, dw = layers.conv_backward(dout, cache)

        print('\nTesting conv_backward function:')
        # The errors should be around 3e-9
        print('dx error: ', rel_error(dx_num, dx))
        np.testing.assert_allclose(dx, dx_num, atol=1e-8)
        # The errors should be around 5e-10
        print('dw error: ', rel_error(dw_num, dw))
        np.testing.assert_allclose(dw, dw_num, atol=1e-8)
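
These tests assume two helpers, rel_error and eval_numerical_gradient_array, that are not part of this excerpt. A sketch of what they typically look like, following the common CS231n-style gradient-check utilities (the project's own versions may differ):

import numpy as np

def rel_error(x, y):
    # Maximum relative error, guarded against division by zero.
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

def eval_numerical_gradient_array(f, x, df, h=1e-5):
    # Centered finite differences for a function returning an array;
    # df is the upstream gradient that gets chained in.
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + h
        pos = f(x).copy()
        x[ix] = old - h
        neg = f(x).copy()
        x[ix] = old
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad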
Example no. 4
    def test_conv_forward(self):
        # Conv forward
        x = np.linspace(-0.1, 2.5, num=36).reshape(1, 1, 6, 6)
        w = np.linspace(-0.9, 0.6, num=9).reshape(1, 1, 3, 3)

        out, _ = layers.conv_forward(x, w)
        correct_out = np.array(
            [[[[1.02085714, 0.92057143, 0.82028571, 0.72],
               [0.41914286, 0.31885714, 0.21857143, 0.11828571],
               [-0.18257143, -0.28285714, -0.38314286, -0.48342857],
               [-0.78428571, -0.88457143, -0.98485714, -1.08514286]]]])

        # Compare your output with ours. The error might be around 2e-8.
        # As long as your error is small enough, your implementation should pass this test.
        print('\nTesting conv_forward function:')
        print('difference: ', rel_error(out, correct_out))
        np.testing.assert_allclose(out, correct_out, atol=1e-7)
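
For reference, a naive conv_forward consistent with what these tests exercise (stride 1, no padding, no bias term), so a 1x1x6x6 input convolved with a 1x1x3x3 filter yields the 1x1x4x4 map checked above. This is an illustrative sketch, not necessarily the project's implementation:

import numpy as np

def naive_conv_forward(x, w):
    # x: (N, C, H, W) inputs, w: (F, C, HH, WW) filters; stride 1, no padding.
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    H_out, W_out = H - HH + 1, W - WW + 1
    out = np.zeros((N, F, H_out, W_out))
    for n in range(N):
        for f in range(F):
            for i in range(H_out):
                for j in range(W_out):
                    # Dot product of the filter with the receptive field.
                    out[n, f, i, j] = np.sum(x[n, :, i:i+HH, j:j+WW] * w[f])
    cache = (x, w)
    return out, cache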
Example no. 5
def train(data, label, y, model, alpha=0.001):
    """
    Input:
        data  : (N, C, H, W) input images
        label : (N, K) one-hot labels
        y     : (N, ) integer class labels
        model : dict of parameters "w1", "b1", ..., "wo", "bo"
        alpha : learning rate for the in-place SGD updates
    Output:
        cost : softmax cross-entropy cost on the batch
        rate : classification rate (accuracy) on the batch
    """
    w1 = "w1"; b1 = "b1"
    w3 = "w3"; b3 = "b3"
    w5 = "w5"; b5 = "b5"
    w6 = "w6"; b6 = "b6"
    wo = "wo"; bo = "bo"

    #forward pass
    h1_pre = layers.conv_forward(data, model[w1], model[b1])
    h1 = layers.ReLu_forward(h1_pre)
    #print (h1[0][0])

    h2 = layers.max_pool(h1, 2)

    h3_pre = layers.conv_forward(h2, model[w3], model[b3])
    h3 = layers.ReLu_forward(h3_pre)

    h4 = layers.max_pool(h3, 2)

    h5_pre = layers.conv_forward(h4, model[w5], model[b5])
    h5 = layers.ReLu_forward(h5_pre)

    h6 = layers.full_forward(h5, model[w6], model[b6]) 

    out = layers.full_forward(h6, model[wo], model[bo]) #after this we need softmax 
    y_hat = layers.softmax(out)

    y_hat_arg = np.argmax(y_hat, axis=1)
    dout = (y_hat - label)  # gradient of softmax cross-entropy w.r.t. the scores
    cost = layers.cost(y_hat, label)
    rate = layers.classification_rate(y, y_hat_arg)

    # Gradient descent: backpropagate and apply in-place SGD parameter updates
    print("------")
    print(y_hat)
    print("gradient updates : ")
    print("------")

    dout_h6, dwo_gradient, dbo_gradient = layers.full_backward(dout, h6, model[wo], model[bo])
    model[wo] = model[wo] - alpha * dwo_gradient
    model[bo] = model[bo] - alpha * dbo_gradient

    dout_h5, dw6_gradient, db6_gradient = layers.full_backward(dout_h6, h5, model[w6], model[b6])
    model[w6] = model[w6] - alpha * dw6_gradient
    model[b6] = model[b6] - alpha * db6_gradient

    dout_h4, dw5_gradient, db5_gradient = layers.conv_backward(layers.ReLu_backward(h5_pre, dout_h5), h4, model[w5], model[b5])
    model[w5] = model[w5] - alpha * dw5_gradient
    model[b5] = model[b5] - alpha * db5_gradient

    dout_h3 = layers.max_pool_back(h3, dout_h4, 2)

    dout_h2, dw3_gradient, db3_gradient = layers.conv_backward(layers.ReLu_backward(h3_pre, dout_h3), h2, model[w3], model[b3])
    model[w3] = model[w3] - alpha * dw3_gradient
    model[b3] = model[b3] - alpha * db3_gradient

    dout_h1 = layers.max_pool_back(h1, dout_h2, 2)

    d_data, dw1_gradient, db1_gradient = layers.conv_backward(layers.ReLu_backward(h1_pre, dout_h1), data, model[w1], model[b1])
    model[w1] = model[w1] - alpha * dw1_gradient
    model[b1] = model[b1] - alpha * db1_gradient

    return cost, rate
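
A minimal driver for this train function, assuming hypothetical arrays train_images (N, C, H, W), train_labels_onehot (N, K) and train_y (N,) plus an initialized model dict; train updates model in place, so full-batch gradient descent is just repeated calls:

num_epochs = 5  # assumed setting, for illustration only
for epoch in range(num_epochs):
    cost, rate = train(train_images, train_labels_onehot, train_y, model, alpha=0.001)
    print("epoch %d: cost=%.4f  rate=%.4f" % (epoch, cost, rate))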
Example no. 6
def train(data, i, model, alpha=0.0001):
    """
    Input:
        data  : (N, 785) flattened 28x28 images, class label in the last column
        i     : minibatch index; rows [i*t, i*t + t) of data are used
        model : dict of parameters "w1", "b1", ..., "wo", "bo"
        alpha : learning rate (gradients are returned, not applied here)
    Output:
        [gradients, cost, rate] : parameter gradients, softmax cross-entropy
        cost and classification rate on the minibatch
    """
    w1 = "w1"
    b1 = "b1"
    w3 = "w3"
    b3 = "b3"
    w5 = "w5"
    b5 = "b5"
    w6 = "w6"
    b6 = "b6"
    wo = "wo"
    bo = "bo"

    n = data.shape[0]
    t = 5
    start = i * t
    start = start % n
    end = start + t
    data = data[start:end, ]

    #data to data and y. label and model
    y = np.array(data[:, -1])
    y.resize(t, 1)
    label = np.zeros((t, 10))
    for j in range(t):  # avoid shadowing the minibatch index i
        label[j][int(y[j])] = 1

    data = np.array(data[:, 0:28 * 28])
    data.resize(t, 1, 28, 28)
    data = data * 1.01

    # Dropout keep probability (inverted dropout: keep each unit with prob p).
    # The dropout logic is inlined here; it would be cleaner to move it into
    # layers as its own operation so it stays transparent to the convnet.
    p = 0.95

    #forward pass
    h1_pre = layers.conv_forward(data, model[w1], model[b1])
    h1 = layers.ReLu_forward(h1_pre)
    #print (h1[0][0])

    h2 = layers.max_pool(h1, 2)
    U2 = (np.random.rand(*h2.shape) < p) / p
    h2 *= U2  # drop!

    h3_pre = layers.conv_forward(h2, model[w3], model[b3])
    h3 = layers.ReLu_forward(h3_pre)

    h4 = layers.max_pool(h3, 2)
    U4 = (np.random.rand(*h4.shape) < p) / p
    h4 *= U4  # drop!

    h5_pre = layers.conv_forward(h4, model[w5], model[b5])
    h5 = layers.ReLu_forward(h5_pre)

    U5 = (np.random.rand(*h5.shape) < p) / p
    h5 *= U5  # drop!

    h6 = layers.full_forward(h5, model[w6], model[b6])
    U6 = (np.random.rand(*h6.shape) < p) / p
    h6 *= U6  # drop!

    out = layers.full_forward(h6, model[wo], model[bo])
    y_hat = layers.softmax(out)

    y_hat_arg = np.argmax(y_hat, axis=1)
    dout = (y_hat - label)
    cost = layers.cost(y_hat, label)
    rate = layers.classification_rate(y, y_hat_arg)

    print("------")
    print("gradient updates : ")
    print("cost : ", cost)
    print("rate : ", rate)

    dout_h6, dwo_gradient, dbo_gradient = layers.full_backward(
        dout, h6, model[wo], model[bo])
    dout_h6 *= U6

    dout_h5, dw6_gradient, db6_gradient = layers.full_backward(
        dout_h6, h5, model[w6], model[b6])
    dout_h5 *= U5

    dout_h4, dw5_gradient, db5_gradient = layers.conv_backward(
        layers.ReLu_backward(h5_pre, dout_h5), h4, model[w5], model[b5])
    dout_h4 *= U4

    dout_h3 = layers.max_pool_back(h3, dout_h4, 2)

    dout_h2, dw3_gradient, db3_gradient = layers.conv_backward(
        layers.ReLu_backward(h3_pre, dout_h3), h2, model[w3], model[b3])
    dout_h2 *= U2

    dout_h1 = layers.max_pool_back(h1, dout_h2, 2)

    d_data, dw1_gradient, db1_gradient = layers.conv_backward(
        layers.ReLu_backward(h1_pre, dout_h1), data, model[w1], model[b1])

    gradients = {}
    gradients[wo] = dwo_gradient
    gradients[bo] = dbo_gradient

    gradients[w6] = dw6_gradient
    gradients[b6] = db6_gradient

    gradients[w5] = dw5_gradient
    gradients[b5] = db5_gradient

    gradients[w3] = dw3_gradient
    gradients[b3] = db3_gradient

    gradients[w1] = dw1_gradient
    gradients[b1] = db1_gradient

    return [gradients, cost, rate]
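
Unlike the previous variant, this train only returns the gradients, so the caller has to apply the update itself. A minimal sketch of such a driver, assuming a hypothetical mnist_train array (one flattened image plus label per row) and an initialized model dict:

alpha = 0.0001
num_steps = 1000  # assumed number of minibatch steps
for step in range(num_steps):
    gradients, cost, rate = train(mnist_train, step, model, alpha)
    # Plain SGD: move every parameter against its gradient.
    for name in model:
        model[name] = model[name] - alpha * gradients[name]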