Example #1
    def backprop(self, x, y):
        # gradients of the cost with respect to each bias vector and weight
        # matrix, initialized to zero
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        activation = x
        activations = [x]  # list to store all the activations, layer by layer

        zs = []  # list to store all the z vectors, layer by layer

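        # feedforward: compute and store z and the activation for each layer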
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(
            zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)
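The method above also relies on a few helpers that are not part of the snippet: NumPy imported as np, element-wise sigmoid and sigmoid_prime functions, and a cost_derivative method on the same class. A minimal sketch of what they might look like for a quadratic cost (an assumption, not taken from the example):

import numpy as np

def sigmoid(z):
    # Element-wise logistic function.
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_prime(z):
    # Derivative of the sigmoid, evaluated at the pre-activation z.
    s = sigmoid(z)
    return s * (1.0 - s)

# On the network class, a quadratic cost would give:
#     def cost_derivative(self, output_activations, y):
#         return output_activations - y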
Example #2
def train(train_x, train_y, learning_rate=0.2):
    # Flatten input (batch_size, 28, 28) -> (batch_size, 784)
    x = train_x.reshape(train_x.shape[0], -1)

    # Turn labels into their one-hot representations
    y = one_hot_encoder(train_y)

    # Initialize weights
    w1, b1 = initialize_weight((784, 256), bias=True)
    w2, b2 = initialize_weight((256, 10), bias=True)

    num_epochs = 50
    loss_history = []
    for epoch in range(1, num_epochs + 1):
        print("Epoch {}/{}\n===============".format(epoch, num_epochs))

        # Forward Prop
        h1 = np.dot(x, w1) + b1
        a1 = sigmoid(h1)
        h2 = np.dot(a1, w2) + b2
        a2 = softmax(h2)
        out = a2

        # Cross Entropy Loss
        loss = cross_entropy_loss(out, train_y)
        loss_history.append(loss)
        print("Loss: {:.6f}".format(loss))

        # Compute and print accuracy
        pred = np.argmax(out, axis=1)
        pred = pred.reshape(pred.shape[0], 1)
        acc = np.mean(pred == train_y)
        print("Accuracy: {:.2f}%\n".format(acc * 100))

        # Backward Prop
        m = out.shape[0]
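        # For softmax + cross-entropy, the gradient with respect to the
        # output pre-activations reduces to (a2 - y)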
        dh2 = a2 - y
        dw2 = (1 / m) * np.dot(a1.T, dh2)
        db2 = (1 / m) * np.sum(dh2, axis=0, keepdims=True)

        dh1 = np.dot(dh2, w2.T) * sigmoid_prime(a1)
        dw1 = (1 / m) * np.dot(x.T, dh1)
        db1 = (1 / m) * np.sum(dh1, axis=0, keepdims=True)

        # Weight (and bias) update
        w1 -= learning_rate * dw1
        b1 -= learning_rate * db1
        w2 -= learning_rate * dw2
        b2 -= learning_rate * db2

    return w1, b1, w2, b2, loss_history
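This train function assumes several helpers that do not appear in the snippet: one_hot_encoder, initialize_weight, sigmoid, sigmoid_prime, softmax, and cross_entropy_loss. Note that sigmoid_prime is applied to the activation a1 rather than to the pre-activation h1, so the assumed definition takes an activation. A minimal sketch of these helpers, under those assumptions:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_prime(a):
    # Takes an activation (sigmoid output), matching sigmoid_prime(a1) above.
    return a * (1.0 - a)

def softmax(z):
    # Row-wise softmax with max subtraction for numerical stability.
    z = z - np.max(z, axis=1, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=1, keepdims=True)

def one_hot_encoder(labels, num_classes=10):
    # (num_samples, 1) integer labels -> (num_samples, num_classes) one-hot rows.
    labels = labels.reshape(-1).astype(int)
    one_hot = np.zeros((labels.shape[0], num_classes))
    one_hot[np.arange(labels.shape[0]), labels] = 1.0
    return one_hot

def initialize_weight(shape, bias=False):
    # Small random weights and a zero bias row matching the output dimension.
    w = np.random.randn(*shape) * 0.01
    if bias:
        return w, np.zeros((1, shape[1]))
    return w

def cross_entropy_loss(probs, labels):
    # Mean negative log-likelihood of the true class; labels are integer
    # class indices of shape (num_samples, 1).
    m = probs.shape[0]
    idx = labels.reshape(-1).astype(int)
    return -np.mean(np.log(probs[np.arange(m), idx] + 1e-12))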
Example #3
def test_sigmoid():
    z = np.arange(-10, 10, 0.1)
    y = act.sigmoid(z)
    y_p = act.sigmoid_prime(z)

    plt.figure()
    plt.subplot(1, 2, 1)
    plt.plot(z, y)
    plt.title('sigmoid')

    plt.subplot(1, 2, 2)
    plt.plot(z, y_p)
    plt.title('derivative sigmoid')
    plt.show()
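test_sigmoid relies on numpy, matplotlib, and an activation module imported as act that exposes sigmoid and sigmoid_prime; none of these imports are shown in the snippet. A self-contained stand-in for those assumptions:

import numpy as np
import matplotlib.pyplot as plt
from types import SimpleNamespace

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def _sigmoid_prime(z):
    s = _sigmoid(z)
    return s * (1.0 - s)

# Stand-in for the activation module the test uses as `act`.
act = SimpleNamespace(sigmoid=_sigmoid, sigmoid_prime=_sigmoid_prime)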
Example #4
def backprop(x, y, biases, weights, cost, num_layers):
    """ function of backpropagation
	Return a tuple ``(nabla_b, nabla_w)`` representing the
	gradient of all biases and weights.

	Args:
	x, y: input image x and label y
	biases, weights (list): list of biases and weights of entire network
	cost (CrossEntropyCost): object of cost computation
	num_layers (int): number of layers of the network

	Returns:
	(nabla_b, nabla_w): tuple containing the gradient for all the biases
	and weights. nabla_b and nabla_w should be the same shape as 
	input biases and weights
	"""
    # zero-initialized lists to accumulate the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward pass: h[i] holds the activation of layer i, with h[0] = x
    h = []
    h.append(x)

    for i in range((num_layers - 1)):
        a = sigmoid(np.dot(weights[i], h[i]) + biases[i])
        h.append(a)

    # gradient of the error with respect to the output layer;
    # h[-1] holds the activations of the output layer
    delta = cost.delta(h[-1], y)

    # backward pass: propagate the error and accumulate the gradient of every
    # weight and bias, starting from the output layer
    nabla_b[-1] = delta * sigmoid_prime(h[-1])
    nabla_w[-1] = np.dot(nabla_b[-1], h[-2].transpose())

    for i in range(num_layers - 3, -1, -1):
        nabla_b[i] = np.dot(weights[i + 1].transpose(),
                            nabla_b[i + 1]) * sigmoid_prime(h[i + 1])
        nabla_w[i] = np.dot(nabla_b[i], h[i].transpose())

    return (nabla_b, nabla_w)
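A rough usage sketch follows. The layer sizes, the CrossEntropyCost stub, and the sigmoid/sigmoid_prime helpers are assumptions about the surrounding code, not part of the example:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_prime(a):
    # The backprop above applies sigmoid_prime to stored activations,
    # so this sketch takes an activation as input.
    return a * (1.0 - a)

class CrossEntropyCost:
    @staticmethod
    def delta(a, y):
        # Output-layer error for a sigmoid/cross-entropy pairing.
        return a - y

sizes = [784, 30, 10]                                  # hypothetical layer sizes
biases = [np.random.randn(n, 1) for n in sizes[1:]]
weights = [np.random.randn(n, m) for m, n in zip(sizes[:-1], sizes[1:])]

x = np.random.randn(784, 1)                            # one column-vector input
y = np.zeros((10, 1)); y[3] = 1.0                      # one-hot label

nabla_b, nabla_w = backprop(x, y, biases, weights, CrossEntropyCost, len(sizes))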
Example #5
def train(train_x, train_y, learning_rate=0.1, num_epochs=50, batch_size=1):
    # Flatten input (num_samples, 28, 28) -> (num_samples, 784)
    x = train_x.reshape(train_x.shape[0], -1)
    num_samples = x.shape[0]

    # Turn labels into their one-hot representations
    y = one_hot_encoder(train_y)

    # Make a data loader
    trainloader = dataloader(x, y, batch_size=batch_size, shuffle=True)

    # Initialize weights
    w1, b1 = initialize_weight((784, 256), bias=True)
    w2, b2 = initialize_weight((256, 10), bias=True)

    loss_history = []
    for epoch in range(1, num_epochs + 1):
        print("Epoch {}/{}\n===============".format(epoch, num_epochs))

        batch_loss = 0
        acc = 0
        for inputs, labels in trainloader:
            # Number of samples per batch
            m = inputs.shape[0]

            # Forward Prop
            h1 = np.dot(inputs, w1) + b1
            a1 = sigmoid(h1)
            h2 = np.dot(a1, w2) + b2
            a2 = softmax(h2)
            out = a2

            # Cross-entropy loss, accumulated over the epoch (labels are
            # converted back from one-hot to integer class indices)
            batch_loss += cross_entropy_loss(out, labels.argmax(axis=1).reshape(m, 1))

            # Compute Accuracy
            pred = np.argmax(out, axis=1)
            pred = pred.reshape(pred.shape[0], 1)
            acc += np.sum(pred == labels.argmax(axis=1).reshape(m, 1))

            # Backward Prop
            dh2 = a2 - labels
            dw2 = (1 / m) * np.dot(a1.T, dh2)
            db2 = (1 / m) * np.sum(dh2, axis=0, keepdims=True)

            dh1 = np.dot(dh2, w2.T) * sigmoid_prime(a1)
            dw1 = (1 / m) * np.dot(inputs.T, dh1)
            db1 = (1 / m) * np.sum(dh1, axis=0, keepdims=True)

            # Weight (and bias) update
            w1 -= learning_rate * dw1
            b1 -= learning_rate * db1
            w2 -= learning_rate * dw2
            b2 -= learning_rate * db2

        loss_history.append(batch_loss / num_samples)
        print("Loss: {:.6f}".format(batch_loss / num_samples))
        print("Accuracy: {:.2f}%\n".format(acc / num_samples * 100))

    return w1, b1, w2, b2, loss_history
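Here dataloader, one_hot_encoder, initialize_weight, and the activation/loss helpers are the same assumptions sketched after the earlier train example. Since the loader is built once and iterated on every epoch, a re-iterable sketch might look like this (an assumption, not the actual implementation):

import numpy as np

class dataloader:
    # Re-iterable mini-batch loader: __iter__ reshuffles the indices on every
    # pass, so iterating once per epoch yields fresh, shuffled batches.
    def __init__(self, x, y, batch_size=1, shuffle=True):
        self.x, self.y = x, y
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __iter__(self):
        idx = np.arange(self.x.shape[0])
        if self.shuffle:
            np.random.shuffle(idx)
        for start in range(0, idx.shape[0], self.batch_size):
            batch = idx[start:start + self.batch_size]
            yield self.x[batch], self.y[batch]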