Ejemplo n.º 1
0
    def fizzbuzz_accuracy(low: int, hi: int, net: Layer) -> float:
        """Return the fraction of n in range(low, hi) that `net` classifies correctly.

        A prediction counts as correct when the argmax of `net.forward` on
        `binary_encode(n)` equals the argmax of `fizz_buzz_encode(n)`.
        The count is divided by `hi - low`, so `hi == low` raises
        ZeroDivisionError.
        """
        hits = sum(
            1
            for number in range(low, hi)
            if argmax(net.forward(binary_encode(number)))
            == argmax(fizz_buzz_encode(number))
        )
        return hits / (hi - low)
Ejemplo n.º 2
0
        self.tanh, gradient)    

class Relu(Layer):
    """Layer that replaces each input element x with max(x, 0)."""

    def forward(self, input: Tensor) -> Tensor:
        """Save `input` for the backward pass and return it rectified.

        The rectifier `max(x, 0)` is handed to `tensor_apply` together
        with the input tensor.
        """
        self.input = input

        def rectify(value):
            return max(value, 0)

        return tensor_apply(rectify, input)

    def backward(self, gradient: Tensor) -> Tensor:
        """Combine the saved input with `gradient` via `tensor_combine`.

        The combining function keeps the gradient value where the saved
        input value is strictly positive and yields 0 otherwise.
        """
        def gate(value, grad):
            if value > 0:
                return grad
            return 0

        return tensor_combine(gate, self.input, gradient)

    
from neural_networks import binary_encode, fizz_buzz_encode, argmax

# Training data: the integers 101..1023, with inputs produced by
# binary_encode and targets produced by fizz_buzz_encode.
xs = list(map(binary_encode, range(101, 1024)))
ys = list(map(fizz_buzz_encode, range(101, 1024)))

NUM_HIDDEN = 25

# Fix the RNG seed before the network is built.
random.seed(0)

# 10 inputs -> NUM_HIDDEN tanh units -> 4 sigmoid outputs.
net = Sequential([
    Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'),
    Tanh(),
    Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform'),
    Sigmoid(),
])


def fizzbuzz_accuracy(low: int, hi: int, net: Layer) -> float:
Ejemplo n.º 3
0
def _train_one_epoch(net: "Layer", xs, ys, loss: "Loss",
                     optimizer: "Optimizer") -> float:
    """Make one pass over the paired examples, updating `net` after each one.

    For every (x, y) pair: run the forward pass, add the loss to a running
    total, backpropagate the loss gradient, and let `optimizer` take a step.

    Returns the loss summed over all examples in the pass.
    """
    epoch_loss = 0.0
    for x, y in zip(xs, ys):
        predicted = net.forward(x)
        epoch_loss += loss.loss(predicted, y)
        net.backward(loss.gradient(predicted, y))
        optimizer.step(net)
    return epoch_loss


def _fizzbuzz_accuracy(low: int, hi: int, net: "Layer") -> float:
    """Return the fraction of n in range(low, hi) that `net` classifies correctly.

    A prediction is correct when the argmax of the network output for
    `binary_encode(n)` equals the argmax of `fizz_buzz_encode(n)`.
    Raises ZeroDivisionError when `hi == low`.
    """
    from neural_networks import binary_encode, fizz_buzz_encode, argmax

    num_correct = 0
    for n in range(low, hi):
        predicted = argmax(net.forward(binary_encode(n)))
        actual = argmax(fizz_buzz_encode(n))
        if predicted == actual:
            num_correct += 1

    return num_correct / (hi - low)


def _xor_demo() -> None:
    """Train a 2-2-1 network on XOR for 3000 epochs and print its parameters."""
    import tqdm

    # Training data: the four XOR input pairs and their targets.
    xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
    ys = [[0.], [1.], [1.], [0.]]

    random.seed(0)

    net = Sequential([
        Linear(input_dim=2, output_dim=2),
        Sigmoid(),
        Linear(input_dim=2, output_dim=1)
    ])

    optimizer = GradientDescent(learning_rate=0.1)
    loss = SSE()

    with tqdm.trange(3000) as t:
        for _ in t:
            epoch_loss = _train_one_epoch(net, xs, ys, loss, optimizer)
            t.set_description(f"xor loss {epoch_loss:.3f}")

    for param in net.params():
        print(param)


def _fizzbuzz_demo() -> None:
    """Train two FizzBuzz classifiers and print each one's test accuracy.

    Both networks train on the integers 101..1023 and are then scored on
    1..100.  The first ends in a Sigmoid layer and uses SSE loss for 1000
    epochs; the second drops the final Sigmoid and uses
    SoftmaxCrossEntropy for 100 epochs.
    """
    import tqdm
    from neural_networks import binary_encode, fizz_buzz_encode

    xs = [binary_encode(n) for n in range(101, 1024)]
    ys = [fizz_buzz_encode(n) for n in range(101, 1024)]

    NUM_HIDDEN = 25

    # First model: sigmoid output trained with sum-of-squared-errors loss.
    random.seed(0)

    net = Sequential([
        Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'),
        Tanh(),
        Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform'),
        Sigmoid()
    ])

    optimizer = Momentum(learning_rate=0.1, momentum=0.9)
    loss = SSE()

    with tqdm.trange(1000) as t:
        for _ in t:
            epoch_loss = _train_one_epoch(net, xs, ys, loss, optimizer)
            accuracy = _fizzbuzz_accuracy(101, 1024, net)
            t.set_description(f"fb loss: {epoch_loss:.2f} acc: {accuracy:.2f}")

    # Now check results on the test set
    print("test results", _fizzbuzz_accuracy(1, 101, net))

    # Second model: same layers minus the final sigmoid, because the
    # loss object is SoftmaxCrossEntropy.  Re-seed so it starts from the
    # same random state as the first model.
    random.seed(0)

    net = Sequential([
        Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'),
        Tanh(),
        Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform')
    ])

    optimizer = Momentum(learning_rate=0.1, momentum=0.9)
    loss = SoftmaxCrossEntropy()

    with tqdm.trange(100) as t:
        for _ in t:
            epoch_loss = _train_one_epoch(net, xs, ys, loss, optimizer)
            accuracy = _fizzbuzz_accuracy(101, 1024, net)
            t.set_description(f"fb loss: {epoch_loss:.3f} acc: {accuracy:.2f}")

    # Again check results on the test set
    print("test results", _fizzbuzz_accuracy(1, 101, net))


def _plot_digit_grid(images) -> None:
    """Draw the first 100 images on a 10x10 grid of axes with ticks hidden.

    The figure is only built, never displayed; call `plt.show()` after this
    function if you want to see it.
    """
    import matplotlib.pyplot as plt

    _, ax = plt.subplots(10, 10)

    for i in range(10):
        for j in range(10):
            cell = ax[i][j]
            # Plot each image in black and white and hide the axes.
            cell.imshow(images[10 * i + j], cmap='Greys')
            cell.xaxis.set_visible(False)
            cell.yaxis.set_visible(False)


def _recenter_and_flatten(images, avg: float):
    """Return each image as one flat list of `(pixel - avg) / 256` values."""
    return [[(pixel - avg) / 256 for row in image for pixel in row]
            for image in images]


def _mnist_loop(model: "Layer",
                images: "List[Tensor]",
                labels: "List[Tensor]",
                loss: "Loss",
                optimizer: "Optimizer" = None) -> None:
    """Run `model` over every image once, reporting running loss and accuracy.

    When `optimizer` is given, the loss gradient is backpropagated and the
    optimizer steps after each example (training).  When it is None the
    model is only evaluated.  Metrics are shown in the tqdm progress bar;
    nothing is returned.
    """
    import tqdm
    from neural_networks import argmax

    correct = 0  # Track number of correct predictions.
    total_loss = 0.0  # Track total loss.

    with tqdm.trange(len(images)) as t:
        for i in t:
            predicted = model.forward(images[i])
            if argmax(predicted) == argmax(labels[i]):
                correct += 1
            total_loss += loss.loss(predicted, labels[i])

            # If we're training, backpropagate gradient and update weights.
            if optimizer is not None:
                gradient = loss.gradient(predicted, labels[i])
                model.backward(gradient)
                optimizer.step(model)

            # And update our metrics in the progress bar.
            avg_loss = total_loss / (i + 1)
            acc = correct / (i + 1)
            t.set_description(f"mnist loss: {avg_loss:.3f} acc: {acc:.3f}")


def _mnist_demo() -> None:
    """Load MNIST, preprocess it, then train and evaluate two models.

    The first model is a single Linear(784, 10) layer; the second is a
    three-layer network with Tanh activations and two Dropout layers.
    Both use SoftmaxCrossEntropy loss and a Momentum optimizer.
    """
    import mnist

    # This will download the data, change this to where you want it.
    # (Yes, it's a 0-argument function, that's what the library expects.)
    mnist.temporary_dir = lambda: '/tmp'

    # Each of these functions first downloads the data and returns a numpy
    # array.  We call .tolist() because our "tensors" are just lists.
    train_images = mnist.train_images().tolist()
    train_labels = mnist.train_labels().tolist()

    assert shape(train_images) == [60000, 28, 28]
    assert shape(train_labels) == [60000]

    _plot_digit_grid(train_images)

    # Load the MNIST test data
    test_images = mnist.test_images().tolist()
    test_labels = mnist.test_labels().tolist()

    assert shape(test_images) == [10000, 28, 28]
    assert shape(test_labels) == [10000]

    # Compute the average pixel value over the training set; the same
    # value is used to recenter the test set.
    avg = tensor_sum(train_images) / 60000 / 28 / 28

    # Recenter, rescale, and flatten
    train_images = _recenter_and_flatten(train_images, avg)
    test_images = _recenter_and_flatten(test_images, avg)

    assert shape(train_images) == [60000, 784], "images should be flattened"
    assert shape(test_images) == [10000, 784], "images should be flattened"

    # After centering, average pixel should be very close to 0
    assert -0.0001 < tensor_sum(train_images) < 0.0001

    # One-hot encode the training and test labels
    train_labels = [one_hot_encode(label) for label in train_labels]
    test_labels = [one_hot_encode(label) for label in test_labels]

    assert shape(train_labels) == [60000, 10]
    assert shape(test_labels) == [10000, 10]

    # The logistic regression model for MNIST

    random.seed(0)

    # Logistic regression is just a linear layer followed by softmax
    model = Linear(784, 10)
    loss = SoftmaxCrossEntropy()

    # This optimizer seems to work
    optimizer = Momentum(learning_rate=0.01, momentum=0.99)

    # Train on the training data
    _mnist_loop(model, train_images, train_labels, loss, optimizer)

    # Test on the test data (no optimizer means just evaluate)
    _mnist_loop(model, test_images, test_labels, loss)

    # A deep neural network for MNIST

    random.seed(0)

    # Name them so we can turn train on and off
    dropout1 = Dropout(0.1)
    dropout2 = Dropout(0.1)

    model = Sequential([
        Linear(784, 30),  # Hidden layer 1: size 30
        dropout1,
        Tanh(),
        Linear(30, 10),  # Hidden layer 2: size 10
        dropout2,
        Tanh(),
        Linear(10, 10)  # Output layer: size 10
    ])

    optimizer = Momentum(learning_rate=0.01, momentum=0.99)
    loss = SoftmaxCrossEntropy()

    # Enable dropout and train (original author's note: takes > 20 minutes
    # on a laptop).
    dropout1.train = dropout2.train = True
    _mnist_loop(model, train_images, train_labels, loss, optimizer)

    # Disable dropout and evaluate
    dropout1.train = dropout2.train = False
    _mnist_loop(model, test_images, test_labels, loss)


def main():
    """Run the demos in order: XOR, FizzBuzz (two losses), then MNIST.

    Each demo re-seeds `random` before building its network(s), trains with
    a tqdm progress bar, and prints or displays its own results.  The MNIST
    demo downloads the dataset via the `mnist` package.
    """
    _xor_demo()
    _fizzbuzz_demo()
    _mnist_demo()