def fizzbuzz_accuracy(low: int, hi: int, net: Layer) -> float:
    """Return the fraction of numbers in range(low, hi) that net labels correctly.

    A number counts as correct when the argmax of ``net.forward`` applied to
    its ``binary_encode`` encoding equals the argmax of its
    ``fizz_buzz_encode`` encoding.

    Raises ZeroDivisionError when ``hi == low``, because the count is
    divided by ``hi - low``.
    """
    hits = sum(
        1
        for number in range(low, hi)
        if argmax(net.forward(binary_encode(number)))
        == argmax(fizz_buzz_encode(number))
    )
    return hits / (hi - low)
    def fizzbuzz_accuracy(low: int, hi: int, net: Layer) -> float:
        """Measure how often net's predicted class matches the FizzBuzz label.

        Every n in range(low, hi) is encoded with ``binary_encode`` and fed
        through ``net.forward``; the argmax of that output is compared with
        the argmax of ``fizz_buzz_encode(n)``. The result is the number of
        matches divided by ``hi - low`` (so ``hi == low`` raises
        ZeroDivisionError).
        """
        # One truthy/falsy outcome per number, in ascending order.
        outcomes = [
            argmax(net.forward(binary_encode(value)))
            == argmax(fizz_buzz_encode(value))
            for value in range(low, hi)
        ]
        matched = sum(1 for outcome in outcomes if outcome)
        return matched / (hi - low)
def main() -> None:
    """Run the deep-learning demos one after another.

    In order, this function trains and reports on:

    1. a small network on the XOR truth table (SSE loss, GradientDescent),
    2. a FizzBuzz classifier ending in a Sigmoid layer, trained with SSE,
    3. the same FizzBuzz network without the Sigmoid, trained with
       SoftmaxCrossEntropy,
    4. a single Linear layer on MNIST (SoftmaxCrossEntropy + Momentum),
    5. a network of three Linear layers with Dropout on MNIST.

    Progress is reported through tqdm progress bars; the learned XOR
    parameters and the FizzBuzz test accuracies are printed. Names such as
    random, Sequential, Linear, SSE, shape, tensor_sum and one_hot_encode are
    not defined here and must already be in scope at module level.
    """

    # ---- XOR revisited ----

    # Training data: the four XOR input pairs and their expected outputs.
    xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
    ys = [[0.], [1.], [1.], [0.]]

    # Seed the RNG before building the network (presumably so that weight
    # initialisation is repeatable from run to run).
    random.seed(0)

    # 2 inputs -> 2 units -> Sigmoid -> 1 output; no activation after the
    # last Linear layer.
    net = Sequential([
        Linear(input_dim=2, output_dim=2),
        Sigmoid(),
        Linear(input_dim=2, output_dim=1)
    ])

    import tqdm

    optimizer = GradientDescent(learning_rate=0.1)
    loss = SSE()

    # 3000 epochs; the optimizer steps after every single example.
    with tqdm.trange(3000) as t:
        for epoch in t:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = net.forward(x)
                epoch_loss += loss.loss(predicted, y)
                gradient = loss.gradient(predicted, y)
                net.backward(gradient)

                optimizer.step(net)

            # Show the loss summed over this epoch's examples.
            t.set_description(f"xor loss {epoch_loss:.3f}")

    # Print the parameters the network ended up with.
    for param in net.params():
        print(param)


    # ---- FizzBuzz revisited ----

    from scratch.neural_networks import binary_encode, fizz_buzz_encode, argmax

    # Train on 101..1023; 1..100 is held out as the test set further down.
    # Note that xs and ys are rebound here, replacing the XOR data.
    xs = [binary_encode(n) for n in range(101, 1024)]
    ys = [fizz_buzz_encode(n) for n in range(101, 1024)]

    NUM_HIDDEN = 25

    random.seed(0)

    # 10 inputs -> NUM_HIDDEN units -> Tanh -> 4 outputs -> Sigmoid.
    net = Sequential([
        Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'),
        Tanh(),
        Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform'),
        Sigmoid()
    ])

    def fizzbuzz_accuracy(low: int, hi: int, net: Layer) -> float:
        """Return the fraction of n in range(low, hi) that net gets right.

        A prediction is right when the argmax of the network output equals
        the argmax of fizz_buzz_encode(n). Divides by hi - low, so
        hi == low raises ZeroDivisionError.
        """
        num_correct = 0
        for n in range(low, hi):
            x = binary_encode(n)
            predicted = argmax(net.forward(x))
            actual = argmax(fizz_buzz_encode(n))
            if predicted == actual:
                num_correct += 1

        return num_correct / (hi - low)

    optimizer = Momentum(learning_rate=0.1, momentum=0.9)
    loss = SSE()

    # 1000 epochs, stepping the optimizer after every example.
    with tqdm.trange(1000) as t:
        for epoch in t:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = net.forward(x)
                epoch_loss += loss.loss(predicted, y)
                gradient = loss.gradient(predicted, y)
                net.backward(gradient)

                optimizer.step(net)

            # Accuracy is recomputed over the whole training range each epoch.
            accuracy = fizzbuzz_accuracy(101, 1024, net)
            t.set_description(f"fb loss: {epoch_loss:.2f} acc: {accuracy:.2f}")

    # Now check results on the held-out test set (1..100).
    print("test results", fizzbuzz_accuracy(1, 101, net))

    # Second FizzBuzz attempt: same layers minus the final Sigmoid, trained
    # with SoftmaxCrossEntropy instead of SSE and for 100 epochs, not 1000.
    random.seed(0)

    net = Sequential([
        Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'),
        Tanh(),
        Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform')
        # No final sigmoid layer now
    ])

    optimizer = Momentum(learning_rate=0.1, momentum=0.9)
    loss = SoftmaxCrossEntropy()

    with tqdm.trange(100) as t:
        for epoch in t:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = net.forward(x)
                epoch_loss += loss.loss(predicted, y)
                gradient = loss.gradient(predicted, y)
                net.backward(gradient)

                optimizer.step(net)

            accuracy = fizzbuzz_accuracy(101, 1024, net)
            t.set_description(f"fb loss: {epoch_loss:.3f} acc: {accuracy:.2f}")

    # Again check results on the test set
    print("test results", fizzbuzz_accuracy(1, 101, net))


    # ---- Load the MNIST training data ----

    import mnist

    # This will download the data, change this to where you want it.
    # (Yes, it's a 0-argument function, that's what the library expects.)
    # (Yes, I'm assigning a lambda to a variable, like I said never to do.)
    mnist.temporary_dir = lambda: '/tmp'

    # Each of these functions first downloads the data and returns a numpy array.
    # We call .tolist() because our "tensors" are just lists.
    train_images = mnist.train_images().tolist()
    train_labels = mnist.train_labels().tolist()

    # Sanity checks: 60000 images of 28x28 pixels, one label per image.
    assert shape(train_images) == [60000, 28, 28]
    assert shape(train_labels) == [60000]

    import matplotlib.pyplot as plt

    # Draw the first 100 training images (indices 0..99) on a 10x10 grid.
    fig, ax = plt.subplots(10, 10)

    for i in range(10):
        for j in range(10):
            # Plot each image in black and white and hide the axes.
            ax[i][j].imshow(train_images[10 * i + j], cmap='Greys')
            ax[i][j].xaxis.set_visible(False)
            ax[i][j].yaxis.set_visible(False)

    # The figure is built but not displayed; plt.show() is left disabled here.


    # ---- Load the MNIST test data ----

    test_images = mnist.test_images().tolist()
    test_labels = mnist.test_labels().tolist()

    assert shape(test_images) == [10000, 28, 28]
    assert shape(test_labels) == [10000]


    # ---- Recenter the images ----

    # Compute the average pixel value over the training set only; the same
    # value is then used to shift the test images as well.
    avg = tensor_sum(train_images) / 60000 / 28 / 28

    # Recenter, rescale, and flatten: the nested "for row ... for pixel"
    # turns each 28x28 image into a single list of 784 values.
    train_images = [[(pixel - avg) / 256 for row in image for pixel in row]
                    for image in train_images]
    test_images = [[(pixel - avg) / 256 for row in image for pixel in row]
                   for image in test_images]

    assert shape(train_images) == [60000, 784], "images should be flattened"
    assert shape(test_images) == [10000, 784], "images should be flattened"

    # After centering, the training pixels should balance out around 0.
    # NOTE(review): this bounds tensor_sum(train_images) itself (presumably
    # the total over all pixels), not a per-pixel mean.
    assert -0.0001 < tensor_sum(train_images) < 0.0001


    # ---- One-hot encode the training and test labels ----

    train_labels = [one_hot_encode(label) for label in train_labels]
    test_labels = [one_hot_encode(label) for label in test_labels]

    assert shape(train_labels) == [60000, 10]
    assert shape(test_labels) == [10000, 10]


    # ---- Training loop ----

    # tqdm was already imported earlier in this function; this repeats it.
    import tqdm

    def loop(model: Layer,
             images: List[Tensor],
             labels: List[Tensor],
             loss: Loss,
             optimizer: Optimizer = None) -> None:
        """Make one pass over images/labels, training only if optimizer is given.

        For every example the model's prediction is scored (argmax of the
        prediction against argmax of the label) and its loss is added to a
        running total. When optimizer is not None, the loss gradient is also
        back-propagated and optimizer.step(model) is called. The running
        average loss and accuracy are written to the tqdm progress bar;
        nothing is returned.
        """
        correct = 0         # Track number of correct predictions.
        total_loss = 0.0    # Track total loss.

        with tqdm.trange(len(images)) as t:
            for i in t:
                predicted = model.forward(images[i])             # Predict.
                if argmax(predicted) == argmax(labels[i]):       # Check for
                    correct += 1                                 # correctness.
                total_loss += loss.loss(predicted, labels[i])    # Compute loss.

                # If we're training, backpropagate gradient and update weights.
                if optimizer is not None:
                    gradient = loss.gradient(predicted, labels[i])
                    model.backward(gradient)
                    optimizer.step(model)

                # And update our metrics in the progress bar (averages over
                # the i + 1 examples seen so far).
                avg_loss = total_loss / (i + 1)
                acc = correct / (i + 1)
                t.set_description(f"mnist loss: {avg_loss:.3f} acc: {acc:.3f}")


    # ---- The logistic regression model for MNIST ----

    random.seed(0)

    # Logistic regression is just a linear layer followed by softmax
    # (the softmax part comes from the SoftmaxCrossEntropy loss below).
    model = Linear(784, 10)
    loss = SoftmaxCrossEntropy()

    # This optimizer seems to work
    optimizer = Momentum(learning_rate=0.01, momentum=0.99)

    # Train on the training data (a single pass)
    loop(model, train_images, train_labels, loss, optimizer)

    # Test on the test data (no optimizer means just evaluate)
    loop(model, test_images, test_labels, loss)


    # ---- A deep neural network for MNIST ----

    random.seed(0)

    # Name them so we can turn train on and off
    dropout1 = Dropout(0.1)
    dropout2 = Dropout(0.1)

    model = Sequential([
        Linear(784, 30),  # Hidden layer 1: size 30
        dropout1,
        Tanh(),
        Linear(30, 10),   # Hidden layer 2: size 10
        dropout2,
        Tanh(),
        Linear(10, 10)    # Output layer: size 10
    ])


    # ---- Training the deep model for MNIST ----

    optimizer = Momentum(learning_rate=0.01, momentum=0.99)
    loss = SoftmaxCrossEntropy()

    # Enable dropout and train (takes > 20 minutes on my laptop!)
    dropout1.train = dropout2.train = True
    loop(model, train_images, train_labels, loss, optimizer)

    # Disable dropout and evaluate
    dropout1.train = dropout2.train = False
    loop(model, test_images, test_labels, loss)
def main() -> None:
    """Train and evaluate a series of example networks, start to finish.

    The stages run in this order, each rebinding ``net``/``model``,
    ``loss`` and ``optimizer``:

    * XOR with a Sigmoid hidden layer, SSE loss and GradientDescent;
    * FizzBuzz with a Tanh hidden layer and Sigmoid output, SSE loss;
    * FizzBuzz again without the Sigmoid output, SoftmaxCrossEntropy loss;
    * MNIST with a single Linear layer;
    * MNIST with three Linear layers, Tanh activations and Dropout.

    Output goes to tqdm progress bars and ``print``. The function relies on
    module-level names it does not define (random, Sequential, Linear,
    Sigmoid, Tanh, Dropout, SSE, SoftmaxCrossEntropy, GradientDescent,
    Momentum, shape, tensor_sum, one_hot_encode, and the Layer, Loss,
    Optimizer, Tensor and List types).
    """

    # XOR revisited

    # Training data: every XOR input pair with its target output.
    xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
    ys = [[0.], [1.], [1.], [0.]]

    # Reset the RNG before each model is built (presumably for repeatable
    # initial weights).
    random.seed(0)

    net = Sequential([
        Linear(input_dim=2, output_dim=2),
        Sigmoid(),
        Linear(input_dim=2, output_dim=1)
    ])

    import tqdm

    optimizer = GradientDescent(learning_rate=0.1)
    loss = SSE()

    # One optimizer step per example, for 3000 passes over the 4 examples.
    with tqdm.trange(3000) as t:
        for epoch in t:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = net.forward(x)
                epoch_loss += loss.loss(predicted, y)
                gradient = loss.gradient(predicted, y)
                net.backward(gradient)

                optimizer.step(net)

            t.set_description(f"xor loss {epoch_loss:.3f}")

    # Dump the trained parameters.
    for param in net.params():
        print(param)

    # FizzBuzz Revisited

    from scratch.neural_networks import binary_encode, fizz_buzz_encode, argmax

    # Training inputs/targets for 101..1023 (this replaces the XOR xs/ys).
    xs = [binary_encode(n) for n in range(101, 1024)]
    ys = [fizz_buzz_encode(n) for n in range(101, 1024)]

    NUM_HIDDEN = 25

    random.seed(0)

    net = Sequential([
        Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'),
        Tanh(),
        Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform'),
        Sigmoid()
    ])

    def fizzbuzz_accuracy(low: int, hi: int, net: Layer) -> float:
        """Fraction of n in range(low, hi) whose predicted class is correct.

        Compares argmax(net.forward(binary_encode(n))) with
        argmax(fizz_buzz_encode(n)) and divides the match count by
        hi - low (so an empty range with hi == low raises
        ZeroDivisionError).
        """
        num_correct = 0
        for n in range(low, hi):
            x = binary_encode(n)
            predicted = argmax(net.forward(x))
            actual = argmax(fizz_buzz_encode(n))
            if predicted == actual:
                num_correct += 1

        return num_correct / (hi - low)

    optimizer = Momentum(learning_rate=0.1, momentum=0.9)
    loss = SSE()

    with tqdm.trange(1000) as t:
        for epoch in t:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = net.forward(x)
                epoch_loss += loss.loss(predicted, y)
                gradient = loss.gradient(predicted, y)
                net.backward(gradient)

                optimizer.step(net)

            # Training-set accuracy, recomputed from scratch every epoch.
            accuracy = fizzbuzz_accuracy(101, 1024, net)
            t.set_description(f"fb loss: {epoch_loss:.2f} acc: {accuracy:.2f}")

    # Now check results on the test set (1..100, never used for training)
    print("test results", fizzbuzz_accuracy(1, 101, net))

    # Rebuild the network from the same seed, this time without the output
    # Sigmoid, and train it with SoftmaxCrossEntropy for 100 epochs.
    random.seed(0)

    net = Sequential([
        Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'),
        Tanh(),
        Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform')
        # No final sigmoid layer now
    ])

    optimizer = Momentum(learning_rate=0.1, momentum=0.9)
    loss = SoftmaxCrossEntropy()

    with tqdm.trange(100) as t:
        for epoch in t:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = net.forward(x)
                epoch_loss += loss.loss(predicted, y)
                gradient = loss.gradient(predicted, y)
                net.backward(gradient)

                optimizer.step(net)

            accuracy = fizzbuzz_accuracy(101, 1024, net)
            t.set_description(f"fb loss: {epoch_loss:.3f} acc: {accuracy:.2f}")

    # Again check results on the test set
    print("test results", fizzbuzz_accuracy(1, 101, net))

    # Load the MNIST data

    import mnist

    # This will download the data, change this to where you want it.
    # (Yes, it's a 0-argument function, that's what the library expects.)
    # (Yes, I'm assigning a lambda to a variable, like I said never to do.)
    mnist.temporary_dir = lambda: '/tmp'

    # Each of these functions first downloads the data and returns a numpy array.
    # We call .tolist() because our "tensors" are just lists.
    train_images = mnist.train_images().tolist()
    train_labels = mnist.train_labels().tolist()

    # Expect 60000 images of 28x28 pixels and one label per image.
    assert shape(train_images) == [60000, 28, 28]
    assert shape(train_labels) == [60000]

    import matplotlib.pyplot as plt

    # A 10x10 grid of axes holding training images 0..99.
    fig, ax = plt.subplots(10, 10)

    for i in range(10):
        for j in range(10):
            # Plot each image in black and white and hide the axes.
            ax[i][j].imshow(train_images[10 * i + j], cmap='Greys')
            ax[i][j].xaxis.set_visible(False)
            ax[i][j].yaxis.set_visible(False)

    # Displaying the figure (plt.show()) is intentionally left disabled.

    # Load the MNIST test data

    test_images = mnist.test_images().tolist()
    test_labels = mnist.test_labels().tolist()

    assert shape(test_images) == [10000, 28, 28]
    assert shape(test_labels) == [10000]

    # Recenter the images

    # Compute the average pixel value (from the training images only).
    avg = tensor_sum(train_images) / 60000 / 28 / 28

    # Recenter, rescale, and flatten. Both sets are shifted by the training
    # average; iterating rows then pixels collapses 28x28 into 784 values.
    train_images = [[(pixel - avg) / 256 for row in image for pixel in row]
                    for image in train_images]
    test_images = [[(pixel - avg) / 256 for row in image for pixel in row]
                   for image in test_images]

    assert shape(train_images) == [60000, 784], "images should be flattened"
    assert shape(test_images) == [10000, 784], "images should be flattened"

    # After centering, the training pixels should cancel out around 0.
    # NOTE(review): the bound is applied to tensor_sum(train_images) as a
    # whole (presumably the grand total), not to an average.
    assert -0.0001 < tensor_sum(train_images) < 0.0001

    # One-hot encode the labels of both the training and the test set

    train_labels = [one_hot_encode(label) for label in train_labels]
    test_labels = [one_hot_encode(label) for label in test_labels]

    assert shape(train_labels) == [60000, 10]
    assert shape(test_labels) == [10000, 10]

    # Training loop

    # Repeats the tqdm import made near the top of this function.
    import tqdm

    def loop(model: Layer,
             images: List[Tensor],
             labels: List[Tensor],
             loss: Loss,
             optimizer: Optimizer = None) -> None:
        """Run model over every image once; train as well when optimizer is set.

        Tracks the number of correct predictions (argmax of the prediction
        equal to argmax of the label) and the summed loss, and shows their
        running averages in the progress bar. With optimizer left as None
        no gradients are computed and the model is only evaluated.
        """
        correct = 0  # Track number of correct predictions.
        total_loss = 0.0  # Track total loss.

        with tqdm.trange(len(images)) as t:
            for i in t:
                predicted = model.forward(images[i])  # Predict.
                if argmax(predicted) == argmax(labels[i]):  # Check for
                    correct += 1  # correctness.
                total_loss += loss.loss(predicted, labels[i])  # Compute loss.

                # If we're training, backpropagate gradient and update weights.
                if optimizer is not None:
                    gradient = loss.gradient(predicted, labels[i])
                    model.backward(gradient)
                    optimizer.step(model)

                # And update our metrics in the progress bar; i + 1 is the
                # number of examples processed so far.
                avg_loss = total_loss / (i + 1)
                acc = correct / (i + 1)
                t.set_description(f"mnist loss: {avg_loss:.3f} acc: {acc:.3f}")

    # The logistic regression model for MNIST

    random.seed(0)

    # Logistic regression is just a linear layer followed by softmax
    model = Linear(784, 10)
    loss = SoftmaxCrossEntropy()

    # This optimizer seems to work
    optimizer = Momentum(learning_rate=0.01, momentum=0.99)

    # Train on the training data (one pass)
    loop(model, train_images, train_labels, loss, optimizer)

    # Test on the test data (no optimizer means just evaluate)
    loop(model, test_images, test_labels, loss)

    # A deep neural network for MNIST

    random.seed(0)

    # Name them so we can turn train on and off
    dropout1 = Dropout(0.1)
    dropout2 = Dropout(0.1)

    model = Sequential([
        Linear(784, 30),  # Hidden layer 1: size 30
        dropout1,
        Tanh(),
        Linear(30, 10),  # Hidden layer 2: size 10
        dropout2,
        Tanh(),
        Linear(10, 10)  # Output layer: size 10
    ])

    # Training the deep model for MNIST

    optimizer = Momentum(learning_rate=0.01, momentum=0.99)
    loss = SoftmaxCrossEntropy()

    # Enable dropout and train (takes > 20 minutes on my laptop!)
    dropout1.train = dropout2.train = True
    loop(model, train_images, train_labels, loss, optimizer)

    # Disable dropout and evaluate
    dropout1.train = dropout2.train = False
    loop(model, test_images, test_labels, loss)
Example #5
0
def main() -> None:
    """Run the XOR, FizzBuzz and MNIST training demos in sequence.

    Stages, in execution order:

    1. XOR: Linear -> Sigmoid -> Linear, SSE loss, GradientDescent.
    2. FizzBuzz: Linear -> Tanh -> Linear -> Sigmoid, SSE loss, Momentum.
    3. FizzBuzz without the final Sigmoid, SoftmaxCrossEntropy loss.
    4. MNIST with a single Linear layer, SoftmaxCrossEntropy loss.
    5. MNIST with three Linear layers, Tanh activations and Dropout.

    Results are reported through tqdm progress bars and ``print``; a 10x10
    grid of training images is displayed with ``plt.show()``. All model,
    loss and optimizer classes, as well as random, shape, tensor_sum and
    one_hot_encode, are taken from the enclosing module and are not defined
    here.
    """

    # XOR revisited

    # Training data
    xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
    ys = [[0.], [1.], [1.], [0.]]

    # Seed the RNG before building the network (presumably to make the
    # initial weights repeatable).
    random.seed(0)

    net = Sequential([
        Linear(input_dim=2, output_dim=2),
        Sigmoid(),
        Linear(input_dim=2, output_dim=1)
    ])

    import tqdm

    optimizer = GradientDescent(learning_rate=0.1)
    loss = SSE()

    # 3000 epochs; the optimizer steps after each individual example.
    with tqdm.trange(3000) as t:
        for epoch in t:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = net.forward(x)
                epoch_loss += loss.loss(predicted, y)
                gradient = loss.gradient(predicted, y)
                net.backward(gradient)

                optimizer.step(net)

            t.set_description(f"xor loss {epoch_loss:.3f}")

    # Print the trained parameters.
    for param in net.params():
        print(param)

    # FizzBuzz revisited

    from scratch.neural_networks import binary_encode, fizz_buzz_encode, argmax

    # Train on 101..1023; 1..100 is kept aside as the test set.
    xs = [binary_encode(n) for n in range(101, 1024)]
    ys = [fizz_buzz_encode(n) for n in range(101, 1024)]

    NUM_HIDDEN = 25

    random.seed(0)

    net = Sequential([
        Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'),
        Tanh(),
        Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform'),
        Sigmoid()
    ])

    def fizzbuzz_accuracy(low: int, hi: int, net: Layer) -> float:
        """Return the share of n in range(low, hi) that net classifies correctly.

        A prediction is correct when argmax of the network output matches
        argmax of fizz_buzz_encode(n). The count is divided by hi - low, so
        hi == low raises ZeroDivisionError.
        """
        num_correct = 0
        for n in range(low, hi):
            x = binary_encode(n)
            predicted = argmax(net.forward(x))
            actual = argmax(fizz_buzz_encode(n))
            if predicted == actual:
                num_correct += 1

        return num_correct / (hi - low)

    optimizer = Momentum(learning_rate=0.1, momentum=0.9)
    loss = SSE()

    with tqdm.trange(1000) as t:
        for epoch in t:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = net.forward(x)
                epoch_loss += loss.loss(predicted, y)
                gradient = loss.gradient(predicted, y)
                net.backward(gradient)

                optimizer.step(net)

            # Accuracy over the full training range, recomputed each epoch.
            accuracy = fizzbuzz_accuracy(101, 1024, net)
            t.set_description(f"fb loss: {epoch_loss:.2f} acc: {accuracy:.2f}")

    # Now check the results on the test set
    print("test results", fizzbuzz_accuracy(1, 101, net))

    random.seed(0)

    net = Sequential([
        Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'),
        Tanh(),
        Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform')
        # No final sigmoid layer this time
    ])

    optimizer = Momentum(learning_rate=0.1, momentum=0.9)
    loss = SoftmaxCrossEntropy()

    # Only 100 epochs for this variant (the SSE variant above used 1000).
    with tqdm.trange(100) as t:
        for epoch in t:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = net.forward(x)
                epoch_loss += loss.loss(predicted, y)
                gradient = loss.gradient(predicted, y)
                net.backward(gradient)

                optimizer.step(net)

            accuracy = fizzbuzz_accuracy(101, 1024, net)
            t.set_description(f"fb loss: {epoch_loss:.3f} acc: {accuracy:.2f}")

    # Check the results on the test set again
    print("test results", fizzbuzz_accuracy(1, 101, net))

    # Load the MNIST data

    import mnist

    # This downloads the data. Change the path to wherever you want it.
    # (Yes, it is a function with no arguments; that is what the library expects.)
    # (Yes, I am assigning a lambda to a variable, even though I said not to.)
    mnist.temporary_dir = lambda: '/tmp'

    # Each of these functions downloads the data and returns a numpy array.
    # We call .tolist() because our "tensors" are really just lists.
    train_images = mnist.train_images().tolist()
    train_labels = mnist.train_labels().tolist()

    assert shape(train_images) == [60000, 28, 28]
    assert shape(train_labels) == [60000]

    import matplotlib.pyplot as plt

    # 10x10 grid of axes for the first 100 training images (indices 0..99).
    fig, ax = plt.subplots(10, 10)

    for i in range(10):
        for j in range(10):
            # Show each image in black and white and hide the axes.
            ax[i][j].imshow(train_images[10 * i + j], cmap='Greys')
            ax[i][j].xaxis.set_visible(False)
            ax[i][j].yaxis.set_visible(False)

    # Display the grid. NOTE(review): with an interactive backend this
    # presumably blocks until the window is closed -- confirm.
    plt.show()

    # Load the MNIST test data

    test_images = mnist.test_images().tolist()
    test_labels = mnist.test_labels().tolist()

    assert shape(test_images) == [10000, 28, 28]
    assert shape(test_labels) == [10000]

    # Recenter the images

    # Compute the average pixel value (over the training images only)
    avg = tensor_sum(train_images) / 60000 / 28 / 28

    # Recenter, rescale, and flatten: iterating rows and then pixels turns
    # each 28x28 image into one list of 784 values.
    train_images = [[(pixel - avg) / 256 for row in image for pixel in row]
                    for image in train_images]
    test_images = [[(pixel - avg) / 256 for row in image for pixel in row]
                   for image in test_images]

    # The assert messages below are runtime strings and stay in Polish;
    # they mean "images should be flattened".
    assert shape(train_images) == [60000,
                                   784], "obrazy powinny być spłaszczone"
    assert shape(test_images) == [10000, 784], "obrazy powinny być spłaszczone"

    # After centering, the training pixels should balance out around 0.
    # NOTE(review): the check bounds tensor_sum(train_images) itself
    # (presumably the total over all pixels), not each pixel or a mean.
    assert -0.0001 < tensor_sum(train_images) < 0.0001

    # One-hot encode the labels (both training and test sets)

    train_labels = [one_hot_encode(label) for label in train_labels]
    test_labels = [one_hot_encode(label) for label in test_labels]

    assert shape(train_labels) == [60000, 10]
    assert shape(test_labels) == [10000, 10]

    # Training loop

    # tqdm was already imported earlier in this function; this repeats it.
    import tqdm

    def loop(model: Layer,
             images: List[Tensor],
             labels: List[Tensor],
             loss: Loss,
             optimizer: Optimizer = None) -> None:
        """Make one pass over the data, updating weights only if optimizer is given.

        For each image the prediction is compared with its label (argmax
        against argmax) and the loss is accumulated. When optimizer is not
        None the loss gradient is back-propagated and optimizer.step(model)
        is called. Running average loss and accuracy are shown in the tqdm
        progress bar; the function returns nothing.
        """
        correct = 0  # Number of correct predictions so far.
        total_loss = 0.0  # Accumulated total loss.

        with tqdm.trange(len(images)) as t:
            for i in t:
                predicted = model.forward(
                    images[i])  # Predict.
                if argmax(predicted) == argmax(
                        labels[i]):  # Check for correctness.
                    correct += 1
                total_loss += loss.loss(predicted, labels[i])  # Compute the loss.

                # When training, backpropagate the gradient and update the weights.
                if optimizer is not None:
                    gradient = loss.gradient(predicted, labels[i])
                    model.backward(gradient)
                    optimizer.step(model)

                # Update the metrics in the progress bar (averages over the
                # i + 1 examples seen so far).
                avg_loss = total_loss / (i + 1)
                acc = correct / (i + 1)
                t.set_description(f"mnist loss: {avg_loss:.3f} acc: {acc:.3f}")

    # The logistic regression model for MNIST

    random.seed(0)

    # Logistic regression is just a linear layer together with softmax
    model = Linear(784, 10)
    loss = SoftmaxCrossEntropy()

    # This optimizer seems to work well
    optimizer = Momentum(learning_rate=0.01, momentum=0.99)

    # Train the model on the training data
    loop(model, train_images, train_labels, loss, optimizer)

    # Evaluate on the test data (hence no optimizer)
    loop(model, test_images, test_labels, loss)

    # A deep neural network for MNIST

    random.seed(0)

    # Name them so that they can be switched on and off.
    dropout1 = Dropout(0.1)
    dropout2 = Dropout(0.1)

    model = Sequential([
        Linear(784, 30),  # Hidden layer 1: size 30
        dropout1,
        Tanh(),
        Linear(30, 10),  # Hidden layer 2: size 10
        dropout2,
        Tanh(),
        Linear(10, 10)  # Output layer: size 10
    ])

    # Training the deep model for MNIST

    optimizer = Momentum(learning_rate=0.01, momentum=0.99)
    loss = SoftmaxCrossEntropy()

    # Enable the dropout layers and train (this took over 20 minutes on my laptop!)
    dropout1.train = dropout2.train = True
    loop(model, train_images, train_labels, loss, optimizer)

    # Disable the dropout layers and evaluate
    dropout1.train = dropout2.train = False
    loop(model, test_images, test_labels, loss)