def squared_clustering_errors(inputs: List[Vector], k: int) -> float:
    """finds the total squared error from k-means clustering the inputs"""
    clusterer = KMeans(k)
    clusterer.train(inputs)
    means = clusterer.means
    assignments = [clusterer.classify(input) for input in inputs]

    return sum(squared_distance(input, means[cluster])
               for input, cluster in zip(inputs, assignments))
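These snippets assume the small linear-algebra helpers (Vector, squared_distance, vector_mean, dot) from the scratch codebase they were extracted from (scratch.linear_algebra is imported explicitly further down). To run a snippet standalone, a minimal sketch along those lines is enough -- this is a reconstruction for illustration, not the project's exact module:

from typing import List

Vector = List[float]

def dot(v: Vector, w: Vector) -> float:
    """v_1 * w_1 + ... + v_n * w_n"""
    return sum(v_i * w_i for v_i, w_i in zip(v, w))

def subtract(v: Vector, w: Vector) -> Vector:
    """Subtracts corresponding elements."""
    return [v_i - w_i for v_i, w_i in zip(v, w)]

def squared_distance(v: Vector, w: Vector) -> float:
    """(v_1 - w_1) ** 2 + ... + (v_n - w_n) ** 2"""
    diff = subtract(v, w)
    return dot(diff, diff)

def vector_mean(vectors: List[Vector]) -> Vector:
    """Element-wise average of a list of vectors."""
    n = len(vectors)
    return [sum(v[i] for v in vectors) / n for i in range(len(vectors[0]))]

The KMeans class itself is sketched after the classify snippets below.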
Example #2
def squared_clustering_errors(inputs: List[Vector], k: int) -> float:
    # finds the total squared error from k-means clustering the inputs
    clusterer = KMeans(k)
    clusterer.train(inputs)
    means = clusterer.means
    assignments = [clusterer.classify(input) for input in inputs]

    return sum(
        squared_distance(input, means[cluster])
        for input, cluster in zip(inputs, assignments))
def classify(self, input):
    return min(range(self.k),
               key=lambda i: squared_distance(input, self.means[i]))
def classify(self, input: Vector) -> int:
    """return the index of the cluster closest to the input"""
    return min(range(self.k),
               key=lambda i: squared_distance(input, self.means[i]))
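Both classify variants above are methods of a KMeans class defined in the same module. A minimal sketch of that class, using squared_distance and vector_mean from the sketch near the top, and assuming the usual Lloyd-style iteration (the original train loop and its stopping rule may differ in detail):

import random
from typing import List, Optional

class KMeans:
    """Performs k-means clustering."""

    def __init__(self, k: int) -> None:
        self.k = k                                  # number of clusters
        self.means: Optional[List[Vector]] = None   # set by train()

    def classify(self, input: Vector) -> int:
        """return the index of the cluster closest to the input"""
        return min(range(self.k),
                   key=lambda i: squared_distance(input, self.means[i]))

    def train(self, inputs: List[Vector]) -> None:
        # start with k of the inputs as the initial means
        self.means = random.sample(inputs, self.k)

        while True:
            # assign every input to its closest mean
            assignments = [self.classify(input) for input in inputs]

            # recompute each mean from the points assigned to it
            new_means = list(self.means)
            for i in range(self.k):
                points = [p for p, a in zip(inputs, assignments) if a == i]
                if points:              # keep the old mean if a cluster empties
                    new_means[i] = vector_mean(points)

            # stop once no mean moves
            if new_means == self.means:
                return
            self.means = new_means

Note that the seeded asserts in main() below only reproduce exactly if train matches the original implementation step for step, so treat them as illustrative under this sketch.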
def main():

    inputs: List[List[float]] = [[-14, -5], [13, 13], [20, 23], [-19, -11],
                                 [-9, -16], [21, 27], [-49, 15], [26, 13],
                                 [-46, 5], [-34, -1], [11, 15], [-49, 0],
                                 [-22, -16], [19, 28], [-12, -8], [-13, -19],
                                 [-41, 8], [-11, -6], [-25, -9], [-18, -3]]
    
    random.seed(12)                   # so you get the same results as me
    clusterer = KMeans(k=3)
    clusterer.train(inputs)
    means = sorted(clusterer.means)   # sort for the unit test
    
    assert len(means) == 3
    
    # Check that the means are close to what we expect.
    assert squared_distance(means[0], [-44, 5]) < 1
    assert squared_distance(means[1], [-16, -10]) < 1
    assert squared_distance(means[2], [18, 20]) < 1
    
    random.seed(0)
    clusterer = KMeans(k=2)
    clusterer.train(inputs)
    means = sorted(clusterer.means)
    
    assert len(means) == 2
    assert squared_distance(means[0], [-26, -5]) < 1
    assert squared_distance(means[1], [18, 20]) < 1
    
    from matplotlib import pyplot as plt
    
    def squared_clustering_errors(inputs: List[Vector], k: int) -> float:
        """finds the total squared error from k-means clustering the inputs"""
        clusterer = KMeans(k)
        clusterer.train(inputs)
        means = clusterer.means
        assignments = [clusterer.classify(input) for input in inputs]
    
        return sum(squared_distance(input, means[cluster])
                   for input, cluster in zip(inputs, assignments))
    
    # now plot from 1 up to len(inputs) clusters
    
    ks = range(1, len(inputs) + 1)
    errors = [squared_clustering_errors(inputs, k) for k in ks]
    
    plt.plot(ks, errors)
    plt.xticks(ks)
    plt.xlabel("k")
    plt.ylabel("total squared error")
    plt.title("Total Error vs. # of Clusters")
    # plt.show()

    plt.savefig('im/total_error_vs_num_clusters')
    plt.gca().clear()
    
    image_path = r"girl_with_book.jpg"    # wherever your image is
    import matplotlib.image as mpimg
    img = mpimg.imread(image_path) / 256  # rescale to between 0 and 1
    
    # .tolist() converts a numpy array to a Python list
    pixels = [pixel.tolist() for row in img for pixel in row]
    
    clusterer = KMeans(5)
    clusterer.train(pixels)   # this might take a while
    
    def recolor(pixel: Vector) -> Vector:
        cluster = clusterer.classify(pixel)        # index of the closest cluster
        return clusterer.means[cluster]            # mean of the closest cluster
    
    new_img = [[recolor(pixel) for pixel in row]   # recolor this row of pixels
               for row in img]                     # for each row in the image

    plt.close()

    plt.imshow(new_img)
    plt.axis('off')
    # plt.show()

    plt.savefig('im/recolored_girl_with_book.jpg')
    plt.gca().clear()
    
    base_cluster = bottom_up_cluster(inputs)
    
    three_clusters = [get_values(cluster)
                      for cluster in generate_clusters(base_cluster, 3)]

    # sort smallest to largest
    tc = sorted(three_clusters, key=len)
    assert len(tc) == 3
    assert [len(c) for c in tc] == [2, 4, 14]
    assert sorted(tc[0]) == [[11, 15], [13, 13]]

    plt.close()
    
    for i, cluster, marker, color in zip([1, 2, 3],
                                         three_clusters,
                                         ['D','o','*'],
                                         ['r','g','b']):
        xs, ys = zip(*cluster)  # magic unzipping trick
        plt.scatter(xs, ys, color=color, marker=marker)
    
        # put a number at the mean of the cluster
        x, y = vector_mean(cluster)
        plt.plot(x, y, marker='$' + str(i) + '$', color='black')
    
    plt.title("User Locations -- 3 Bottom-Up Clusters, Min")
    plt.xlabel("blocks east of city center")
    plt.ylabel("blocks north of city center")
    # plt.show()

    plt.savefig('im/bottom_up_clusters_min.png')
    plt.gca().clear()
    plt.close()

    base_cluster_max = bottom_up_cluster(inputs, max)
    three_clusters_max = [get_values(cluster)
                          for cluster in generate_clusters(base_cluster_max, 3)]
    
    for i, cluster, marker, color in zip([1, 2, 3],
                                         three_clusters_max,
                                         ['D','o','*'],
                                         ['r','g','b']):
        xs, ys = zip(*cluster)  # magic unzipping trick
        plt.scatter(xs, ys, color=color, marker=marker)
    
        # put a number at the mean of the cluster
        x, y = vector_mean(cluster)
        plt.plot(x, y, marker='$' + str(i) + '$', color='black')
    
    plt.title("User Locations -- 3 Bottom-Up Clusters, Max")
    plt.xlabel("blocks east of city center")
    plt.ylabel("blocks north of city center")
    plt.savefig('im/bottom_up_clusters_max.png')
    plt.gca().clear()
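The bottom-up section of main() assumes bottom_up_cluster, generate_clusters, and get_values from the surrounding module. A sketch of those helpers, reconstructed to match how the snippet uses them -- the Leaf/Merged representation and the merge-order bookkeeping here are assumptions, and the exact cluster sizes asserted above depend on tie-breaking details:

import math
from typing import Callable, List, NamedTuple, Union

class Leaf(NamedTuple):
    value: Vector

class Merged(NamedTuple):
    children: tuple
    order: int                  # smaller order = merged later

Cluster = Union[Leaf, Merged]

def get_values(cluster: Cluster) -> List[Vector]:
    """All the points contained in this (possibly merged) cluster."""
    if isinstance(cluster, Leaf):
        return [cluster.value]
    return [value
            for child in cluster.children
            for value in get_values(child)]

def cluster_distance(cluster1: Cluster,
                     cluster2: Cluster,
                     distance_agg: Callable = min) -> float:
    """Aggregate all pairwise distances between the clusters' points:
    min gives single linkage, max gives complete linkage."""
    return distance_agg([math.sqrt(squared_distance(v1, v2))
                         for v1 in get_values(cluster1)
                         for v2 in get_values(cluster2)])

def get_merge_order(cluster: Cluster) -> float:
    # leaves were never merged, so they sort last
    return float('inf') if isinstance(cluster, Leaf) else cluster.order

def bottom_up_cluster(inputs: List[Vector],
                      distance_agg: Callable = min) -> Cluster:
    # start with every point in its own leaf cluster
    clusters: List[Cluster] = [Leaf(input) for input in inputs]

    while len(clusters) > 1:
        # find the two closest clusters
        c1, c2 = min(((c1, c2)
                      for i, c1 in enumerate(clusters)
                      for c2 in clusters[:i]),
                     key=lambda pair: cluster_distance(pair[0], pair[1],
                                                       distance_agg))
        # remove them and merge them, recording the merge order
        clusters = [c for c in clusters if c != c1 and c != c2]
        clusters.append(Merged((c1, c2), order=len(clusters)))

    return clusters[0]

def generate_clusters(base_cluster: Cluster,
                      num_clusters: int) -> List[Cluster]:
    """Unmerge the most recent merges until num_clusters clusters remain."""
    clusters: List[Cluster] = [base_cluster]
    while len(clusters) < num_clusters:
        next_cluster = min(clusters, key=get_merge_order)  # most recent merge
        clusters = [c for c in clusters if c != next_cluster]
        clusters.extend(next_cluster.children)
    return clusters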
def main():
    import random
    random.seed(0)
    
    # training data
    xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
    ys = [[0.], [1.], [1.], [0.]]
    
    # start with random weights
    network = [ # hidden layer: 2 inputs -> 2 outputs
                [[random.random() for _ in range(2 + 1)],   # 1st hidden neuron
                 [random.random() for _ in range(2 + 1)]],  # 2nd hidden neuron
                # output layer: 2 inputs -> 1 output
                [[random.random() for _ in range(2 + 1)]]   # 1st output neuron
              ]
    
    from scratch.gradient_descent import gradient_step
    import tqdm
    
    learning_rate = 1.0
    
    for epoch in tqdm.trange(20000, desc="neural net for xor"):
        for x, y in zip(xs, ys):
            gradients = sqerror_gradients(network, x, y)
    
            # Take a gradient step for each neuron in each layer
            network = [[gradient_step(neuron, grad, -learning_rate)
                        for neuron, grad in zip(layer, layer_grad)]
                       for layer, layer_grad in zip(network, gradients)]
    
    # check that it learned XOR
    assert feed_forward(network, [0, 0])[-1][0] < 0.01
    assert feed_forward(network, [0, 1])[-1][0] > 0.99
    assert feed_forward(network, [1, 0])[-1][0] > 0.99
    assert feed_forward(network, [1, 1])[-1][0] < 0.01
    
    xs = [binary_encode(n) for n in range(101, 1024)]
    ys = [fizz_buzz_encode(n) for n in range(101, 1024)]
    
    NUM_HIDDEN = 25
    
    network = [
        # hidden layer: 10 inputs -> NUM_HIDDEN outputs
        [[random.random() for _ in range(10 + 1)] for _ in range(NUM_HIDDEN)],
    
        # output layer: NUM_HIDDEN inputs -> 4 outputs
        [[random.random() for _ in range(NUM_HIDDEN + 1)] for _ in range(4)]
    ]
    
    from scratch.linear_algebra import squared_distance
    
    learning_rate = 1.0
    
    with tqdm.trange(500) as t:
        for epoch in t:
            epoch_loss = 0.0
    
            for x, y in zip(xs, ys):
                predicted = feed_forward(network, x)[-1]
                epoch_loss += squared_distance(predicted, y)
                gradients = sqerror_gradients(network, x, y)
    
                # Take a gradient step for each neuron in each layer
                network = [[gradient_step(neuron, grad, -learning_rate)
                            for neuron, grad in zip(layer, layer_grad)]
                           for layer, layer_grad in zip(network, gradients)]
    
            t.set_description(f"fizz buzz (loss: {epoch_loss:.2f})")
    
    num_correct = 0
    
    for n in range(1, 101):
        x = binary_encode(n)
        predicted = argmax(feed_forward(network, x)[-1])
        actual = argmax(fizz_buzz_encode(n))
        labels = [str(n), "fizz", "buzz", "fizzbuzz"]
        print(n, labels[predicted], labels[actual])
    
        if predicted == actual:
            num_correct += 1
    
    print(num_correct, "/", 100)
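This main() assumes the neural-network helpers from the same codebase: feed_forward and sqerror_gradients, plus binary_encode, fizz_buzz_encode, and argmax for the fizz buzz part. A self-contained sketch for the one-hidden-layer case used here -- a reconstruction, so treat the exact signatures as assumptions:

import math
from typing import List

Vector = List[float]        # as in the linear-algebra sketch near the top

def dot(v: Vector, w: Vector) -> float:
    return sum(v_i * w_i for v_i, w_i in zip(v, w))

def sigmoid(t: float) -> float:
    return 1 / (1 + math.exp(-t))

def neuron_output(weights: Vector, inputs: Vector) -> float:
    # weights includes the bias term, inputs includes a 1
    return sigmoid(dot(weights, inputs))

def feed_forward(neural_network: List[List[Vector]],
                 input_vector: Vector) -> List[Vector]:
    """Feeds the input through the network and returns
    the outputs of every layer (not just the last one)."""
    outputs: List[Vector] = []
    for layer in neural_network:
        input_with_bias = input_vector + [1]              # add a bias input
        output = [neuron_output(neuron, input_with_bias)
                  for neuron in layer]
        outputs.append(output)
        input_vector = output                             # feed the next layer
    return outputs

def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """Backpropagation for a network with exactly one hidden layer:
    the gradient of the squared error with respect to every weight."""
    # forward pass (unpacking works because there are exactly two layers)
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # chain rule: sigmoid'(x) = output * (1 - output), times (output - target)
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, target_vector)]

    output_grads = [[output_deltas[i] * hidden_output
                     for hidden_output in hidden_outputs + [1]]
                    for i in range(len(network[-1]))]

    hidden_deltas = [hidden_output * (1 - hidden_output) *
                         dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    hidden_grads = [[hidden_deltas[i] * input
                     for input in input_vector + [1]]
                    for i in range(len(network[0]))]

    return [hidden_grads, output_grads]

def binary_encode(x: int) -> Vector:
    """Ten binary digits of x, least significant first."""
    binary: Vector = []
    for _ in range(10):
        binary.append(x % 2)
        x = x // 2
    return binary

def fizz_buzz_encode(x: int) -> Vector:
    """One-hot target: [just x, fizz, buzz, fizzbuzz]."""
    if x % 15 == 0:  return [0, 0, 0, 1]
    elif x % 5 == 0: return [0, 0, 1, 0]
    elif x % 3 == 0: return [0, 1, 0, 0]
    else:            return [1, 0, 0, 0]

def argmax(xs: List[float]) -> int:
    """Index of the largest value."""
    return max(range(len(xs)), key=lambda i: xs[i])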
Example #9
def classify(self, input: Vector) -> int:
    """return the index of the cluster closest to the input"""
    return min(range(self.k),
               key=lambda i: squared_distance(input, self.means[i]))
Example #10
def main():

    inputs: List[List[float]] = [[-14, -5], [13, 13], [20, 23], [-19, -11],
                                 [-9, -16], [21, 27], [-49, 15], [26, 13],
                                 [-46, 5], [-34, -1], [11, 15], [-49, 0],
                                 [-22, -16], [19, 28], [-12, -8], [-13, -19],
                                 [-41, 8], [-11, -6], [-25, -9], [-18, -3]]

    random.seed(12)  # so you get the same results as me
    clusterer = KMeans(k=3)
    clusterer.train(inputs)
    means = sorted(clusterer.means)  # sort for the unit test

    assert len(means) == 3

    # Check that the means are close to what we expect.
    assert squared_distance(means[0], [-44, 5]) < 1
    assert squared_distance(means[1], [-16, -10]) < 1
    assert squared_distance(means[2], [18, 20]) < 1

    random.seed(0)
    clusterer = KMeans(k=2)
    clusterer.train(inputs)
    means = sorted(clusterer.means)

    assert len(means) == 2
    assert squared_distance(means[0], [-26, -5]) < 1
    assert squared_distance(means[1], [18, 20]) < 1

    from matplotlib import pyplot as plt

    def squared_clustering_errors(inputs: List[Vector], k: int) -> float:
        """finds the total squared error from k-means clustering the inputs"""
        clusterer = KMeans(k)
        clusterer.train(inputs)
        means = clusterer.means
        assignments = [clusterer.classify(input) for input in inputs]

        return sum(
            squared_distance(input, means[cluster])
            for input, cluster in zip(inputs, assignments))

    # now plot from 1 up to len(inputs) clusters

    ks = range(1, len(inputs) + 1)
    errors = [squared_clustering_errors(inputs, k) for k in ks]

    plt.plot(ks, errors)
    plt.xticks(ks)
    plt.xlabel("k")
    plt.ylabel("total squared error")
    plt.title("Total Error vs. # of Clusters")
    # plt.show()

    plt.savefig('im/total_error_vs_num_clusters')
    plt.gca().clear()

    image_path = r"girl_with_book.jpg"  # wherever your image is
    import matplotlib.image as mpimg
    img = mpimg.imread(image_path) / 256  # rescale to between 0 and 1

    # .tolist() converts a numpy array to a Python list
    pixels = [pixel.tolist() for row in img for pixel in row]

    clusterer = KMeans(5)
    clusterer.train(pixels)  # this might take a while

    def recolor(pixel: Vector) -> Vector:
        cluster = clusterer.classify(pixel)  # index of the closest cluster
        return clusterer.means[cluster]  # mean of the closest cluster

    new_img = [
        [recolor(pixel) for pixel in row]  # recolor this row of pixels
        for row in img
    ]  # for each row in the image

    plt.close()

    plt.imshow(new_img)
    plt.axis('off')
    # plt.show()

    plt.savefig('im/recolored_girl_with_book.jpg')
    plt.gca().clear()

    base_cluster = bottom_up_cluster(inputs)

    three_clusters = [
        get_values(cluster) for cluster in generate_clusters(base_cluster, 3)
    ]

    # sort smallest to largest
    tc = sorted(three_clusters, key=len)
    assert len(tc) == 3
    assert [len(c) for c in tc] == [2, 4, 14]
    assert sorted(tc[0]) == [[11, 15], [13, 13]]

    plt.close()

    for i, cluster, marker, color in zip([1, 2, 3], three_clusters,
                                         ['D', 'o', '*'], ['r', 'g', 'b']):
        xs, ys = zip(*cluster)  # magic unzipping trick
        plt.scatter(xs, ys, color=color, marker=marker)

        # put a number at the mean of the cluster
        x, y = vector_mean(cluster)
        plt.plot(x, y, marker='$' + str(i) + '$', color='black')

    plt.title("User Locations -- 3 Bottom-Up Clusters, Min")
    plt.xlabel("blocks east of city center")
    plt.ylabel("blocks north of city center")
    # plt.show()

    plt.savefig('im/bottom_up_clusters_min.png')
    plt.gca().clear()
    plt.close()

    base_cluster_max = bottom_up_cluster(inputs, max)
    three_clusters_max = [
        get_values(cluster)
        for cluster in generate_clusters(base_cluster_max, 3)
    ]

    for i, cluster, marker, color in zip([1, 2, 3], three_clusters_max,
                                         ['D', 'o', '*'], ['r', 'g', 'b']):
        xs, ys = zip(*cluster)  # magic unzipping trick
        plt.scatter(xs, ys, color=color, marker=marker)

        # put a number at the mean of the cluster
        x, y = vector_mean(cluster)
        plt.plot(x, y, marker='$' + str(i) + '$', color='black')

    plt.title("User Locations -- 3 Bottom-Up Clusters, Max")
    plt.xlabel("blocks east of city center")
    plt.ylabel("blocks north of city center")
    plt.savefig('im/bottom_up_clusters_max.png')
    plt.gca().clear()
Example #11
def classify(self, input: Vector) -> int:
    """Return the index of the cluster closest to the input."""
    return min(range(self.k),
               key=lambda i: squared_distance(input, self.means[i]))
Example #12
def main():

    inputs: List[List[float]] = [[-14, -5], [13, 13], [20, 23], [-19, -11],
                                 [-9, -16], [21, 27], [-49, 15], [26, 13],
                                 [-46, 5], [-34, -1], [11, 15], [-49, 0],
                                 [-22, -16], [19, 28], [-12, -8], [-13, -19],
                                 [-41, 8], [-11, -6], [-25, -9], [-18, -3]]

    random.seed(12)  # so you get the same results as me
    clusterer = KMeans(k=3)
    clusterer.train(inputs)
    means = sorted(clusterer.means)  # sort for the unit test

    assert len(means) == 3

    # Check that the means are close to what we expect.
    assert squared_distance(means[0], [-44, 5]) < 1
    assert squared_distance(means[1], [-16, -10]) < 1
    assert squared_distance(means[2], [18, 20]) < 1

    random.seed(0)
    clusterer = KMeans(k=2)
    clusterer.train(inputs)
    means = sorted(clusterer.means)

    assert len(means) == 2
    assert squared_distance(means[0], [-26, -5]) < 1
    assert squared_distance(means[1], [18, 20]) < 1

    from matplotlib import pyplot as plt

    def squared_clustering_errors(inputs: List[Vector], k: int) -> float:
        """Określa sumę błędów podniesionych do kwadratu
        uzyskanych w wyniku działania algorytmu k średnich"""
        clusterer = KMeans(k)
        clusterer.train(inputs)
        means = clusterer.means
        assignments = [clusterer.classify(input) for input in inputs]

        return sum(
            squared_distance(input, means[cluster])
            for input, cluster in zip(inputs, assignments))

    # now plot from 1 up to len(inputs) clusters

    ks = range(1, len(inputs) + 1)
    errors = [squared_clustering_errors(inputs, k) for k in ks]

    plt.plot(ks, errors)
    plt.xticks(ks)
    plt.xlabel("k")
    plt.ylabel("Suma kwadratow bledow")
    plt.title("Blad calkowity a liczba grup")
    plt.show()

    plt.savefig('im/total_error_vs_num_clusters')
    plt.gca().clear()

    image_path = r"girl_with_book.jpg"  # ścieżka pliku obrazu
    import matplotlib.image as mpimg
    img = mpimg.imread(
        image_path
    ) / 256  # przeskalujmy, aby uzyskać wartości z przedziału od 0 do 1

    # .tolist() converts a numpy array to a Python list
    pixels = [pixel.tolist() for row in img for pixel in row]

    clusterer = KMeans(5)
    clusterer.train(pixels)  # this might take a while

    def recolor(pixel: Vector) -> Vector:
        cluster = clusterer.classify(pixel)  # index of the closest cluster
        return clusterer.means[cluster]  # mean of the closest cluster

    new_img = [
        [recolor(pixel) for pixel in row]  # recolor this row of pixels
        for row in img
    ]  # for each row in the image

    plt.close()

    plt.imshow(new_img)
    plt.axis('off')
    # plt.show()

    plt.savefig('im/recolored_girl_with_book.jpg')
    plt.gca().clear()

    base_cluster = bottom_up_cluster(inputs)

    three_clusters = [
        get_values(cluster) for cluster in generate_clusters(base_cluster, 3)
    ]

    # sort smallest to largest
    tc = sorted(three_clusters, key=len)
    assert len(tc) == 3
    assert [len(c) for c in tc] == [2, 4, 14]
    assert sorted(tc[0]) == [[11, 15], [13, 13]]

    plt.close()

    for i, cluster, marker, color in zip([1, 2, 3], three_clusters,
                                         ['D', 'o', '*'], ['r', 'g', 'b']):
        xs, ys = zip(*cluster)  # magic unzipping trick
        plt.scatter(xs, ys, color=color, marker=marker)

        # put a number at the mean of the cluster
        x, y = vector_mean(cluster)
        plt.plot(x, y, marker='$' + str(i) + '$', color='black')

    plt.title("Miejsca zamieszkania (3 grupy, metoda bottom-up, minimum)")
    plt.xlabel("Liczba przecznic na wschod od centrum miasta ")
    plt.ylabel("Liczba przecznic na polnoc od centrum miasta ")
    plt.show()

    plt.savefig('im/bottom_up_clusters_min.png')
    plt.gca().clear()
    plt.close()

    base_cluster_max = bottom_up_cluster(inputs, max)
    three_clusters_max = [
        get_values(cluster)
        for cluster in generate_clusters(base_cluster_max, 3)
    ]

    for i, cluster, marker, color in zip([1, 2, 3], three_clusters_max,
                                         ['D', 'o', '*'], ['r', 'g', 'b']):
        xs, ys = zip(*cluster)  # magic unzipping trick
        plt.scatter(xs, ys, color=color, marker=marker)

        # put a number at the mean of the cluster
        x, y = vector_mean(cluster)
        plt.plot(x, y, marker='$' + str(i) + '$', color='black')

    plt.title("Miejsca zamieszkania (3 grupy, metoda bottom-up, maksimum)")
    plt.xlabel("Liczba przecznic na wschod od centrum miasta ")
    plt.ylabel("Liczba przecznic na polnoc od centrum miasta ")
    plt.savefig('im/bottom_up_clusters_max.png')
    plt.gca().clear()