def least_squares_fit(xs: List[Vector],
                      ys: List[float],
                      learning_rate: float = 0.001,
                      num_steps: int = 1000,
                      batch_size: int = 1) -> Vector:
    """
    Find the beta that minimizes the sum of squared errors,
    assuming the model y = dot(x, beta).
    """
    # Start with a random guess
    guess = [random.random() for _ in xs[0]]

    for _ in tqdm.trange(num_steps, desc="least squares fit"):
        for start in range(0, len(xs), batch_size):
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]

            gradient = vector_mean([sqerror_gradient(x, y, guess)
                                    for x, y in zip(batch_xs, batch_ys)])
            guess = gradient_step(guess, gradient, -learning_rate)

    return guess
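# A minimal, self-contained sketch of the helpers that least_squares_fit relies on,
# assuming the conventions used in the surrounding scratch-style code
# (Vector = List[float], prediction = dot(x, beta)). These are illustrative
# stand-ins, not the canonical scratch implementations.
from typing import List

Vector = List[float]

def dot(v: Vector, w: Vector) -> float:
    """Sum of componentwise products."""
    return sum(v_i * w_i for v_i, w_i in zip(v, w))

def vector_mean(vectors: List[Vector]) -> Vector:
    """Componentwise mean of a list of vectors."""
    n = len(vectors)
    return [sum(v[i] for v in vectors) / n for i in range(len(vectors[0]))]

def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:
    """Move step_size times the gradient away from v."""
    return [v_i + step_size * g_i for v_i, g_i in zip(v, gradient)]

def sqerror_gradient(x: Vector, y: float, beta: Vector) -> Vector:
    """Gradient of the squared error (dot(x, beta) - y) ** 2 with respect to beta."""
    err = dot(x, beta) - y
    return [2 * err * x_i for x_i in x]

# Hypothetical usage on a tiny synthetic dataset (x = [1, feature], intercept 3, slope 2):
# xs = [[1.0, float(i)] for i in range(10)]
# ys = [3.0 + 2.0 * x[1] for x in xs]
# beta = least_squares_fit(xs, ys, num_steps=5000)   # should approach [3.0, 2.0]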
def train(network: List[List[Vector]],
          xs: List[Vector],
          ys: List[Vector],
          epochs: int,
          learning_rate: float) -> List[List[Vector]]:
    for epoch in tqdm.trange(epochs, desc="Neural network for xor"):
        for x, y in zip(xs, ys):
            gradients = sqerror_gradients(network, x, y)

            # Take a gradient step for each neuron in each layer
            network = [[gradient_step(neuron, grad, -learning_rate)
                        for neuron, grad in zip(layer, layer_grad)]
                       for layer, layer_grad in zip(network, gradients)]

    return network
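# For reference, the network layout that train() expects (the same shape used in
# the XOR example below): a list of layers, each layer a list of neurons, and each
# neuron a weight vector whose final entry is the bias term. A minimal 2-2-1 sketch,
# assuming random initialization:
import random

xor_network = [
    # hidden layer: 2 inputs (+ bias) -> 2 neurons
    [[random.random() for _ in range(2 + 1)],
     [random.random() for _ in range(2 + 1)]],
    # output layer: 2 inputs (+ bias) -> 1 neuron
    [[random.random() for _ in range(2 + 1)]],
]
# trained = train(xor_network, xs, ys, epochs=20000, learning_rate=1.0)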
def first_principal_component(data: List[Vector],
                              n: int = 100,
                              step_size: float = 0.1) -> Vector:
    # Start with an arbitrary (all-ones) guess
    guess = [1.0 for _ in data[0]]

    with tqdm.trange(n) as t:
        for _ in t:
            dv = directional_variance(data, guess)
            gradient = directional_variance_gradient(data, guess)
            # gradient *ascent*: step toward higher directional variance
            guess = gradient_step(guess, gradient, step_size)
            t.set_description(f"dv: {dv:.3f}")

    return direction(guess)
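# A hedged sketch of the direction / directional-variance helpers that
# first_principal_component depends on, assuming the data has already been
# de-meaned. The variance of the data in direction w is the sum of squared
# projections onto the unit vector w / |w|. Illustrative only; dot is redefined
# here to keep the sketch self-contained.
import math
from typing import List

Vector = List[float]

def dot(v: Vector, w: Vector) -> float:
    return sum(v_i * w_i for v_i, w_i in zip(v, w))

def direction(w: Vector) -> Vector:
    """Rescale w to have length 1."""
    mag = math.sqrt(dot(w, w))
    return [w_i / mag for w_i in w]

def directional_variance(data: List[Vector], w: Vector) -> float:
    """Variance of the (de-meaned) data in the direction of w."""
    w_dir = direction(w)
    return sum(dot(v, w_dir) ** 2 for v in data)

def directional_variance_gradient(data: List[Vector], w: Vector) -> Vector:
    """Gradient of directional_variance with respect to w."""
    w_dir = direction(w)
    return [sum(2 * dot(v, w_dir) * v[i] for v in data)
            for i in range(len(w))]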
def least_squares_fit(xs: List[Vector],
                      ys: List[float],
                      learning_rate: float = 0.001,
                      num_steps: int = 1000,
                      batch_size: int = 1) -> Vector:
    guess = [random.random() for _ in xs[0]]

    for _ in tqdm.trange(num_steps, desc="least squares fit"):
        for start in range(0, len(xs), batch_size):
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]

            gradient = vector_mean([squared_gradient(x, y, guess)
                                    for x, y in zip(batch_xs, batch_ys)])
            guess = gradient_step(guess, gradient, -learning_rate)

    return guess
def least_squares_fit_ridge(xs: List[Vector],
                            ys: List[float],
                            alpha: float,
                            learning_rate: float,
                            num_steps: int,
                            batch_size: int = 1) -> Vector:
    """
    Find the beta that minimizes the sum of squared errors
    plus an alpha-weighted ridge penalty on beta.
    """
    # Start with a random guess
    guess = [random.random() for _ in xs[0]]

    for i in range(num_steps):
        for start in range(0, len(xs), batch_size):
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]

            gradient = vector_mean([sqerror_ridge_gradient(x, y, guess, alpha)
                                    for x, y in zip(batch_xs, batch_ys)])
            guess = gradient_step(guess, gradient, -learning_rate)

    return guess
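# A hedged sketch of the ridge gradient used above, assuming the convention that
# beta[0] is the intercept and is not penalized. The exact decomposition is an
# assumption for illustration; dot is redefined to keep the sketch self-contained.
from typing import List

Vector = List[float]

def dot(v: Vector, w: Vector) -> float:
    return sum(v_i * w_i for v_i, w_i in zip(v, w))

def sqerror_ridge_gradient(x: Vector, y: float, beta: Vector, alpha: float) -> Vector:
    """Gradient of (dot(x, beta) - y)**2 + alpha * sum(beta_j**2 for j >= 1)."""
    err = dot(x, beta) - y
    penalty = [0.0] + [2 * alpha * b for b in beta[1:]]
    return [2 * err * x_i + p_i for x_i, p_i in zip(x, penalty)]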
def main():
    import random
    import tqdm
    from scratch.gradient_descent import gradient_step

    num_epochs = 10000
    random.seed(0)

    guess = [random.random(), random.random()]  # choose a random value to start

    learning_rate = 0.00001

    with tqdm.trange(num_epochs) as t:
        for _ in t:
            alpha, beta = guess

            # Partial derivative of loss with respect to alpha
            grad_a = sum(2 * error(alpha, beta, x_i, y_i)
                         for x_i, y_i in zip(num_friends_good,
                                             daily_minutes_good))

            # Partial derivative of loss with respect to beta
            grad_b = sum(2 * error(alpha, beta, x_i, y_i) * x_i
                         for x_i, y_i in zip(num_friends_good,
                                             daily_minutes_good))

            # Compute the loss to stick in the tqdm description
            loss = sum_of_sqerrors(alpha, beta,
                                   num_friends_good, daily_minutes_good)
            t.set_description(f"loss: {loss:.3f}")

            # Finally, update the guess
            guess = gradient_step(guess, [grad_a, grad_b], -learning_rate)

    # We should get pretty much the same results:
    alpha, beta = guess
    assert 22.9 < alpha < 23.0
    assert 0.9 < beta < 0.905
def main():
    import random
    import tqdm
    from scratch.gradient_descent import gradient_step

    num_epochs = 10000
    random.seed(0)

    guess = [random.random(), random.random()]  # choose random value to start

    learning_rate = 0.00001

    with tqdm.trange(num_epochs) as t:
        for _ in t:
            alpha, beta = guess

            # Partial derivative of loss with respect to alpha
            grad_a = sum(2 * error(alpha, beta, x_i, y_i)
                         for x_i, y_i in zip(num_friends_good,
                                             daily_minutes_good))

            # Partial derivative of loss with respect to beta
            grad_b = sum(2 * error(alpha, beta, x_i, y_i) * x_i
                         for x_i, y_i in zip(num_friends_good,
                                             daily_minutes_good))

            # Compute loss to stick in the tqdm description
            loss = sum_of_sqerrors(alpha, beta,
                                   num_friends_good, daily_minutes_good)
            t.set_description(f"loss: {loss:.3f}")

            # Finally, update the guess
            guess = gradient_step(guess, [grad_a, grad_b], -learning_rate)

    # We should get pretty much the same results:
    alpha, beta = guess
    assert 22.9 < alpha < 23.0
    assert 0.9 < beta < 0.905
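# For context, a minimal sketch of the simple-linear-regression helpers the loop
# above calls, assuming the model predict(alpha, beta, x_i) = beta * x_i + alpha and
# error defined as predicted minus actual (which is why grad_a and grad_b carry a
# factor of 2 * error). The names mirror the calls above; the bodies are illustrative.
from typing import List

def predict(alpha: float, beta: float, x_i: float) -> float:
    return beta * x_i + alpha

def error(alpha: float, beta: float, x_i: float, y_i: float) -> float:
    """The difference between the prediction and the actual value y_i."""
    return predict(alpha, beta, x_i) - y_i

def sum_of_sqerrors(alpha: float, beta: float, x: List[float], y: List[float]) -> float:
    return sum(error(alpha, beta, x_i, y_i) ** 2
               for x_i, y_i in zip(x, y))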
def least_squares_fit(xs: List[Vector],
                      ys: List[float],
                      learning_rate: float = 0.001,
                      num_steps: int = 1000,
                      batch_size: int = 1) -> Vector:
    """
    Find the beta that minimizes the sum of squared errors,
    assuming the model y = dot(x, beta).
    """
    # Start with a random guess
    guess = [random.random() for _ in xs[0]]

    for _ in tqdm.trange(num_steps, desc="least squares fit"):
        for start in range(0, len(xs), batch_size):
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]

            gradient = vector_mean([sqerror_gradient(x, y, guess)
                                    for x, y in zip(batch_xs, batch_ys)])
            guess = gradient_step(guess, gradient, -learning_rate)

    return guess
def gradient_descent(x: List[float], y: List[float]) -> List[float]:
    num_epochs = 10000
    random.seed(0)

    guess = [random.random(), random.random()]  # choose random values to start

    learning_rate = 0.00001

    with tqdm.trange(num_epochs) as t:
        for _ in t:
            alpha, beta = guess

            # Partial derivative of loss with respect to alpha
            grad_a = sum(2 * error(alpha, beta, x_i, y_i)
                         for x_i, y_i in zip(x, y))

            # Partial derivative of loss with respect to beta
            grad_b = sum(2 * error(alpha, beta, x_i, y_i) * x_i
                         for x_i, y_i in zip(x, y))

            # The loss is the sum of squared errors for the current alpha and beta;
            # the goal is to minimize it to obtain the most accurate values.
            loss = sum_of_sqerrors(alpha, beta, x, y)
            t.set_description(f"loss: {loss:.3f}")

            # Finally, update the guess
            guess = gradient_step(guess, [grad_a, grad_b], -learning_rate)

    return guess

# We expect a user with n friends to spend 22.95 + n * 0.903 minutes on the site each day
# alpha, beta = gradient_descent(num_friends_good, daily_minutes_good)
# print(alpha, beta)
# assert 22.9 < alpha < 23.0
# assert 0.9 < beta < 0.905
def main():
    import random
    random.seed(0)

    # training data
    xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
    ys = [[0.], [1.], [1.], [0.]]

    # start with random weights
    network = [
        # hidden layer: 2 inputs -> 2 outputs
        [[random.random() for _ in range(2 + 1)],   # 1st hidden neuron
         [random.random() for _ in range(2 + 1)]],  # 2nd hidden neuron
        # output layer: 2 inputs -> 1 output
        [[random.random() for _ in range(2 + 1)]]   # 1st output neuron
    ]

    from scratch.gradient_descent import gradient_step
    import tqdm

    learning_rate = 1.0

    for epoch in tqdm.trange(20000, desc="neural net for xor"):
        for x, y in zip(xs, ys):
            gradients = sqerror_gradients(network, x, y)

            # Take a gradient step for each neuron in each layer
            network = [[gradient_step(neuron, grad, -learning_rate)
                        for neuron, grad in zip(layer, layer_grad)]
                       for layer, layer_grad in zip(network, gradients)]

    # check that it actually learned XOR
    assert feed_forward(network, [0, 0])[-1][0] < 0.01
    assert feed_forward(network, [0, 1])[-1][0] > 0.99
    assert feed_forward(network, [1, 0])[-1][0] > 0.99
    assert feed_forward(network, [1, 1])[-1][0] < 0.01

    xs = [binary_encode(n) for n in range(101, 1024)]
    ys = [fizz_buzz_encode(n) for n in range(101, 1024)]

    NUM_HIDDEN = 25

    network = [
        # hidden layer: 10 inputs -> NUM_HIDDEN outputs
        [[random.random() for _ in range(10 + 1)] for _ in range(NUM_HIDDEN)],

        # output layer: NUM_HIDDEN inputs -> 4 outputs
        [[random.random() for _ in range(NUM_HIDDEN + 1)] for _ in range(4)]
    ]

    from scratch.linear_algebra import squared_distance

    learning_rate = 1.0

    with tqdm.trange(500) as t:
        for epoch in t:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = feed_forward(network, x)[-1]
                epoch_loss += squared_distance(predicted, y)
                gradients = sqerror_gradients(network, x, y)

                # Take a gradient step for each neuron in each layer
                network = [[gradient_step(neuron, grad, -learning_rate)
                            for neuron, grad in zip(layer, layer_grad)]
                           for layer, layer_grad in zip(network, gradients)]

            t.set_description(f"fizz buzz (loss: {epoch_loss:.2f})")

    num_correct = 0

    for n in range(1, 101):
        x = binary_encode(n)
        predicted = argmax(feed_forward(network, x)[-1])
        actual = argmax(fizz_buzz_encode(n))
        labels = [str(n), "fizz", "buzz", "fizzbuzz"]
        print(n, labels[predicted], labels[actual])

        if predicted == actual:
            num_correct += 1

    print(num_correct, "/", 100)
def main():
    from matplotlib import pyplot as plt
    plt.close()
    plt.clf()
    plt.gca().clear()

    from scratch.working_with_data import rescale
    from scratch.multiple_regression import least_squares_fit, predict
    from scratch.gradient_descent import gradient_step

    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("predicted")
    plt.ylabel("actual")
    # plt.show()
    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    from scratch.machine_learning import train_test_split
    import random
    import tqdm

    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # pick a random starting point
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for epoch in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    from scratch.working_with_data import scale
    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0]
                      - beta[1] * means[1] / stdevs[1]
                      - beta[2] * means[2] / stdevs[2]),
                     beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]

    assert (negative_log_likelihood(xs, ys, beta_unscaled) ==
            negative_log_likelihood(rescaled_xs, ys, beta))

    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:  # TP: paid and we predict paid
            true_positives += 1
        elif y_i == 1:                      # FN: paid and we predict unpaid
            false_negatives += 1
        elif prediction >= 0.5:             # FP: unpaid and we predict paid
            false_positives += 1
        else:                               # TN: unpaid and we predict unpaid
            true_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    print(precision, recall)

    assert precision == 0.75
    assert recall == 0.8

    plt.clf()
    plt.gca().clear()

    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("predicted probability")
    plt.ylabel("actual outcome")
    plt.title("Logistic Regression Predicted vs. Actual")
    # plt.show()
    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()
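# A hedged sketch of the logistic-regression pieces used above (logistic,
# negative_log_likelihood, negative_log_gradient), assuming y in {0, 1} and the
# probability model p = logistic(dot(x, beta)). These are illustrative stand-ins
# with the standard definitions, not the canonical scratch implementations.
import math
from typing import List

Vector = List[float]

def dot(v: Vector, w: Vector) -> float:
    return sum(v_i * w_i for v_i, w_i in zip(v, w))

def logistic(x: float) -> float:
    return 1.0 / (1.0 + math.exp(-x))

def negative_log_likelihood(xs: List[Vector], ys: List[float], beta: Vector) -> float:
    """Negative log likelihood of the whole dataset under the logistic model."""
    def point_nll(x: Vector, y: float) -> float:
        p = logistic(dot(x, beta))
        return -math.log(p) if y == 1 else -math.log(1 - p)
    return sum(point_nll(x, y) for x, y in zip(xs, ys))

def negative_log_gradient(xs: List[Vector], ys: List[float], beta: Vector) -> Vector:
    """Gradient of the negative log likelihood: sum over points of (p - y) * x_j."""
    return [sum((logistic(dot(x, beta)) - y) * x[j] for x, y in zip(xs, ys))
            for j in range(len(beta))]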
from scratch.linear_algebra import Vector, add, scalar_multiply

def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:
    """Moves `step_size` in the `gradient` direction from `v`."""
    step = scalar_multiply(step_size, gradient)
    return add(v, step)

from scratch.gradient_descent import gradient_step
import random, tqdm

num_epochs = 10000
random.seed(0)

guess = [random.random(), random.random()]  # choose random value to start

learning_rate = 0.00001

with tqdm.trange(num_epochs) as t:
    for _ in t:
        alpha, beta = guess

        # Partial derivative of loss with respect to alpha
        grad_a = sum(2 * error(alpha, beta, x_i, y_i)
                     for x_i, y_i in zip(num_friends_good,
                                         daily_minutes_good))

        # Partial derivative of loss with respect to beta
        grad_b = sum(2 * error(alpha, beta, x_i, y_i) * x_i
                     for x_i, y_i in zip(num_friends_good,
                                         daily_minutes_good))

        # Compute loss to stick in the tqdm description
        loss = sum_of_sqerrors(alpha, beta,
                               num_friends_good, daily_minutes_good)
        t.set_description(f"loss: {loss:.3f}")

        # Finally, update the guess
        guess = gradient_step(guess, [grad_a, grad_b], -learning_rate)

# We should get pretty much the same results:
alpha, beta = guess
print(alpha)
print(beta)
def main():
    from matplotlib import pyplot as plt
    plt.close()
    plt.clf()
    plt.gca().clear()

    from scratch.working_with_data import rescale
    from scratch.multiple_regression import least_squares_fit, predict
    from scratch.gradient_descent import gradient_step

    learning_rate = 0.001
    rescaled_xs = rescale(xs)
    beta = least_squares_fit(rescaled_xs, ys, learning_rate, 1000, 1)
    # [0.26, 0.43, -0.43]
    predictions = [predict(x_i, beta) for x_i in rescaled_xs]

    plt.scatter(predictions, ys)
    plt.xlabel("predicted value")
    plt.ylabel("actual value")
    # plt.show()
    plt.savefig('im/linear_regression_for_probabilities.png')
    plt.close()

    from scratch.machine_learning import train_test_split
    import random
    import tqdm

    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_xs, ys, 0.33)

    learning_rate = 0.01

    # Pick a random starting point.
    beta = [random.random() for _ in range(3)]

    with tqdm.trange(5000) as t:
        for epoch in t:
            gradient = negative_log_gradient(x_train, y_train, beta)
            beta = gradient_step(beta, gradient, -learning_rate)
            loss = negative_log_likelihood(x_train, y_train, beta)
            t.set_description(f"loss: {loss:.3f} beta: {beta}")

    from scratch.working_with_data import scale
    means, stdevs = scale(xs)
    beta_unscaled = [(beta[0]
                      - beta[1] * means[1] / stdevs[1]
                      - beta[2] * means[2] / stdevs[2]),
                     beta[1] / stdevs[1],
                     beta[2] / stdevs[2]]
    # [8.9, 1.6, -0.000288]

    assert (negative_log_likelihood(xs, ys, beta_unscaled) ==
            negative_log_likelihood(rescaled_xs, ys, beta))

    true_positives = false_positives = true_negatives = false_negatives = 0

    for x_i, y_i in zip(x_test, y_test):
        prediction = logistic(dot(beta, x_i))

        if y_i == 1 and prediction >= 0.5:
            # True positive: the user paid and the classifier predicted it correctly.
            true_positives += 1
        elif y_i == 1:
            # False negative: the user paid but the classifier did not predict it.
            false_negatives += 1
        elif prediction >= 0.5:
            # False positive: the user did not pay but the classifier predicted a payment.
            false_positives += 1
        else:
            # True negative: the user did not pay and the classifier predicted that.
            true_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    print(precision, recall)

    assert precision == 0.75
    assert recall == 0.8

    plt.clf()
    plt.gca().clear()

    predictions = [logistic(dot(beta, x_i)) for x_i in x_test]
    plt.scatter(predictions, y_test, marker='+')
    plt.xlabel("predicted probability")
    plt.ylabel("actual outcome")
    plt.title("Comparison of Actual and Predicted Values")
    plt.show()
    plt.savefig('im/logistic_regression_predicted_vs_actual.png')
    plt.gca().clear()
def main():
    import random
    random.seed(0)

    # training data
    xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
    ys = [[0.], [1.], [1.], [0.]]

    # start with random weights
    network = [
        # hidden layer: 2 inputs -> 2 outputs
        [[random.random() for _ in range(2 + 1)],   # 1st hidden neuron
         [random.random() for _ in range(2 + 1)]],  # 2nd hidden neuron
        # output layer: 2 inputs -> 1 output
        [[random.random() for _ in range(2 + 1)]]   # 1st output neuron
    ]

    from scratch.gradient_descent import gradient_step
    import tqdm

    learning_rate = 1.0

    for epoch in tqdm.trange(20000, desc="neural net for xor"):
        for x, y in zip(xs, ys):
            gradients = sqerror_gradients(network, x, y)

            # Take a gradient step for each neuron in each layer
            network = [[gradient_step(neuron, grad, -learning_rate)
                        for neuron, grad in zip(layer, layer_grad)]
                       for layer, layer_grad in zip(network, gradients)]

    # check that it learned XOR
    assert feed_forward(network, [0, 0])[-1][0] < 0.01
    assert feed_forward(network, [0, 1])[-1][0] > 0.99
    assert feed_forward(network, [1, 0])[-1][0] > 0.99
    assert feed_forward(network, [1, 1])[-1][0] < 0.01

    xs = [binary_encode(n) for n in range(101, 1024)]
    ys = [fizz_buzz_encode(n) for n in range(101, 1024)]

    NUM_HIDDEN = 25

    network = [
        # hidden layer: 10 inputs -> NUM_HIDDEN outputs
        [[random.random() for _ in range(10 + 1)] for _ in range(NUM_HIDDEN)],

        # output layer: NUM_HIDDEN inputs -> 4 outputs
        [[random.random() for _ in range(NUM_HIDDEN + 1)] for _ in range(4)]
    ]

    from scratch.linear_algebra import squared_distance

    learning_rate = 1.0

    with tqdm.trange(500) as t:
        for epoch in t:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = feed_forward(network, x)[-1]
                epoch_loss += squared_distance(predicted, y)
                gradients = sqerror_gradients(network, x, y)

                # Take a gradient step for each neuron in each layer
                network = [[gradient_step(neuron, grad, -learning_rate)
                            for neuron, grad in zip(layer, layer_grad)]
                           for layer, layer_grad in zip(network, gradients)]

            t.set_description(f"fizz buzz (loss: {epoch_loss:.2f})")

    num_correct = 0

    for n in range(1, 101):
        x = binary_encode(n)
        predicted = argmax(feed_forward(network, x)[-1])
        actual = argmax(fizz_buzz_encode(n))
        labels = [str(n), "fizz", "buzz", "fizzbuzz"]
        print(n, labels[predicted], labels[actual])

        if predicted == actual:
            num_correct += 1

    print(num_correct, "/", 100)
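# A hedged sketch of the fizz buzz encoders and argmax used above, following the
# conventions visible in the code: targets are one-hot over
# [number, "fizz", "buzz", "fizzbuzz"] and inputs are the low 10 bits of the number.
# Illustrative stand-ins only.
from typing import List

def fizz_buzz_encode(x: int) -> List[int]:
    if x % 15 == 0:
        return [0, 0, 0, 1]
    elif x % 5 == 0:
        return [0, 0, 1, 0]
    elif x % 3 == 0:
        return [0, 1, 0, 0]
    else:
        return [1, 0, 0, 0]

def binary_encode(x: int) -> List[int]:
    """The low 10 bits of x, least significant bit first."""
    return [(x >> i) & 1 for i in range(10)]

def argmax(xs: List[float]) -> int:
    """The index of the largest value."""
    return max(range(len(xs)), key=lambda i: xs[i])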