def run_networks(): """Train networks using three different values for the learning rate, and store the cost curves in the file ``multiple_eta.json``, where they can later be used by ``make_plot``. """ # Make results more easily reproducible random.seed(12345678) np.random.seed(12345678) training_data, validation_data, test_data = mnist_loader.load_data_wrapper( ) results = [] for eta in LEARNING_RATES: print "\nTrain a network using eta = " + str(eta) net = network2.Network([784, 30, 10]) results.append( net.SGD(training_data, NUM_EPOCHS, 10, eta, lmbda=5.0, evaluation_data=validation_data, monitor_training_cost=True)) f = open("multiple_eta.json", "w") json.dump(results, f) f.close()
def run_network(filename, n, eta):
    """Train the network using both the default and the large starting
    weights.  Store the results in the file with name ``filename``, where
    they can later be used by ``make_plots``.
    """
    # Make results more easily reproducible
    random.seed(12345678)
    np.random.seed(12345678)
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    net = network2.Network([784, n, 10], cost=network2.CrossEntropyCost)
    print("Train the network using the default starting weights.")
    default_vc, default_va, default_tc, default_ta \
        = net.SGD(training_data, 30, 10, eta, lmbda=5.0,
                  evaluation_data=validation_data,
                  monitor_evaluation_accuracy=True)
    print("Train the network using the large starting weights.")
    net.large_weight_initializer()
    large_vc, large_va, large_tc, large_ta \
        = net.SGD(training_data, 30, 10, eta, lmbda=5.0,
                  evaluation_data=validation_data,
                  monitor_evaluation_accuracy=True)
    f = open(filename, "w")
    json.dump({"default_weight_initialization":
                   [default_vc, default_va, default_tc, default_ta],
               "large_weight_initialization":
                   [large_vc, large_va, large_tc, large_ta]},
              f)
    f.close()
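# The companion ``make_plots`` referenced in the docstring is not included in
# this excerpt.  A minimal sketch of what it might look like, reading back the
# two monitored validation-accuracy curves (the legend labels and scaling to
# percent over the 10,000-image validation set are assumptions):
import json

import matplotlib.pyplot as plt

def make_plots(filename):
    """Plot validation accuracy for the two weight initializations."""
    with open(filename) as f:
        results = json.load(f)
    # Each entry is [eval_cost, eval_accuracy, training_cost, training_accuracy];
    # only evaluation accuracy was monitored, so index 1 holds the curve.
    default_va = results["default_weight_initialization"][1]
    large_va = results["large_weight_initialization"][1]
    plt.plot([a / 100.0 for a in default_va], label="Default weight initialization")
    plt.plot([a / 100.0 for a in large_va], label="Large weight initialization")
    plt.xlabel("Epoch")
    plt.ylabel("Classification accuracy (%)")
    plt.legend(loc="lower right")
    plt.show()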
def main():
    # Load the data
    full_td, _, _ = mnist_loader.load_data_wrapper()
    td = full_td[:1000]  # Just use the first 1000 items of training data
    epochs = 500  # Number of epochs to train for

    print("\nTwo hidden layers:")
    net = network2.Network([784, 30, 30, 10])
    initial_norms(td, net)
    abbreviated_gradient = [
        ag[:6] for ag in get_average_gradient(net, td)[:-1]]
    print("Saving the averaged gradient for the top six neurons in each "
          "layer.\nWARNING: This will affect the look of the book, so be "
          "sure to check the\nrelevant material (early chapter 5).")
    f = open("initial_gradient.json", "w")
    json.dump(abbreviated_gradient, f)
    f.close()
    shutil.copy("initial_gradient.json", "../../js/initial_gradient.json")
    training(td, net, epochs, "norms_during_training_2_layers.json")
    plot_training(epochs, "norms_during_training_2_layers.json", 2)

    print("\nThree hidden layers:")
    net = network2.Network([784, 30, 30, 30, 10])
    initial_norms(td, net)
    training(td, net, epochs, "norms_during_training_3_layers.json")
    plot_training(epochs, "norms_during_training_3_layers.json", 3)

    print("\nFour hidden layers:")
    net = network2.Network([784, 30, 30, 30, 30, 10])
    initial_norms(td, net)
    training(td, net, epochs, "norms_during_training_4_layers.json")
    plot_training(epochs, "norms_during_training_4_layers.json", 4)
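# ``main`` calls four helpers defined elsewhere in this script:
# ``initial_norms``, ``get_average_gradient``, ``training`` and
# ``plot_training``.  The sketch below shows one way the first two could be
# written on top of ``network2.Network.backprop`` (which returns the bias and
# weight gradients for a single example); it illustrates the idea of
# per-layer average gradients, and is not the original implementation.
import numpy as np

def get_average_gradient(net, training_data):
    """Return, layer by layer, the bias gradient averaged over the data."""
    nabla_b_results = [net.backprop(x, y)[0] for x, y in training_data]
    summed = [sum(layer_grads) for layer_grads in zip(*nabla_b_results)]
    return [(np.reshape(g, len(g)) / len(training_data)).tolist()
            for g in summed]

def initial_norms(training_data, net):
    """Print the norm of the average gradient in each hidden layer."""
    average_gradient = get_average_gradient(net, training_data)
    norms = [np.linalg.norm(avg) for avg in average_gradient[:-1]]
    print("Average gradient norms for the hidden layers: " + str(norms))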
def run_networks():
    # Make results more easily reproducible
    random.seed(12345678)
    np.random.seed(12345678)
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost())
    accuracies = []
    for size in SIZES:
        print("\n\nTraining network with data set size %s" % size)
        net.large_weight_initializer()
        # Shrink the epoch count as the training set grows, so the total
        # number of gradient updates stays roughly constant.  Integer
        # division keeps num_epochs an int under Python 3 as well.
        num_epochs = 1500000 // size
        net.SGD(training_data[:size], num_epochs, 10, 0.5, lmbda=size * 0.0001)
        accuracy = net.accuracy(validation_data) / 100.0
        print("Accuracy was %s percent" % accuracy)
        accuracies.append(accuracy)
    f = open("more_data.json", "w")
    json.dump(accuracies, f)
    f.close()
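# ``SIZES`` and the code that turns ``more_data.json`` into a plot are defined
# elsewhere in this script.  A minimal sketch, assuming SIZES is a roughly
# logarithmically spaced list of training-set sizes (the exact values and the
# log-scale axis are assumptions):
import json

import matplotlib.pyplot as plt

SIZES = [100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000]  # assumed

def make_plot():
    """Plot validation accuracy against training-set size."""
    with open("more_data.json") as f:
        accuracies = json.load(f)
    plt.plot(SIZES, accuracies, "o-")
    plt.xscale("log")
    plt.xlabel("Training set size")
    plt.ylabel("Validation accuracy (%)")
    plt.show()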
def run_network(filename, num_epochs, training_set_size=1000, lmbda=0.0):
    """Train the network for ``num_epochs`` on ``training_set_size``
    images, and store the results in ``filename``.  Those results can
    later be used by ``make_plots``.  Note that the results are stored
    to disk in large part because it's convenient not to have to
    ``run_network`` each time we want to make a plot (it's slow).
    """
    # Make results more easily reproducible
    random.seed(12345678)
    np.random.seed(12345678)
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost())
    net.large_weight_initializer()
    test_cost, test_accuracy, training_cost, training_accuracy \
        = net.SGD(training_data[:training_set_size], num_epochs, 10, 0.5,
                  evaluation_data=test_data, lmbda=lmbda,
                  monitor_evaluation_cost=True,
                  monitor_evaluation_accuracy=True,
                  monitor_training_cost=True,
                  monitor_training_accuracy=True)
    f = open(filename, "w")
    json.dump([test_cost, test_accuracy, training_cost, training_accuracy], f)
    f.close()
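# ``make_plots`` is referenced in the docstring but not included in this
# excerpt.  A minimal sketch that reads the stored results back and shows the
# overfitting picture (training cost falling while test accuracy flattens);
# the two-panel layout and scaling over the 10,000-image test set are
# assumptions:
import json

import matplotlib.pyplot as plt

def make_plots(filename, test_set_size=10000):
    with open(filename) as f:
        test_cost, test_accuracy, training_cost, training_accuracy = json.load(f)
    fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
    ax1.plot(training_cost)
    ax1.set_ylabel("Cost on the training data")
    ax2.plot([100.0 * a / test_set_size for a in test_accuracy])
    ax2.set_ylabel("Accuracy (%) on the test data")
    ax2.set_xlabel("Epoch")
    plt.show()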
from mnist import mnist_loader, network2

# ======================================== read data
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
training_data = list(training_data)

# ======================================== network
# net_1 = network.Network([784, 30, 10])
# net_1.SGD(training_data, 30, 10, 3.0, test_data=test_data)

# ======================================== network2
net_2 = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost)
net_2.large_weight_initializer()
net_2.SGD(training_data, 30, 10, 0.1, lmbda=5.0,
          evaluation_data=validation_data,
          monitor_evaluation_accuracy=True)

# ======================================== network3
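# The network3 section is empty.  network3 is the Theano-based implementation
# from the same repository; the lines below sketch one plausible call pattern
# and are left commented out, like the network section above.  The import
# path, layer sizes and hyper-parameters are illustrative assumptions.
# from mnist import network3
# training_data_3, validation_data_3, test_data_3 = network3.load_data_shared()
# mini_batch_size = 10
# net_3 = network3.Network([
#     network3.FullyConnectedLayer(n_in=784, n_out=100),
#     network3.SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
# net_3.SGD(training_data_3, 60, mini_batch_size, 0.1,
#           validation_data_3, test_data_3)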