def __init__(self, input_units, hidden_units, output_units,
             w_initializers=initializers.Xavier(),
             recursive_w_initializers=initializers.Xavier()):
    # how many sets of biases??
    self.w_x = w_initializers((input_units, hidden_units))             # input to hidden
    self.w_h = recursive_w_initializers((hidden_units, hidden_units))  # hidden to hidden
    self.w_y = w_initializers((hidden_units, output_units))            # hidden to output

    self.optimizer = None
    self._trainable = True
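
# A minimal sketch of how the three weight matrices above would typically be used in
# a recurrent forward step. The tanh nonlinearity, the hidden-state argument and the
# helper name `rnn_step` are illustrative assumptions, not part of the layer above.
import numpy as np

def rnn_step(layer, x, h_prev):
    # hidden state update: combine the current input with the previous hidden state
    h = np.tanh(x @ layer.w_x + h_prev @ layer.w_h)
    # output projection: map the hidden state to the output units
    y = h @ layer.w_y
    return h, y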
def test_overfitting(cifar, momentum):
    training = cifar.get_named_batches('data_batch_1').subset(100)

    net = Network()
    net.add_layer(
        layers.Linear(cifar.input_size, 50, 0, initializers.Xavier()))
    net.add_layer(layers.ReLU(50))
    net.add_layer(
        layers.Linear(50, cifar.output_size, 0, initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    opt = MomentumSGD(net, initial_learning_rate=0.005, momentum=momentum)
    opt.train(training, training, 400)

    costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                          opt.cost_train, opt.cost_val,
                          'images/overfit_mom{}.png'.format(momentum))
    show_plot('images/overfit_mom{}.png'.format(momentum))
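
# A minimal sketch of the parameter update a momentum SGD optimizer such as the one
# used above typically applies; the names `velocity`, `lr` and `grad` are illustrative
# assumptions and not the actual attributes of MomentumSGD.
def momentum_update(weights, grad, velocity, lr=0.005, momentum=0.9):
    # accumulate an exponentially decaying average of past gradient steps
    velocity = momentum * velocity - lr * grad
    # move the parameters along the accumulated velocity
    return weights + velocity, velocity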
def test_vanilla(cifar):
    training = cifar.get_named_batches('data_batch_1')
    validation = cifar.get_named_batches('data_batch_2')

    net = Network()
    net.add_layer(
        layers.Linear(cifar.input_size, cifar.output_size, 0,
                      initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    opt = VanillaSGD(net, initial_learning_rate=0.01,
                     decay_factor=0.99, shuffle=True)
    opt.train(training, validation, 100, 500)

    costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                          opt.cost_train, opt.cost_val, 'images/vanilla.png')
    show_plot('images/vanilla.png')
    return (self.cost(dataset.one_hot_labels, None, Y),
            self.accuracy(dataset.one_hot_labels, None, Y))


if __name__ == '__main__':
    import layers
    import datasets
    import initializers
    import matplotlib.pyplot as plt

    cifar = datasets.CIFAR10()
    training = cifar.get_named_batches('data_batch_1', limit=4)

    net = Network()
    net.add_layer(layers.Linear(cifar.input_size, 50, 0, initializers.Xavier()))
    net.add_layer(layers.ReLU(50))
    net.add_layer(
        layers.Linear(50, cifar.output_size, 0, initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    Y = net.evaluate(training.images)
    print('Cost:', net.cost(training.one_hot_labels, None, Y))
    print('Accuracy: {:.2%}'.format(
        net.accuracy(training.one_hot_labels, None, Y)))

    plt.subplot(1, 3, 1)
    plt.imshow(Y)
    plt.yticks(range(10), cifar.labels)
    plt.xlabel('Image number')
    plt.title('Probabilities')
def __init__(self, input_units, output_units,
             w_initializers=initializers.Xavier(),
             biases_initializer=initializers.Zeros()):
    self.weights = w_initializers((input_units, output_units))
    # pass a proper one-element tuple as the shape; (output_units) is just an int
    self.biases = biases_initializer((output_units,))

    self.optimizer = None
    self._trainable = True
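
# A minimal sketch of the forward pass these parameters would support, assuming
# row-major input batches of shape (n_samples, input_units); the helper name
# `linear_forward` and that data layout are assumptions for illustration only.
def linear_forward(layer, X):
    # affine transformation: project the inputs and shift by the biases
    return X @ layer.weights + layer.biases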
def create_and_train(training: Batch, validation: Batch, epochs: int,
                     hidden_size: int, regularization: float,
                     initial_learning_rate: float, decay_factor: float,
                     momentum: float, train_id: str, test: Batch = None):
    """
    Create and train a 2-layer network:
    - subtract the mean of the training set
    - linear layer
    - ReLU
    - linear layer
    - softmax

    The only parameters that are fixed are the layer initializers
    and the batch size.

    :param training: training set
    :param validation: validation set
    :param epochs: number of training epochs
    :param hidden_size: number of units in the hidden layer
    :param regularization: regularization strength for the linear layers
    :param initial_learning_rate: starting learning rate
    :param decay_factor: per-epoch learning rate decay
    :param momentum: momentum coefficient
    :param train_id: identifier used to name the output plots
    :param test: optional test set for final evaluation
    :return: a dict with the hyperparameters, training curves and final metrics
    """
    # Mean of the training set
    mu = training.mean()

    # Definition of the network
    net = Network()
    net.add_layer(layers.BatchNormalization(CIFAR10.input_size, mu))
    net.add_layer(layers.Linear(CIFAR10.input_size, hidden_size,
                                regularization, initializers.Xavier()))
    net.add_layer(layers.ReLU(hidden_size))
    net.add_layer(layers.Linear(hidden_size, CIFAR10.output_size,
                                regularization, initializers.Xavier()))
    net.add_layer(layers.Softmax(CIFAR10.output_size))

    # Training
    opt = optimizers.MomentumSGD(net, initial_learning_rate, decay_factor,
                                 True, momentum)
    opt.train(training, validation, epochs, 10000)

    # Plotting
    plot = costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                                 opt.cost_train, opt.cost_val,
                                 'images/{}.png'.format(train_id))

    result = {
        'epochs': epochs,
        'hidden_size': hidden_size,
        'regularization': regularization,
        'initial_learning_rate': initial_learning_rate,
        'decay_factor': decay_factor,
        'momentum': momentum,
        # 'net': net,
        # 'opt': opt,
        'epoch_nums': opt.epoch_nums,
        'cost_train': opt.cost_train,
        'acc_train': opt.acc_train,
        'cost_val': opt.cost_val,
        'acc_val': opt.acc_val,
        'final_cost_train': opt.cost_train[-1],
        'final_acc_train': opt.acc_train[-1],
        'final_cost_val': opt.cost_val[-1],
        'final_acc_val': opt.acc_val[-1],
        'plot': plot
    }

    # Test set
    if test is not None:
        result['final_cost_test'], result['final_acc_test'] = \
            net.cost_accuracy(test)
        result['confusion_matrix'] = confusion_matrix_plot(
            net, test, CIFAR10().labels,
            'images/{}_conf.png'.format(train_id))

    return result
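
# A minimal usage sketch for create_and_train; the batch names, hyperparameter
# values and the 'coarse_search' identifier below are illustrative assumptions.
if __name__ == '__main__':
    cifar = CIFAR10()
    training = cifar.get_named_batches('data_batch_1')
    validation = cifar.get_named_batches('data_batch_2')

    result = create_and_train(training, validation,
                              epochs=20,
                              hidden_size=50,
                              regularization=0.001,
                              initial_learning_rate=0.01,
                              decay_factor=0.99,
                              momentum=0.8,
                              train_id='coarse_search')
    print('Final validation accuracy: {:.2%}'.format(result['final_acc_val']))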
          np.abs(grad).max(), np.abs(grad_num).max()))


if __name__ == '__main__':
    import layers
    import initializers
    import datasets

    cifar = datasets.CIFAR10()
    training = cifar.get_named_batches('data_batch_1').subset(50)

    # One layer network with regularization
    net = Network()
    linear = layers.Linear(cifar.input_size, cifar.output_size, 0.2,
                           initializers.Xavier())
    net.add_layer(linear)
    net.add_layer(layers.Softmax(cifar.output_size))

    outputs = net.evaluate(training.images)
    net.backward(training.one_hot_labels)
    cost = net.cost(training.one_hot_labels, outputs=outputs)

    # Weights matrix
    grad_num = compute_grads_for_matrix(training.one_hot_labels,
                                        training.images,
                                        linear.W, net, cost)
    print_grad_diff(linear.grad_W, grad_num, 'Grad W')

    # Biases matrix
    grad_num = compute_grads_for_matrix(training.one_hot_labels,
                                        training.images,
                                        linear.b, net, cost)
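
# For reference, a minimal sketch of centered-difference gradient checking over a
# parameter matrix. This is only an assumption about the kind of computation
# compute_grads_for_matrix performs, not its actual implementation; the helper name
# `numerical_grads_sketch` and the step size h are illustrative.
import numpy as np

def numerical_grads_sketch(one_hot_labels, inputs, matrix, net, h=1e-6):
    grad_num = np.zeros_like(matrix)
    for i in np.ndindex(matrix.shape):
        original = matrix[i]
        # perturb one entry in each direction and measure the change in cost
        matrix[i] = original + h
        cost_plus = net.cost(one_hot_labels, None, net.evaluate(inputs))
        matrix[i] = original - h
        cost_minus = net.cost(one_hot_labels, None, net.evaluate(inputs))
        matrix[i] = original
        grad_num[i] = (cost_plus - cost_minus) / (2 * h)
    return grad_num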