def network_and_weights(request):
    """Return a small four-layer network with reproducible random weights.

    The first layer uses the ``Identity`` activation; the remaining three
    layers use the activation supplied through ``request.param``. Every
    layer has five neurons. The global numpy random generator is seeded
    with 0 so that the sampled weights are the same on every call.

    Returns:
        A ``(network, weights)`` tuple.
    """
    np.random.seed(0)
    layers = [Layer(5, Identity)]
    layers.extend(Layer(5, request.param) for _ in range(3))
    network = Network(layers)
    weights = Matrices(network.shapes)
    # Fill the flat weight storage with small zero-centered Gaussian noise.
    weights.flat = np.random.normal(loc=0, scale=0.01, size=len(weights.flat))
    return network, weights
def _delta_weights(self, delta_layers):
    """Turn per-layer deltas into a gradient with respect to the weights.

    For each connection, the gradient entry is the delta at the target
    neuron multiplied by the activation of the source neuron, i.e. the
    outer product of the source activations and the target deltas.

    Args:
        delta_layers: One delta vector per connection, paired in order
            with every layer of the network except the last one.

    Returns:
        A ``Matrices`` object shaped like ``self.network.shapes``.
    """
    gradient = Matrices(self.network.shapes)
    source_layers = self.network.layers[:-1]
    for position, (source, delta) in enumerate(
            zip(source_layers, delta_layers)):
        # Prepend a constant 1 as the bias activation so that the bias
        # weights receive a gradient as well.
        activations = np.insert(source.outgoing, 0, 1)
        assert activations[0] == 1
        gradient[position] = np.outer(activations, delta)
    return gradient
def _init_network(self):
    """Build the network for the problem and set up its weights.

    When ``self.load`` is truthy it is passed to ``np.load`` and the
    result is used as the flat weight storage. Otherwise the weights are
    sampled from a normal distribution parameterised by the problem's
    ``weight_mean`` and ``weight_scale``.
    """
    self.network = Network(self.problem.layers)
    self.weights = Matrices(self.network.shapes)

    if not self.load:
        # No stored weights requested: start from random values.
        self.weights.flat = np.random.normal(
            self.problem.weight_mean,
            self.problem.weight_scale,
            len(self.weights.flat))
        return

    loaded = np.load(self.load)
    # NOTE(review): this check is an assert, so it is skipped when Python
    # runs with -O; a mismatch would then go unnoticed here.
    assert loaded.shape == self.weights.shape, (
        'weights to load must match problem definition')
    self.weights.flat = loaded
def __call__(self, weights, example):
    """Estimate the weight gradient numerically for a single example.

    Every weight is nudged by ``self.distance`` upwards and downwards in
    turn; the cost is evaluated for both and the central difference of
    the two costs becomes that weight's gradient entry.

    Args:
        weights: The weight matrices to differentiate around. They are
            not modified; a copy is perturbed instead.
        example: The example handed to ``self._evaluate``.

    Returns:
        A ``Matrices`` object with the same shapes as ``weights``.
    """
    # Scratch copy of the weights that can be perturbed freely.
    probe = Matrices(weights.shapes, weights.flat.copy())
    gradient = Matrices(weights.shapes)
    for layer, matrix in enumerate(weights):
        for position, value in np.ndenumerate(matrix):
            # Cost with this single weight shifted up.
            probe[layer][position] = value + self.distance
            cost_above = self._evaluate(probe, example)
            # Cost with this single weight shifted down.
            probe[layer][position] = value - self.distance
            cost_below = self._evaluate(probe, example)
            # Undo the perturbation before moving on to the next weight.
            probe[layer][position] = value
            gradient[layer][position] = (
                (cost_above - cost_below) / (2 * self.distance))
    return gradient
def random_matrices(shapes):
    """Create ``Matrices`` of the given shapes filled with seeded noise.

    The global numpy random generator is seeded with 0 first, so repeated
    calls with the same shapes produce the same values.
    """
    np.random.seed(0)
    result = Matrices(shapes)
    # Zero-mean Gaussian values with a standard deviation of 0.1.
    result.flat = np.random.normal(loc=0, scale=0.1, size=len(result.flat))
    return result
def matrices():
    """Return a ``Matrices`` instance with shapes (5, 8) and (4, 2)."""
    shapes = [(5, 8), (4, 2)]
    return Matrices(shapes)
def __call__(self, weights, examples):
    """Return the gradient averaged over a batch of examples.

    Args:
        weights: The weight matrices passed on to ``self.backprop``.
        examples: A sized collection of examples; its length is used as
            the divisor for the average.

    Returns:
        The sum of the per-example gradients divided by the number of
        examples.
    """
    total = Matrices(weights.shapes)
    for sample in examples:
        total += self.backprop(weights, sample)
    count = len(examples)
    return total / count
from layered.network import Matrices
from layered.cost import SquaredError
from layered.gradient import Backprop
from layered.optimization import GradientDecent
from layered.dataset import Mnist

# Network layout: 784 inputs, three Relu hidden layers, 10 Softmax outputs.
num_inputs = 784
num_outputs = 10
network = Network(
    [Layer(num_inputs, Identity)]
    + [Layer(size, Relu) for size in (700, 500, 300)]
    + [Layer(num_outputs, Softmax)]
)

# Start from small zero-centered Gaussian weights.
weight_scale = 0.01
weights = Matrices(network.shapes)
weights.flat = np.random.normal(
    loc=0, scale=weight_scale, size=len(weights.flat))

# Gradient computation and the update rule applied to the weights.
backprop = Backprop(network, cost=SquaredError())
descent = GradientDecent()

# Update the weights once per training example.
dataset = Mnist()
for example in dataset.training:
    gradient = backprop(weights, example)
    weights = descent(weights, gradient, learning_rate=0.1)
momentum=0.3, weight_scale=0.01, weight_decay=1e-3, evaluate_every=5000, dataset=Mnist(), cost=Squared()) # Define model and initialize weights network = Network([ Layer(len(problem.dataset.training[0].data), Linear), Layer(700, Relu), Layer(500, Relu), Layer(300, Relu), Layer(len(problem.dataset.training[0].target), Sigmoid) ]) weights = Matrices(network.shapes) weights.flat = np.random.normal(0, problem.weight_scale, len(weights.flat)) # Classes needed during training backprop = ParallelBackprop(network, problem.cost) momentum = Momentum() decent = GradientDecent() decay = WeightDecay() plot = Plot() # Train the model repeats = repeated(problem.dataset.training, problem.training_rounds) batches = batched(repeats, problem.batch_size) for index, batch in enumerate(batches): gradient = backprop(weights, batch) gradient = momentum(gradient, problem.momentum)