def back_propagation(self, x, y):
    """
    Compute the gradients for a single training sample.
    :param x: the sample's input
    :param y: the sample's label
    :return: a tuple with the gradients of the weights and the biases
    """
    zs, activations = self.feed_forward(x)
    delta_weights = [np.zeros(e.shape) for e in self.weights]
    delta_biases = [np.zeros(e.shape) for e in self.biases]
    # Compute the error at the output layer
    delta = utils.cross_entropy_derivative(
        activations[-1], y) * utils.sigmoid_derivative(zs[-1])
    # Propagate the error backwards
    delta_weights[-1] = np.dot(delta, activations[-2].transpose())
    delta_biases[-1] = delta
    for last in range(2, self.num_layers):
        z = zs[-last]
        delta = np.dot(self.weights[-last + 1].transpose(),
                       delta) * utils.sigmoid_derivative(z)
        delta_weights[-last] = np.dot(delta, activations[-last - 1].transpose())
        delta_biases[-last] = delta
    return delta_weights, delta_biases
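# The snippet above relies on a `utils` module that is not shown. Below is a
# minimal, hypothetical sketch of those helpers (an assumption, not the
# original module): sigmoid_derivative is applied to the pre-activation z,
# matching the call on zs[-1], and cross_entropy_derivative is dC/da for the
# binary cross-entropy cost, so the product of the two reduces to (a - y).
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_derivative(z):
    # derivative of the sigmoid with respect to the pre-activation z
    s = sigmoid(z)
    return s * (1.0 - s)

def cross_entropy_derivative(a, y):
    # dC/da of -[y*ln(a) + (1 - y)*ln(1 - a)]
    return (a - y) / (a * (1.0 - a))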
def back_propagate(self, output, label):
    # normalize_output()
    error = label - output
    delta = np.multiply(error, sigmoid_derivative(self.layers[-1].input))
    delta_weights = []
    delta_bias = []
    # Walk backwards from the output layer, accumulating the scaled updates
    for i in range(len(self.weights), 0, -1):
        delta_w = self.layers[i - 1].output * delta.transpose() * self.learning_rate
        delta_b = self.learning_rate * delta
        delta_weights.append(delta_w)
        delta_bias.append(delta_b)
        error = self.weights[i - 1] * delta
        delta = np.multiply(error, sigmoid_derivative(self.layers[i - 1].input))
    self.update_weights(delta_weights, delta_bias)
def compute_gradient_back_propagation(self, inputs, expected_outputs):
    """
    Computes the gradient with respect to the NN's parameters using back propagation.

    :param inputs: inputs to the network.
    :type inputs: list of numpy matrices.
    :param expected_outputs: expected outputs of the network.
    :type expected_outputs: list of numpy matrices.
    :return weights_gradient: gradients of the weights at each layer.
    :rtype weights_gradient: L-dimensional list of numpy matrices.
    :return biases_gradient: gradients of the biases at each layer.
    :rtype biases_gradient: L-dimensional list of numpy matrices.
    """
    weights_gradient = [None] * 3
    biases_gradient = [None] * 3
    weights_gradient[1] = np.zeros((self.num_hiddens, self.num_inputs))
    weights_gradient[2] = np.zeros((self.num_outputs, self.num_hiddens))
    biases_gradient[1] = np.zeros((self.num_hiddens, 1))
    biases_gradient[2] = np.zeros((self.num_outputs, 1))

    # Accumulate the gradients, averaged over all training cases
    num_cases = len(inputs)
    for i in range(num_cases):
        z, a = self.forward_propagation(inputs[i])
        delta_2 = a[2] - expected_outputs[i]
        delta_1 = np.multiply(np.dot(self.weights[2].T, delta_2),
                              sigmoid_derivative(z[1]))
        biases_gradient[2] += (1 / num_cases) * delta_2
        weights_gradient[2] += (1 / num_cases) * np.dot(delta_2, a[1].T)
        biases_gradient[1] += (1 / num_cases) * delta_1
        weights_gradient[1] += (1 / num_cases) * np.dot(delta_1, a[0].T)
    return weights_gradient, biases_gradient
def compute_gradient_back_propagation(self, inputs, expected_outputs):
    """
    Computes the gradient with respect to the NN's parameters using back propagation.

    :param inputs: inputs to the network.
    :type inputs: list of numpy matrices.
    :param expected_outputs: expected outputs of the network.
    :type expected_outputs: list of numpy matrices.
    :return weights_gradient: gradients of the weights at each layer.
    :rtype weights_gradient: L-dimensional list of numpy matrices.
    :return biases_gradient: gradients of the biases at each layer.
    :rtype biases_gradient: L-dimensional list of numpy matrices.
    """
    weights_gradient = [None] * 3
    biases_gradient = [None] * 3
    weights_gradient[1] = np.zeros((self.num_hiddens, self.num_inputs))
    weights_gradient[2] = np.zeros((self.num_outputs, self.num_hiddens))
    biases_gradient[1] = np.zeros((self.num_hiddens, 1))
    biases_gradient[2] = np.zeros((self.num_outputs, 1))

    # Accumulate the gradients over all training cases
    num_cases = len(inputs)
    outputs = [None] * num_cases
    for i in range(num_cases):
        z, a = self.forward_propagation(inputs[i])
        outputs[i] = a[-1]
        y = expected_outputs
        yhat = outputs
        delta = [None] * 3
        delta[1] = np.zeros((self.num_hiddens, 1))
        delta[2] = np.zeros((self.num_outputs, 1))

        # Output-layer error
        for c in range(self.num_outputs):
            delta[2][c] = yhat[i][c] - y[i][c]

        # Hidden-layer error
        for k in range(self.num_hiddens):
            aux = 0
            for c in range(self.num_outputs):
                b = np.array(self.weights[2][c])
                aux += b[0][k] * delta[2][c] * sigmoid_derivative(z[1][k])
            delta[1][k] = aux

        # Accumulate output-layer gradients
        for c in range(self.num_outputs):
            for k in range(self.num_hiddens):
                weights_gradient[2][c][k] += (delta[2][c] * a[1][k]).item()
            biases_gradient[2][c] += delta[2][c]

        # Accumulate hidden-layer gradients
        for k in range(self.num_hiddens):
            for j in range(self.num_inputs):
                weights_gradient[1][k][j] += (delta[1][k] * a[0][j]).item()
            biases_gradient[1][k] += delta[1][k]

    # Average over the number of cases
    weights_gradient[1] /= num_cases
    weights_gradient[2] /= num_cases
    biases_gradient[1] /= num_cases
    biases_gradient[2] /= num_cases
    return weights_gradient, biases_gradient
def compute_gradient_back_propagation(self, inputs, expected_outputs):
    """
    Computes the gradient with respect to the NN's parameters using back propagation.

    :param inputs: inputs to the network.
    :type inputs: list of numpy matrices.
    :param expected_outputs: expected outputs of the network.
    :type expected_outputs: list of numpy matrices.
    :return weights_gradient: gradients of the weights at each layer.
    :rtype weights_gradient: L-dimensional list of numpy matrices.
    :return biases_gradient: gradients of the biases at each layer.
    :rtype biases_gradient: L-dimensional list of numpy matrices.
    """
    weights_gradient = [None] * 3
    biases_gradient = [None] * 3
    weights_gradient[1] = np.zeros((self.num_hiddens, self.num_inputs))
    weights_gradient[2] = np.zeros((self.num_outputs, self.num_hiddens))
    biases_gradient[1] = np.zeros((self.num_hiddens, 1))
    biases_gradient[2] = np.zeros((self.num_outputs, 1))

    num_cases = len(inputs)
    outputs = [None] * num_cases
    for i in range(num_cases):
        z, a = self.forward_propagation(inputs[i])
        outputs[i] = a[-1]
        delta1 = np.zeros((self.num_hiddens, 1))
        delta2 = np.zeros((self.num_outputs, 1))

        # Get error from the last layer
        delta2 = outputs[i] - expected_outputs[i]
        # Update biases gradient based on mean
        biases_gradient[2] += delta2 / num_cases

        # Get error from the hidden layer
        for c in range(self.num_outputs):
            delta1 = delta1 + np.multiply(
                np.dot(self.weights[2][c].T, delta2[c]),
                sigmoid_derivative(z[1]))
        # Update biases gradient based on mean
        biases_gradient[1] += delta1 / num_cases

        # Update weights gradient based on mean
        weights_gradient[2] += np.dot(delta2, a[1].T) / num_cases
        weights_gradient[1] += np.dot(delta1, a[0].T) / num_cases
    return weights_gradient, biases_gradient
def LogisticLinearLeaner(dataset, learning_rate=0.01, epochs=100):
    """
    [Section 18.6.4]
    Linear classifier with logistic regression.
    """
    idx_i = dataset.inputs
    idx_t = dataset.target
    examples = dataset.examples
    num_examples = len(examples)

    # X transpose
    X_col = [dataset.values[i] for i in idx_i]  # vertical columns of X

    # add dummy
    ones = [1 for _ in range(len(examples))]
    X_col = [ones] + X_col

    # initialize random weights
    num_weights = len(idx_i) + 1
    w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights)

    for epoch in range(epochs):
        err = []
        h = []
        # pass over all examples
        for example in examples:
            x = [1] + example
            y = sigmoid(dot_product(w, x))
            h.append(sigmoid_derivative(y))
            t = example[idx_t]
            err.append(t - y)

        # update weights
        for i in range(len(w)):
            buffer = [x * y for x, y in zip(err, h)]
            w[i] = w[i] + learning_rate * (dot_product(buffer, X_col[i]) / num_examples)

    def predict(example):
        x = [1] + example
        return sigmoid(dot_product(w, x))

    return predict
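# LogisticLinearLeaner above (like the BackPropagationLearner variants further
# down) assumes small list-based helpers such as random_weights and dot_product.
# The definitions below are a plausible sketch stated as an assumption, not the
# project's actual utility module:
import random

def random_weights(min_value, max_value, num_weights):
    # uniform random weights in [min_value, max_value)
    return [random.uniform(min_value, max_value) for _ in range(num_weights)]

def dot_product(xs, ys):
    # inner product of two equal-length sequences
    return sum(x * y for x, y in zip(xs, ys))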
def compute_gradient_back_propagation(self, inputs, expected_outputs):
    """
    Computes the gradient with respect to the NN's parameters using back propagation.

    :param inputs: inputs to the network.
    :type inputs: list of numpy matrices.
    :param expected_outputs: expected outputs of the network.
    :type expected_outputs: list of numpy matrices.
    :return weights_gradient: gradients of the weights at each layer.
    :rtype weights_gradient: L-dimensional list of numpy matrices.
    :return biases_gradient: gradients of the biases at each layer.
    :rtype biases_gradient: L-dimensional list of numpy matrices.
    """
    weights_gradient = [None] * 3
    biases_gradient = [None] * 3
    weights_gradient[1] = np.zeros((self.num_hiddens, self.num_inputs))
    weights_gradient[2] = np.zeros((self.num_outputs, self.num_hiddens))
    biases_gradient[1] = np.zeros((self.num_hiddens, 1))
    biases_gradient[2] = np.zeros((self.num_outputs, 1))

    # Accumulate the gradients over all training cases
    for i in range(len(inputs)):
        z, a = self.forward_propagation(inputs[i])
        # dz[2] = a[2] - y
        # dW[2] = dz[2] a[1].T
        # db[2] = dz[2]
        # dz[1] = W[2].T dz[2] * g[1]'(z[1])
        # dW[1] = dz[1] x.T
        # db[1] = dz[1]
        dz2 = np.matrix(a[2] - expected_outputs[i])
        weights_gradient[2] += dz2 * a[1].T
        biases_gradient[2] += dz2
        dz1 = np.matrix(np.multiply(self.weights[2].T * dz2, sigmoid_derivative(z[1])))
        weights_gradient[1] += dz1 * inputs[i].T
        biases_gradient[1] += dz1
    return weights_gradient, biases_gradient
def compute_gradient_back_propagation(self, inputs, expected_outputs):
    """
    Computes the gradient with respect to the NN's parameters using back propagation.

    :param inputs: inputs to the network.
    :type inputs: (num_inputs, num_samples) numpy array.
    :param expected_outputs: expected outputs of the network.
    :type expected_outputs: (num_outputs, num_samples) numpy array.
    :return weights_gradient: gradients of the weights at each layer.
    :rtype weights_gradient: 3-dimensional list of numpy arrays.
    :return biases_gradient: gradients of the biases at each layer.
    :rtype biases_gradient: 3-dimensional list of numpy arrays.
    """
    weights_gradient = [None] * 3
    biases_gradient = [None] * 3

    # Compute the gradients for the whole batch in one vectorized pass
    delta = [None] * 3
    z, a = self.forward_propagation(inputs)
    y = expected_outputs
    y_hat = a[-1]
    delta[2] = (y_hat - y)
    delta[1] = (self.weights[2].T @ delta[2]) * sigmoid_derivative(z[1])

    m = inputs.shape[1]
    weights_gradient[2] = delta[2] @ a[1].T / m
    biases_gradient[2] = np.array(np.mean(delta[2], axis=1))
    biases_gradient[2] = biases_gradient[2].reshape(biases_gradient[2].shape[0], 1)
    weights_gradient[1] = delta[1] @ a[0].T / m
    biases_gradient[1] = np.array(np.mean(delta[1], axis=1))
    biases_gradient[1] = biases_gradient[1].reshape(biases_gradient[1].shape[0], 1)
    return weights_gradient, biases_gradient
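# A minimal usage sketch for the vectorized variant above: one plain gradient
# descent step on a two-layer network. The method name, the learning_rate
# argument, and the existence of self.biases alongside self.weights are
# assumptions made for illustration, not part of the original class.
def gradient_descent_step(self, inputs, expected_outputs, learning_rate=0.1):
    weights_gradient, biases_gradient = self.compute_gradient_back_propagation(
        inputs, expected_outputs)
    # index 0 is unused in the gradient lists, so only layers 1 and 2 are updated
    for layer in (1, 2):
        self.weights[layer] -= learning_rate * weights_gradient[layer]
        self.biases[layer] -= learning_rate * biases_gradient[layer]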
def train(X, y, alpha=1, epochs=10000, classes=[]):
    print("Training with alpha:%s" % (str(alpha)))
    print("Input matrix: %sx%s Output matrix: %sx%s"
          % (len(X), len(X[0]), len(X[0]), len(classes)))
    np.random.seed(1)

    last_mean_error = 1
    # randomly initialize the weights with mean 0
    synapse_0 = 2 * np.random.random((len(X[0]), len(classes))) - 1

    layer_0 = X
    for j in range(epochs + 1):
        # forward pass through the single layer
        layer_1 = sigmoid(np.dot(layer_0, synapse_0))
        # error relative to the targets
        layer_1_error = y - layer_1

        if (j % 1000) == 0:
            error = np.mean(np.abs(layer_1_error))
            # stop if the error stops improving or is already small
            if error >= last_mean_error or error < 1e-2:
                print('break:', error, ', ', last_mean_error)
                break
            print('delta after ', j, ' iters:', error)
            last_mean_error = error

        # back-propagate the error and update the weights
        layer_1_delta = layer_1_error * sigmoid_derivative(layer_1)
        synapse_0_weight_update = layer_0.T.dot(layer_1_delta)
        synapse_0 += alpha * synapse_0_weight_update

    now = datetime.datetime.now()
    synapse = {'synapse0': synapse_0.tolist()}
    with open('synapses.json', 'w') as outfile:
        json.dump(synapse, outfile, indent=4)
    print('Train done.')
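# Note that train() above, like several of the learners below, passes an
# already-activated value (layer_1) to sigmoid_derivative rather than the
# pre-activation. Under that convention the helpers would look roughly like
# this (a sketch of the convention, not the original source):
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_derivative(value):
    # `value` is assumed to already be sigmoid(x)
    return value * (1.0 - value)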
def BackPropagationLearner(dataset, net, learning_rate, epochs):
    """[Figure 18.23] The back-propagation algorithm for multilayer network"""
    # Initialise weights
    for layer in net:
        for node in layer:
            node.weights = random_weights(min_value=-0.5, max_value=0.5,
                                          num_weights=len(node.weights))

    examples = dataset.examples
    '''
    As of now dataset.target gives an int instead of list,
    Changing dataset class will have effect on all the learners.
    Will be taken care of later
    '''
    o_nodes = net[-1]
    i_nodes = net[0]
    o_units = len(o_nodes)
    idx_t = dataset.target
    idx_i = dataset.inputs
    n_layers = len(net)

    inputs, targets = init_examples(examples, idx_i, idx_t, o_units)

    for epoch in range(epochs):
        # Iterate over each example
        for e in range(len(examples)):
            i_val = inputs[e]
            t_val = targets[e]

            # Activate input layer
            for v, n in zip(i_val, i_nodes):
                n.value = v

            # Forward pass
            for layer in net[1:]:
                for node in layer:
                    inc = [n.value for n in node.inputs]
                    in_val = dotproduct(inc, node.weights)
                    node.value = node.activation(in_val)

            # Initialize delta
            delta = [[] for i in range(n_layers)]

            # Compute outer layer delta
            # Error for the MSE cost function
            err = [t_val[i] - o_nodes[i].value for i in range(o_units)]
            # The activation function used is the sigmoid function
            delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i]
                         for i in range(o_units)]

            # Backward pass
            h_layers = n_layers - 2
            for i in range(h_layers, 0, -1):
                layer = net[i]
                h_units = len(layer)
                nx_layer = net[i + 1]

                # weights from each ith layer node to each i + 1th layer node
                w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]

                delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                            for j in range(h_units)]

            # Update weights
            for i in range(1, n_layers):
                layer = net[i]
                inc = [node.value for node in net[i - 1]]
                units = len(layer)
                for j in range(units):
                    layer[j].weights = vector_add(layer[j].weights,
                                                  scalar_vector_product(learning_rate * delta[i][j], inc))

    return net
    [1],
    [0],
    [0],
    [1],
])

layer0 = X
hidden_width = 4
synapse0 = generate_random_synapse(get_layer_width(X), hidden_width)
synapse1 = generate_random_synapse(hidden_width, get_layer_width(y))

iterations = int(os.getenv("ITERATIONS", "100000"))
for i in range(iterations):
    # forward pass through both layers
    layer1 = forward_propagate(layer0, synapse0, sigmoid)
    layer2 = forward_propagate(layer1, synapse1, sigmoid)

    # back-propagate the error
    layer2_error = y - layer2
    layer2_delta = layer2_error * sigmoid_derivative(layer2)
    layer1_error = np.dot(layer2_delta, synapse1.T)
    layer1_delta = layer1_error * sigmoid_derivative(layer1)

    # update the weights
    synapse1 += np.dot(layer1.T, layer2_delta)
    synapse0 += np.dot(layer0.T, layer1_delta)

    if DEBUG and (i % (iterations // 10) == 0):
        print("Error: ", layer2_error)

print("Output after training:")
print(layer2)
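# The training loop above depends on a few helpers that are not shown. The
# definitions below are hypothetical reconstructions consistent with how they
# are called (generate_random_synapse returning weights in [-1, 1) and
# get_layer_width reading the number of columns); they are assumptions, not
# the project's own code.
import numpy as np

def get_layer_width(layer):
    return layer.shape[1]

def generate_random_synapse(input_width, output_width):
    return 2 * np.random.random((input_width, output_width)) - 1

def forward_propagate(layer, synapse, activation):
    return activation(np.dot(layer, synapse))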
def compute_gradient_back_propagation(self, inputs, expected_outputs):
    """
    Computes the gradient with respect to the NN's parameters using back propagation

    :param inputs: inputs to the network.
    :type inputs: list of numpy matrices.
    :param expected_outputs: expected outputs of the network.
    :type expected_outputs: list of numpy matrices.
    :return weights_gradient: gradients of the weights at each layer.
    :rtype weights_gradient: L-dimensional list of numpy matrices.
    :return biases_gradient: gradients of the biases at each layer.
    :rtype biases_gradient: L-dimensional list of numpy matrices.
    """
    weights_gradient = [None] * 3
    biases_gradient = [None] * 3
    weights_gradient[1] = np.zeros((self.num_hiddens, self.num_inputs))
    weights_gradient[2] = np.zeros((self.num_outputs, self.num_hiddens))
    biases_gradient[1] = np.zeros((self.num_hiddens, 1))
    biases_gradient[2] = np.zeros((self.num_outputs, 1))

    # read the inputs
    num_cases = len(inputs)
    outputs = [None] * num_cases
    for i in range(num_cases):
        delta = [None] * 3
        delta[1] = np.zeros((self.num_hiddens, 1))
        delta[2] = np.zeros((self.num_outputs, 1))

        # collect the outputs
        z, a = self.forward_propagation(inputs[i])
        outputs[i] = a[-1]

        # compute the output-layer delta
        delta[2] = outputs[i] - expected_outputs[i]

        # compute the hidden-layer delta
        for k in range(self.num_hiddens):
            deltak = 0
            for c in range(self.num_outputs):
                weight = self.weights[2][c, k]
                delta_1 = delta[2][c, 0]
                function = sigmoid_derivative(z[1][k, 0])
                deltak = deltak + weight * delta_1 * function
            delta[1][k, 0] = deltak

        # accumulate the weights gradient
        for c in range(self.num_outputs):
            for k in range(self.num_hiddens):
                delta_1 = delta[2][c, 0]
                output_1 = a[1][k, 0]
                weights_gradient[2][c, k] += delta_1 * output_1

        for k in range(self.num_hiddens):
            for j in range(self.num_inputs):
                delta_1 = delta[1][k, 0]
                output_1 = a[0][j, 0]
                weights_gradient[1][k, j] += delta_1 * output_1

        # accumulate the biases gradient
        biases_gradient[1] += delta[1]
        biases_gradient[2] += delta[2]

    # average over the number of cases
    weights_gradient[1] /= num_cases
    weights_gradient[2] /= num_cases
    biases_gradient[1] /= num_cases
    biases_gradient[2] /= num_cases
    return weights_gradient, biases_gradient
def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid,
                           momentum=False, beta=0.903):
    """[Figure 18.23] The back-propagation algorithm for multilayer networks"""
    # Initialise weights
    for layer in net:
        for node in layer:
            node.weights = random_weights(min_value=-0.5, max_value=0.5,
                                          num_weights=len(node.weights))

    examples = dataset.examples
    '''
    As of now dataset.target gives an int instead of list,
    Changing dataset class will have effect on all the learners.
    Will be taken care of later.
    '''
    o_nodes = net[-1]
    i_nodes = net[0]
    o_units = len(o_nodes)
    idx_t = dataset.target
    idx_i = dataset.inputs
    n_layers = len(net)

    inputs, targets = init_examples(examples, idx_i, idx_t, o_units)

    # Initialize the velocity gradient once, so momentum accumulates across updates
    if momentum:
        v_dw = [[0 for _ in layer] for layer in net]

    for epoch in range(epochs):
        # Iterate over each example
        for e in range(len(examples)):
            i_val = inputs[e]
            t_val = targets[e]

            # Activate input layer
            for v, n in zip(i_val, i_nodes):
                n.value = v

            # Finding the values of the nodes through forward propagation
            for layer in net[1:]:
                for node in layer:
                    inc = [n.value for n in node.inputs]
                    in_val = dotproduct(inc, node.weights)
                    node.value = node.activation(in_val)

            # Initialize delta, which stores the gradients for each activation unit
            delta = [[] for _ in range(n_layers)]

            # Compute outer layer delta
            # Error for the MSE cost function
            err = [t_val[i] - o_nodes[i].value for i in range(o_units)]

            # Derivative of the activation function used at the output layer
            if node.activation == sigmoid:
                delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == relu:
                delta[-1] = [relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == tanh:
                delta[-1] = [tanh_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == elu:
                delta[-1] = [elu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            else:
                delta[-1] = [leaky_relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]

            # Propagating backward and finding gradients of nodes for each hidden layer
            h_layers = n_layers - 2
            for i in range(h_layers, 0, -1):
                layer = net[i]
                h_units = len(layer)
                nx_layer = net[i + 1]

                # weights from each ith layer node to each i + 1th layer node
                w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]

                if activation == sigmoid:
                    delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == relu:
                    delta[i] = [relu_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == tanh:
                    delta[i] = [tanh_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == elu:
                    delta[i] = [elu_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]
                else:
                    delta[i] = [leaky_relu_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]

            # Optimization with the velocity (momentum) gradient, with bias correction
            t_ = epoch + 1
            if momentum:
                if epoch == 0:
                    for i in range(len(delta)):
                        for j in range(len(delta[i])):
                            v_dw[i][j] = ((1 - beta) * delta[i][j]) / (1 - beta ** (t_ + 1))
                else:
                    for i in range(len(delta)):
                        for j in range(len(delta[i])):
                            v_dw[i][j] = (beta * v_dw[i][j] + (1 - beta) * delta[i][j]) / (1 - beta ** (t_ + 1))

            # Update weights with plain gradient descent
            if not momentum:
                for i in range(1, n_layers):
                    layer = net[i]
                    inc = [node.value for node in net[i - 1]]
                    units = len(layer)
                    for j in range(units):
                        layer[j].weights = vector_add(layer[j].weights,
                                                      scalar_vector_product(learning_rate * delta[i][j], inc))
            # Update weights with the velocity gradient optimizer
            else:
                for i in range(1, n_layers):
                    layer = net[i]
                    inc = [node.value for node in net[i - 1]]
                    units = len(layer)
                    for j in range(units):
                        layer[j].weights = vector_add(layer[j].weights,
                                                      scalar_vector_product(learning_rate * v_dw[i][j], inc))

    return net
def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid):
    """
    [Figure 18.23]
    The back-propagation algorithm for multilayer networks.
    """
    # initialise weights
    for layer in net:
        for node in layer:
            node.weights = random_weights(min_value=-0.5, max_value=0.5,
                                          num_weights=len(node.weights))

    examples = dataset.examples
    # As of now dataset.target gives an int instead of list,
    # Changing dataset class will have effect on all the learners.
    # Will be taken care of later.
    o_nodes = net[-1]
    i_nodes = net[0]
    o_units = len(o_nodes)
    idx_t = dataset.target
    idx_i = dataset.inputs
    n_layers = len(net)

    inputs, targets = init_examples(examples, idx_i, idx_t, o_units)

    for epoch in range(epochs):
        # iterate over each example
        for e in range(len(examples)):
            i_val = inputs[e]
            t_val = targets[e]

            # activate input layer
            for v, n in zip(i_val, i_nodes):
                n.value = v

            # forward pass
            for layer in net[1:]:
                for node in layer:
                    inc = [n.value for n in node.inputs]
                    in_val = dot_product(inc, node.weights)
                    node.value = node.activation(in_val)

            # initialize delta
            delta = [[] for _ in range(n_layers)]

            # compute outer layer delta
            # error for the MSE cost function
            err = [t_val[i] - o_nodes[i].value for i in range(o_units)]

            # calculate delta at output
            if node.activation == sigmoid:
                delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == relu:
                delta[-1] = [relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == tanh:
                delta[-1] = [tanh_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == elu:
                delta[-1] = [elu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == leaky_relu:
                delta[-1] = [leaky_relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            else:
                raise ValueError("Activation function unknown.")

            # backward pass
            h_layers = n_layers - 2
            for i in range(h_layers, 0, -1):
                layer = net[i]
                h_units = len(layer)
                nx_layer = net[i + 1]

                # weights from each ith layer node to each i + 1th layer node
                w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]

                if activation == sigmoid:
                    delta[i] = [sigmoid_derivative(layer[j].value) * dot_product(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == relu:
                    delta[i] = [relu_derivative(layer[j].value) * dot_product(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == tanh:
                    delta[i] = [tanh_derivative(layer[j].value) * dot_product(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == elu:
                    delta[i] = [elu_derivative(layer[j].value) * dot_product(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == leaky_relu:
                    delta[i] = [leaky_relu_derivative(layer[j].value) * dot_product(w[j], delta[i + 1])
                                for j in range(h_units)]
                else:
                    raise ValueError("Activation function unknown.")

            # update weights
            for i in range(1, n_layers):
                layer = net[i]
                inc = [node.value for node in net[i - 1]]
                units = len(layer)
                for j in range(units):
                    layer[j].weights = vector_add(layer[j].weights,
                                                  scalar_vector_product(learning_rate * delta[i][j], inc))

    return net