def delta_propagation(delta, network):
    """Backpropagate the delta values through the hidden layers.

    Args:
        delta: A list of all the delta values for the network.
        network: A multilayer network with L layers, weights W(j,i),
            activation function g.
    """
    # Walk the hidden layers from just below the output layer down to layer 1.
    for layer_idx in range(network.num_layers() - 2, 0, -1):
        for node_idx in range(network.get_layer(layer_idx).num_nodes):
            upstream = network.get_layer(layer_idx + 1).nodes
            # "Blame" this node in proportion to each of its outgoing weights.
            blame = sum(
                upstream[j].weights[node_idx]
                * delta[network.position_in_network(layer_idx + 1, j)]
                for j in range(len(upstream))
            )
            in_sum = network.get_node_in_layer(layer_idx, node_idx).in_sum
            delta[network.position_in_network(layer_idx, node_idx)] = \
                multilayer_network.sigmoid_derivative(in_sum) * blame
def learn_loop(delta, examples, network, alpha):
    """Run one learning pass of backpropagation over the example set.

    Args:
        delta: A list of all the delta values for the network.
        examples: A set of examples, each with input vector x and output vector y.
        network: A multilayer network with L layers, weights W(j,i),
            activation function g.
        alpha: The learning rate.
    """
    output_layer_idx = network.num_layers() - 1
    for ex in examples:
        # Feed the example's input forward through the network.
        load_and_feed(ex.x, network)
        # Output-node error term: g'(in_j) * (target_j - actual_j).
        for j in range(network.output_layer.num_nodes):
            node = network.output_layer.nodes[j]
            delta[network.position_in_network(output_layer_idx, j)] = \
                multilayer_network.sigmoid_derivative(node.in_sum) * (ex.y[j] - node.output)
        # Propagate the deltas backward from output layer to input layer.
        delta_propagation(delta, network)
        # Update every weight in the network using the deltas.
        update_weights(delta, network, alpha)
def learn_loop(delta, delta_Wxi, delta_Whf, delta_Wht, delta_Wci, delta_Wxf,
               delta_Wcf, delta_Wxc, delta_Whc, delta_Wxo, delta_Who, delta_Wco,
               examples, network, alpha):
    """Run one learning pass of backpropagation-through-time over the examples.

    Args:
        delta: A list of all the delta values for the network.
        delta_Wxi ... delta_Wco: Per-gate delta lists for the LSTM weight
            matrices (input, forget, cell and output gates).
        examples: A set of examples, each with input vector x and output vector y.
        network: A multilayer network (possibly containing LSTM layers) with
            L layers and activation function g.
        alpha: The learning rate.
    """
    output_layer_idx = network.num_layers() - 1
    for example in examples:
        load_and_feed(example.x, network)
        # Output-node error term.
        # BUG FIX: the original multiplied by the squared-error loss value
        # itself, ((y - o)**2) / 2, which is always non-negative and is not
        # the gradient. Gradient descent needs the loss derivative w.r.t. the
        # output, (y - o), exactly as the plain-MLP learn_loop computes it.
        for n in range(network.output_layer.num_nodes):
            node = network.output_layer.nodes[n]
            delta[network.position_in_network(output_layer_idx, n)] = \
                multilayer_network.sigmoid_derivative(node.in_sum) * \
                (example.y[n] - node.output)
        # Backpropagate through time: push the gradients backward from the
        # output layer to the input layer.
        delta_propagation(delta, delta_Wxi, delta_Whf, delta_Wht, delta_Wci,
                          delta_Wxf, delta_Wcf, delta_Wxc, delta_Whc,
                          delta_Wxo, delta_Who, delta_Wco, network)
        # Update every weight in the network using the gradients.
        update_weights(delta, delta_Wxi, delta_Whf, delta_Wht, delta_Wci,
                       delta_Wxf, delta_Wcf, delta_Wxc, delta_Whc,
                       delta_Wxo, delta_Who, delta_Wco, network, alpha)
def delta_propagation(delta, delta_Wxi, delta_Whf, delta_Wht, delta_Wci,
                      delta_Wxf, delta_Wcf, delta_Wxc, delta_Whc, delta_Wxo,
                      delta_Who, delta_Wco, network):
    """Backpropagate deltas through a network that may mix LSTM and plain layers.

    Four cases per node, depending on whether the current layer and the layer
    above it are LSTM layers:
      * LSTM under plain: blame flows through the plain layer's ordinary
        weights, then fans out to every gate of the LSTM node.
      * LSTM under LSTM: each gate's delta propagates through the matching
        gate weights of the layer above.
      * plain under LSTM: the contributions of all gate deltas are combined
        into a single summation.
      * plain under plain: ordinary backpropagation.

    Args:
        delta: A list of all the delta values for the network.
        delta_Wxi ... delta_Wco: Per-gate delta lists for the LSTM weight
            matrices.
        network: A multilayer network with L layers, activation function g.

    The original body repeated the same three lines once per gate (~11x per
    branch); this version drives the per-gate bookkeeping with a gate-name
    table and getattr, which also removes the dead `summation` locals the
    LSTM/LSTM branch initialised but never used.
    """
    # Gate weight-matrix names in the order the original computed them; the
    # node attributes are <gate> (weights) and in_sum_<gate> (input sums).
    gates = ("Wxi", "Wht", "Wci", "Wxf", "Whf", "Wcf",
             "Wxc", "Whc", "Wxo", "Who", "Wco")
    gate_deltas = {
        "Wxi": delta_Wxi, "Wht": delta_Wht, "Wci": delta_Wci,
        "Wxf": delta_Wxf, "Whf": delta_Whf, "Wcf": delta_Wcf,
        "Wxc": delta_Wxc, "Whc": delta_Whc, "Wxo": delta_Wxo,
        "Who": delta_Who, "Wco": delta_Wco,
    }
    for l in range(network.num_layers() - 2, 0, -1):
        this_is_lstm = network.get_layer(l).is_lstm
        next_is_lstm = network.get_layer(l + 1).is_lstm
        next_layer_nodes = network.get_layer(l + 1).nodes
        for n in range(network.get_layer(l).num_nodes):
            node = network.get_node_in_layer(l, n)
            pos = network.position_in_network(l, n)
            if this_is_lstm and not next_is_lstm:
                # LSTM layer under a plain hidden layer: one shared summation
                # over the plain layer's weights, applied to every gate.
                summation = sum(
                    next_layer_nodes[j].weights[n]
                    * delta[network.position_in_network(l + 1, j)]
                    for j in range(len(next_layer_nodes)))
                for gate in gates:
                    gate_deltas[gate][pos] = multilayer_network.sigmoid_derivative(
                        getattr(node, "in_sum_" + gate)) * summation
            elif this_is_lstm:
                # LSTM layer under an LSTM layer: propagate each gate's delta
                # through the matching gate weights of the layer above.
                for gate in gates:
                    d = gate_deltas[gate]
                    gate_sum = sum(
                        getattr(next_layer_nodes[j], gate)[n]
                        * d[network.position_in_network(l + 1, j)]
                        for j in range(len(next_layer_nodes)))
                    d[pos] = multilayer_network.sigmoid_derivative(
                        getattr(node, "in_sum_" + gate)) * gate_sum
            elif next_is_lstm:
                # Plain hidden layer under an LSTM layer: combine the deltas
                # of all gates into a single blame term.
                summation = sum(
                    getattr(next_layer_nodes[j], gate)[n]
                    * gate_deltas[gate][network.position_in_network(l + 1, j)]
                    for j in range(len(next_layer_nodes))
                    for gate in gates)
                delta[pos] = multilayer_network.sigmoid_derivative(
                    node.in_sum) * summation
            else:
                # Plain hidden layer under a plain hidden layer: ordinary
                # backpropagation, "blaming" a node as much as its weight.
                summation = sum(
                    next_layer_nodes[j].weights[n]
                    * delta[network.position_in_network(l + 1, j)]
                    for j in range(len(next_layer_nodes)))
                delta[pos] = multilayer_network.sigmoid_derivative(
                    node.in_sum) * summation