def populate(self, in_size, out_layer, hidden_layers=None):
    """
    initializes the layers according to the size parameters
    @in_size input size of the first layer
    @out_layer specification dict ("size", "fn", "fn_deriv") for the output layer
    @hidden_layers list of specification dicts for the hidden layers
    """
    # copy the list to avoid mutating the caller's argument (or a shared default)
    layers = list(hidden_layers) if hidden_layers is not None else []
    layers.append(out_layer)
    # the first layer takes the network input directly
    self.training_layers.append(CachingNeuralLayer(
        in_size=in_size,
        out_size=layers[0]["size"],
        activation_fn=layers[0]["fn"],
        activation_fn_deriv=layers[0]["fn_deriv"]
    ))
    # create one further layer per remaining specification
    for i in range(1, len(layers)):
        self.training_layers.append(CachingNeuralLayer(
            # input size equals the output size of the preceding layer
            in_size=self.training_layers[-1].size,
            out_size=layers[i]["size"],
            activation_fn=layers[i]["fn"],
            activation_fn_deriv=layers[i]["fn_deriv"]
        ))
    # mark network as populated
    Logger.log(str(len(self.training_layers)) + " layers created.")
    self.populated = True
def learn(self, deltas, learning_rate=0.001):
    """
    applies the learning algorithm using the deltas backpropagated from the next layer
    @deltas deltas from the next layer
    @learning_rate step size used when applying the pending updates
    """
    Logger.debug("learn(" + str(deltas) + ")")
    # learn recursively over all caches (one per time step), starting with the last cache
    self.learn_recursive(self.last_cache.predecessor, deltas)
    # apply pending weight and bias updates
    self.apply_training(learning_rate)
    # calculate and return the deltas for this layer from the cached losses
    deltas = [cache.loss_input for cache in self.caches]
    return deltas
def feed(self, input_data, time_steps=1):
    """
    calculates the output vector for the given input vector
    @input_data input vector for the current time step
    @time_steps number of feedforward results to be cached for use in backpropagation through time
    """
    assert time_steps > 0, "time_steps must be at least 1 (for recursive input)!"
    Logger.debug("feed(" + str(input_data) + ")")
    # concatenate the input vector with the recurrent input (last output) vector
    concat_in = np.concatenate([input_data, self.last_cache.predecessor.output_values])
    # delete the oldest cache if the maximum number of time steps is already cached
    if time_steps <= len(self.caches):
        self.first_cache.successor.remove()
        self.caches.pop(0)
    # create a new cache at the end of the cache list
    self.last_cache.insert_before(LSTMLayerCache())
    cache = self.last_cache.predecessor
    self.caches.append(cache)
    # cache the input and the concatenated input values
    cache.input_values = input_data
    cache.concatenated_input = concat_in
    # calculate and cache the gate and update-value results
    cache.forget_gate_results = self.forget_gate_layer.feed(concat_in)
    cache.input_gate_results = self.input_gate_layer.feed(concat_in)
    cache.update_values_layer_results = self.update_values_layer.feed(concat_in)
    cache.output_gate_results = self.output_gate_layer.feed(concat_in)
    # calculate the state update values
    update_values = np.multiply(
        cache.input_gate_results,
        cache.update_values_layer_results)
    # apply the forget gate and add the state update values
    cache.state = cache.predecessor.state * cache.forget_gate_results + update_values
    # calculate the output from the new state and the output gate
    cache.output_values = Layer.activation_tanh(cache.state) * cache.output_gate_results
    # return the calculated output vector
    return cache.output_values
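# Illustration (not part of the original source): a self-contained numpy sketch of the
# cell update that feed() performs, with the gate sub-layers reduced to plain matrix
# products. The weights, dimensions, and the absence of bias terms are placeholder
# assumptions for readability, not the layer's real parameters.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

in_size, mem_size = 2, 3
rng = np.random.default_rng(0)
W_f, W_i, W_u, W_o = (rng.standard_normal((mem_size, in_size + mem_size)) for _ in range(4))

x = np.array([0.5, -0.1])       # current input
h_prev = np.zeros(mem_size)     # previous output (recurrent input)
c_prev = np.zeros(mem_size)     # previous cell state

concat_in = np.concatenate([x, h_prev])   # input concatenated with recurrent input
f = sigmoid(W_f @ concat_in)              # forget gate results
i = sigmoid(W_i @ concat_in)              # input gate results
u = np.tanh(W_u @ concat_in)              # update values (candidate state)
o = sigmoid(W_o @ concat_in)              # output gate results

c = c_prev * f + i * u                    # new state: forget old state, add gated update
h = np.tanh(c) * o                        # new output from state and output gate
print(h)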
def learn_rec(self, target, layer_id):
    Logger.debug("learn recursive: " + str(layer_id))
    if layer_id > len(self.training_layers):
        raise Exception("invalid layer id!")
    elif layer_id == len(self.training_layers):
        # base case: the output error is the difference between target and actual output
        return [target - self.training_layers[-1].caches[0].output_values]
    else:
        # recurse towards the output layer first, then compute this layer's delta
        last_deltas = self.learn_rec(target, layer_id + 1)
        delta = self.training_layers[layer_id].get_delta(
            in_data=self.training_layers[layer_id].caches[0].input_values,
            delta=last_deltas[0],
            predecessor_activation_deriv=self.training_layers[layer_id - 1].activation_deriv)
        self.training_layers[layer_id].learn(
            result=self.training_layers[layer_id].caches[0].output_values.T,
            delta=last_deltas[0].T,
            learning_rate=self.config['learning_rate']
        )
        deltas = [delta]
        deltas.extend(last_deltas)
        return deltas
def learn(self, target):
    """
    Calculates the weight updates depending on the loss function derived with respect to the individual weights.
    Requires feedforwarding to be finished for backpropagation through time.
    @target expected output to be compared to the actual output
    """
    if not self.populated:
        raise Exception("MLP Network needs to be populated first! Have a look at MLPNetwork.populate().")
    Logger.debug("mlpnetwork:learn")
    Logger.debug("target: " + str(target))
    Logger.debug("result: " + str(self.training_layers[-1].caches[0].output_values))
    Logger.debug("out_size: " + str(self.training_layers[-1].size))
    Logger.debug("out_weights: " + str(self.training_layers[-1].weights))
    # calculate the output error
    deltas = [target - self.training_layers[-1].caches[0].output_values]
    rng = range(len(self.training_layers) - 2, -1, -1)
    # backpropagate the error through the hidden layers, from last to first
    # (list() keeps reversed() working on Python 3, where zip() returns an iterator)
    for layer_id, last_weights in reversed(list(zip(
            rng,
            [l.weights for l in self.training_layers[1:]]
    ))):
        Logger.debug(str(layer_id) + "/" + str(len(self.training_layers) - 2))
        deltas.append(
            self.training_layers[layer_id].get_delta(
                self.training_layers[layer_id].caches[0].output_values,
                deltas[-1],
                self.training_layers[layer_id + 1].weights
            )
        )
    error = deltas[0][0] ** 2
    Logger.debug("deltas: " + str(deltas))
    # apply the weight updates, layer by layer from input to output
    for delta, layer in zip(reversed(deltas), self.training_layers):
        layer.learn(layer.caches[0].input_values, delta, self.config["learning_rate"])
    # return the squared error of the output (distance of target output and actual output)
    return error
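# Illustration (not part of the original source): a self-contained numpy sketch of the
# delta chain this method walks through, shown for a tiny 2-3-1 network with placeholder
# weights. The names W1, W2, lr are illustrative only; the output layer is kept linear
# for simplicity, which is an assumption of this sketch.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.default_rng(1)
W1 = rng.standard_normal((3, 2))   # hidden layer weights
W2 = rng.standard_normal((1, 3))   # output layer weights
lr = 0.1

x = np.array([[1.0], [0.0]])       # one input column vector
target = np.array([[1.0]])

# forward pass
hidden = sigmoid(W1 @ x)
output = W2 @ hidden

# backward pass: output delta first, then backpropagate through the next layer's weights
delta_out = target - output                                 # corresponds to deltas[0]
delta_hidden = (W2.T @ delta_out) * hidden * (1 - hidden)   # role of get_delta() for the hidden layer

# weight updates, layer by layer from input to output
W1 += lr * (delta_hidden @ x.T)
W2 += lr * (delta_out @ hidden.T)
print(float(delta_out[0, 0] ** 2))  # squared output error, as returned by learn()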
import numpy

from neural_network.util import Logger
from neural_network.NeuralLayer import NeuralLayer as Layer
from multi_layer_perceptron.MLPNetwork import MLPNetwork

Logger.DEBUG = False

# truth tables for the binary logic functions (inputs as column vectors)
data_in = numpy.array([[[0], [0]], [[0], [1]], [[1], [0]], [[1], [1]]])
data_out_xor = numpy.array([[0], [1], [1], [0]])
data_out_and = numpy.array([[0], [0], [0], [1]])
data_out_or = numpy.array([[0], [1], [1], [1]])
data_out = data_out_xor

Logger.log("data_shape: " + str(numpy.shape(data_in)))
Logger.log("data[0]_shape: " + str(numpy.shape(data_in[0])))

mlp = MLPNetwork()
mlp.populate(
    in_size=2,
    out_layer={
        "size": 1,
        "fn": None,
        "fn_deriv": None
    },
    hidden_layers=[{
        "size": 3,
        "fn": Layer.activation_sigmoid,
        "fn_deriv": Layer.activation_sigmoid_deriv
    }])
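# Illustration (not part of the original source): a hedged sketch of the training loop
# that would follow. The mlp.feed() call is an assumption (only the layer-level feed()
# appears in this section), and the epoch count, error reporting, and the presence of a
# configured learning rate on the network are illustrative.
for epoch in range(10000):
    total_error = 0.0
    for sample_in, sample_out in zip(data_in, data_out):
        mlp.feed(sample_in)                   # assumed forward-pass entry point filling the caches
        total_error += mlp.learn(sample_out)  # learn() returns the squared output error
    if epoch % 1000 == 0:
        Logger.log("epoch " + str(epoch) + " error: " + str(total_error))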