Example #2
import os

import numpy as np

# note: Layer, BiasedLayer, LSTMLayerCache, LSTMLayerPendingUpdates and Logger
# are assumed to be provided by the surrounding project (not shown here)

class LSTMLayer(object):
    """
    represents a complete LSTM layer (i.e. an LSTM block) while offering an interface similar to that of the NeuralLayer class
    """
    def __init__(self, in_size, memory_size):
        """
        initialize lstm layer
        @in_size size of expected input vectors
        @memory_size size of memory vector (state)
        """
        concat_size = in_size + memory_size
        # create forget layer using sigmoid activation function
        self.forget_gate_layer = BiasedLayer(
                in_size=concat_size,  # size of input/last output
                out_size=memory_size,  # size of state
                activation_fn=Layer.activation_sigmoid,
                activation_fn_deriv=Layer.activation_sigmoid_deriv)
        # initialize forget gate layer biases to 1
        self.forget_gate_layer.biases = np.ones(memory_size)
        # create input gate layer using sigmoid activation function
        self.input_gate_layer = BiasedLayer(
                in_size=concat_size,  # size of input/last output
                out_size=memory_size,  # size of state (matches the update_values_layer output)
                activation_fn=Layer.activation_sigmoid,
                activation_fn_deriv=Layer.activation_sigmoid_deriv)
        # create update values layer using tanh activation function
        self.update_values_layer = BiasedLayer(
                in_size=concat_size,  # size of input/last output
                out_size=memory_size,  # size of state
                activation_fn=Layer.activation_tanh,
                activation_fn_deriv=Layer.activation_tanh_deriv)
        # create output gate layer using sigmoid activation function
        self.output_gate_layer = BiasedLayer(
                in_size=concat_size,  # size of input/last output
                out_size=memory_size,  # size of state
                activation_fn=Layer.activation_sigmoid,
                activation_fn_deriv=Layer.activation_sigmoid_deriv)
        # create container object for pending updates
        self.pending_updates = LSTMLayerPendingUpdates(in_size, memory_size)
        self.size = memory_size
        self.in_size = in_size

        # initialize cache chain (self.first_cache and self.last_cache are dummy caches)
        self.first_cache = LSTMLayerCache()
        self.first_cache.is_first_cache = True
        self.first_cache.state = np.zeros((self.size, 1))
        self.first_cache.output_values = np.zeros((self.size, 1))

        self.last_cache = LSTMLayerCache()
        self.first_cache.insert_after(self.last_cache)
        self.last_cache.is_last_cache = True

        self.caches = []

    @classmethod
    def get_convolutional_layer(cls, reference_layer):
        """returns a copy of the lstm layer which shares the same weights and biases"""
        conv_layer = LSTMLayer(
            in_size=reference_layer.in_size,
            memory_size=reference_layer.size
        )
        # set the second layer's gates and update values layer to this one's
        conv_layer.input_gate_layer = reference_layer.input_gate_layer
        conv_layer.forget_gate_layer = reference_layer.forget_gate_layer
        conv_layer.output_gate_layer = reference_layer.output_gate_layer
        conv_layer.update_values_layer = reference_layer.update_values_layer

        return conv_layer

    def feed(self, input_data, time_steps=1):
        """
        calculate output vector for given input vector
        @time_steps number of feedforward results to be cached for use in backpropagation through time
        """
        assert time_steps > 0, "time_steps must be at least 1 (for recursive input)!"
        Logger.debug("feed(" + str(input_data) + ")")

        # concatenate input_vector with recurrent input (last output) vector
        concat_in = np.concatenate([input_data, self.last_cache.predecessor.output_values])
        # delete the oldest cache if the maximum number of time steps is already cached
        if time_steps <= len(self.caches):
            self.first_cache.successor.remove()
            self.caches.pop(0)

        # create new cache at end of cache list
        self.last_cache.insert_before(LSTMLayerCache())
        cache = self.last_cache.predecessor
        self.caches.append(cache)

        # cache input and concatenated input values
        cache.input_values = input_data
        cache.concatenated_input = concat_in

        # calculate and cache gate/update_values results
        cache.forget_gate_results = self.forget_gate_layer.feed(concat_in)
        cache.input_gate_results = self.input_gate_layer.feed(concat_in)
        cache.update_values_layer_results = self.update_values_layer.feed(concat_in)
        cache.output_gate_results = self.output_gate_layer.feed(concat_in)

        # calculate state update values
        update_values = np.multiply(
                cache.input_gate_results,
                cache.update_values_layer_results)

        # apply forget gate to the previous state and add the state update values
        cache.state = cache.predecessor.state * cache.forget_gate_results \
                      + update_values
        # calculate output from new state and output gate
        cache.output_values = Layer.activation_tanh(cache.state) * cache.output_gate_results

        # return calculated output vector
        return cache.output_values

    def learn_recursive(self, cache, deltas):
        """
        learn timesteps recursively
        @cache cache corresponding to current time step
        @deltas deltas from last (actually next) layer
        """
        # terminate if there are no target values or caches left
        if len(deltas) == 0 or cache.is_first_cache:
            return
        # get delta for current time step
        delta = deltas[-1]

        # calculate cumulative loss derivative with respect to the output (CEC - Constant Error Carousel)
        loss_output = delta + cache.successor.loss_output

        # retrieve loss derivative from the next time step (t+1) with respect to the state
        last_loss_state = cache.successor.loss_state

        ### calculate deltas
        delta_state = cache.output_gate_results * loss_output + last_loss_state

        delta_output_gate = self.output_gate_layer.activation_deriv(
                cache.output_gate_results) * cache.state * loss_output

        delta_input_gate = self.input_gate_layer.activation_deriv(
                cache.input_gate_results) * cache.update_values_layer_results * delta_state

        delta_update_values_layer = self.update_values_layer.activation_deriv(
                cache.update_values_layer_results) * cache.input_gate_results * delta_state

        delta_forget_gate = self.forget_gate_layer.activation_deriv(
                cache.forget_gate_results) * cache.predecessor.state * delta_state
        ###

        # retrieve concatenated input from cache
        concat_in = cache.concatenated_input

        # add weight adjustments to pending updates object
        self.pending_updates.input_gate_weights += \
            np.outer(delta_input_gate, concat_in)
        self.pending_updates.forget_gate_weights += \
            np.outer(delta_forget_gate, concat_in)
        self.pending_updates.output_gate_weights += \
            np.outer(delta_output_gate, concat_in)
        self.pending_updates.update_values_layer_weights += \
            np.outer(delta_update_values_layer, concat_in)

        # add bias adjustments to pending updates object
        self.pending_updates.input_gate_biases += np.ravel(delta_input_gate)
        self.pending_updates.forget_gate_biases += np.ravel(delta_forget_gate)
        self.pending_updates.output_gate_biases += np.ravel(delta_output_gate)
        self.pending_updates.update_values_layer_biases += np.ravel(delta_update_values_layer)

        # calculate loss derivative with respect to the concatenated input
        delta_concatenated_input = np.zeros_like(concat_in) + \
                                   np.dot(self.input_gate_layer.weights.T, delta_input_gate) + \
                                   np.dot(self.forget_gate_layer.weights.T, delta_forget_gate) + \
                                   np.dot(self.output_gate_layer.weights.T, delta_output_gate) + \
                                   np.dot(self.update_values_layer.weights.T, delta_update_values_layer)

        # save loss for Constant Error Carousel
        cache.loss_state = delta_state * cache.forget_gate_results
        cache.loss_input = delta_concatenated_input[:self.in_size]
        cache.loss_output = delta_concatenated_input[self.in_size:]

        # call itself recursively for the previous time step (t-1)
        return self.learn_recursive(cache.predecessor, deltas[:-1])

    def learn(self, deltas, learning_rate=0.001):
        """
        apply learning algorithm by using deltas from next layer
        @deltas deltas from last (actually next) layer
        """
        Logger.debug("learn(" + str(deltas) + ")")
        # learn recursively over all caches (one per time step), starting with the last cache
        self.learn_recursive(self.last_cache.predecessor, deltas)
        # apply pending weight and bias updates
        self.apply_training(learning_rate)
        # calculate and return deltas for this layer from losses
        deltas = [cache.loss_input for cache in self.caches]
        return deltas

    def apply_training(self, learning_rate):
        """applies the calculated weight and bias updates and resets pending_updates object"""
        p_updates = self.pending_updates
        lr = learning_rate
        # subtract updates multiplied by the learning rate from the weight matrices/bias vectors
        self.forget_gate_layer.weights -= lr * p_updates.forget_gate_weights
        self.input_gate_layer.weights -= lr * p_updates.input_gate_weights
        self.update_values_layer.weights -= lr * p_updates.update_values_layer_weights
        self.output_gate_layer.weights -= lr * p_updates.output_gate_weights
        self.forget_gate_layer.biases -= lr * p_updates.forget_gate_biases
        self.input_gate_layer.biases -= lr * p_updates.input_gate_biases
        self.update_values_layer.biases -= lr * p_updates.update_values_layer_biases
        self.output_gate_layer.biases -= lr * p_updates.output_gate_biases
        # reset pending updates
        p_updates.reset()

        # clip weights and biases to keep them bounded (counteracts exploding gradients)
        for matrix in [
                self.forget_gate_layer.weights,
                self.input_gate_layer.weights,
                self.update_values_layer.weights,
                self.output_gate_layer.weights,
                self.forget_gate_layer.biases,
                self.input_gate_layer.biases,
                self.update_values_layer.biases,
                self.output_gate_layer.biases]:
            np.clip(matrix, -5, 5, out=matrix)

    def save(self, directory):
        """save weights and biases to directory"""
        self.forget_gate_layer.save(os.path.join(directory, "forget_gate.npz"))
        self.input_gate_layer.save(os.path.join(directory, "input_gate.npz"))
        self.output_gate_layer.save(os.path.join(directory, "output_gate.npz"))
        self.update_values_layer.save(os.path.join(directory, "update_values_layer.npz"))

    def load(self, directory):
        """load weights and biases from directory"""
        self.forget_gate_layer.load(os.path.join(directory, "forget_gate.npz"))
        self.input_gate_layer.load(os.path.join(directory, "input_gate.npz"))
        self.output_gate_layer.load(os.path.join(directory, "output_gate.npz"))
        self.update_values_layer.load(os.path.join(directory, "update_values_layer.npz"))

    def visualize(self, path, layer_id):
        """generate visualization of weights and biases"""
        self.input_gate_layer.visualize(os.path.join(path, "LSTM" + str(layer_id), "obs_" + "InputG_1_0.pgm"))
        self.forget_gate_layer.visualize(os.path.join(path, "LSTM" + str(layer_id), "obs_" + "ForgetG_2_0.pgm"))
        self.output_gate_layer.visualize(os.path.join(path, "LSTM" + str(layer_id), "obs_" + "OutputG_3_0.pgm"))
        self.update_values_layer.visualize(os.path.join(path, "LSTM" + str(layer_id), "obs_" + "UpdateL_4_0.pgm"))

    def clear_cache(self):
        """clear all caches (i.e. state , error carousel, layer results)"""
        self.caches = []
        self.first_cache.successor = self.last_cache
        self.last_cache.predecessor = self.first_cache
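
A minimal usage sketch (not part of the original example): it assumes the LSTMLayer class above and its supporting classes are importable, that inputs are column vectors of shape (in_size, 1), and that learn() receives one delta of shape (memory_size, 1) per cached time step; the dimensions and random values are placeholders for real data and real upstream gradients.

import numpy as np

# hypothetical dimensions, chosen only for illustration
lstm = LSTMLayer(in_size=4, memory_size=8)

# feed a short sequence, caching every step for backpropagation through time
sequence = [np.random.randn(4, 1) for _ in range(3)]
outputs = [lstm.feed(x, time_steps=len(sequence)) for x in sequence]

# one delta per cached time step (placeholder values)
deltas = [np.random.randn(8, 1) for _ in range(len(lstm.caches))]
input_deltas = lstm.learn(deltas, learning_rate=0.001)

# reset the cache chain before feeding an unrelated sequence
lstm.clear_cache()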