def __perform_forward_propagation__(self, serialized_example,
                                        serialized_label):
        num_chars = len(serialized_example)

        # Stores the hidden state for each letter position.
        letter_pos_to_h0 = np.zeros((num_chars + 1, self.hidden_dimensions))

        # Stores the layer 2 values for each letter position.
        letter_pos_to_h1 = np.zeros((num_chars, self.hidden_dimensions))

        # Stores the hypothesis (softmax output) for each letter position.
        letter_pos_to_h2 = np.zeros((num_chars, self.output_dimensions))

        # The hidden state for the first letter position is all 0s
        # (np.zeros above already guarantees this; the explicit assignment
        # just makes the initial state obvious).
        letter_pos_to_h0[0] = np.zeros(self.hidden_dimensions)

        # The loss at each letter position.
        letter_pos_to_loss = np.zeros((num_chars, ))

        for j in range(num_chars):
            # The one-hot encoded input for this letter position.
            X = serialized_example[j]
            # Prepend the bias: X_with_bias has (self.input_dimensions + 1)
            # elements, i.e. 28 for the 27-character alphabet.
            X_with_bias = np.r_[[self.layer_1_bias], X]
            h0 = letter_pos_to_h0[j]

            y1 = np.dot(self.W1, X_with_bias) + np.dot(self.W0, h0)
            h1 = ActivationFunctions.tanh(y1)

            # Adding the bias
            h1_with_bias = np.r_[[self.layer_2_bias], h1]

            y2 = np.dot(self.W2, h1_with_bias)
            h2 = ActivationFunctions.softmax(y2)

            # Record this position's activations; h1 also becomes the
            # hidden state for the next letter position.
            letter_pos_to_h1[j] = h1
            letter_pos_to_h2[j] = h2
            letter_pos_to_h0[j + 1] = h1

            letter_pos_to_loss[j] = LossFunctions.cross_entropy(
                h2, serialized_label)

        return {
            'letter_pos_to_loss': letter_pos_to_loss,
            'letter_pos_to_hidden_state': letter_pos_to_h0,
            'letter_pos_to_layer_2_values': letter_pos_to_h1,
            'letter_pos_to_hypothesis': letter_pos_to_h2
        }
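The forward pass above relies on ActivationFunctions.tanh, ActivationFunctions.softmax, and LossFunctions.cross_entropy. The two activations are exercised by the tests below, but cross_entropy is not shown anywhere in this section; a minimal sketch consistent with how it is called here (a softmax hypothesis plus a one-hot label) might be:

import numpy as np

class LossFunctions:
    @staticmethod
    def cross_entropy(hypothesis, label):
        # Assumed implementation; the real one is not shown in this section.
        # Clipping guards against log(0) when the softmax saturates.
        clipped = np.clip(hypothesis, 1e-12, 1.0)
        # With a one-hot label this reduces to -log(p) of the true class.
        return -np.sum(label * np.log(clipped))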
Example #2
    def test_softmax(self):
        input_values = [2.0, 1.0, 0.1]
        expected_values = np.array([0.7, 0.2, 0.1])
        actual_values = ActivationFunctions.softmax(input_values)

        for i in range(3):
            self.assertTrue(abs(actual_values[i] - expected_values[i]) < 0.1)
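The ActivationFunctions.softmax implementation itself is not shown in this section. A numerically stable sketch that would pass the assertions above:

import numpy as np

class ActivationFunctions:
    @staticmethod
    def softmax(values):
        values = np.asarray(values, dtype=float)
        # Subtracting the max keeps exp() from overflowing; the result is
        # unchanged because softmax is shift-invariant.
        exps = np.exp(values - np.max(values))
        return exps / np.sum(exps)

For the test input [2.0, 1.0, 0.1] this yields roughly [0.659, 0.242, 0.099], within the 0.1 tolerance of the expected [0.7, 0.2, 0.1].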
Example #3
    def test_tanh_should_return_correct_values_when_given_zeros_in_array(self):
        expected_values = [math.tanh(0) for i in range(-100, -1)]
        actual_values = ActivationFunctions.tanh(
            np.array([0 for i in range(-100, -1)]))

        self.assertEqual(len(expected_values), len(actual_values))

        for i in range(0, len(expected_values)):
            self.assertTrue(
                abs(actual_values[i] - expected_values[i]) < 1e-10)
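Likewise, ActivationFunctions.tanh is not shown; delegating to np.tanh satisfies both the scalar and array tests. The back-propagation code later in this section also calls tanh_derivative_given_tanh_val, so an assumed companion is sketched alongside it, using the identity d/dx tanh(x) = 1 - tanh(x)^2:

import numpy as np

class ActivationFunctions:
    @staticmethod
    def tanh(values):
        # np.tanh matches math.tanh element-wise and also accepts scalars.
        return np.tanh(values)

    @staticmethod
    def tanh_derivative_given_tanh_val(tanh_val):
        # Assumed helper: computes the derivative directly from a value
        # that has already been passed through tanh.
        return 1.0 - np.square(tanh_val)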
Example #4
    def test_tanh_should_return_correct_values_when_given_positive_numbers_in_2D_array(
            self):
        expected_values = [[math.tanh(i) for i in range(1, 200)]
                           for j in range(0, 10)]
        actual_values = ActivationFunctions.tanh(
            np.array([[i for i in range(1, 200)] for j in range(0, 10)]))

        self.assertEqual(len(expected_values), len(actual_values))

        for i in range(0, 10):
            self.assertEqual(len(expected_values[i]), len(actual_values[i]))

            for j in range(0, len(expected_values[i])):
                self.assertTrue(
                    abs(actual_values[i][j] -
                        expected_values[i][j]) < 1e-10)
Example #5
    def predict(self, name):
        # Serialize the name to a (num_chars x 27) one-hot matrix.
        example = self.serializer.serialize_example(name)

        # Forward propagation expects a label so it can compute losses; for
        # prediction we pass a dummy all-zero label and ignore the losses.
        label = np.zeros((self.output_dimensions, ))

        forward_propagation_results = self.__perform_forward_propagation__(
            example, label)
        letter_pos_to_h2 = forward_propagation_results[
            'letter_pos_to_hypothesis']

        if len(letter_pos_to_h2) > 0:
            # The stored hypotheses are already softmax outputs, so the last
            # one is used directly as a probability distribution.
            hypothesis = letter_pos_to_h2[-1]
            formatted_hypothesis = []
            for k in range(self.output_dimensions):
                formatted_hypothesis.append(
                    (hypothesis[k], self.serializer.index_to_label[k]))

            formatted_hypothesis.sort(reverse=True)

            return formatted_hypothesis
        else:
            raise Exception('Hypothesis cannot be obtained')
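predict() returns (probability, label) pairs ordered most-probable first. The ordering works because Python tuples compare element by element, so reverse=True sorts by the probability in the first slot. A self-contained illustration (the label names are made up):

# Made-up (probability, label) pairs; only the sort behavior matters here.
formatted_hypothesis = [(0.1, 'english'), (0.7, 'japanese'), (0.2, 'russian')]
formatted_hypothesis.sort(reverse=True)
print(formatted_hypothesis)
# [(0.7, 'japanese'), (0.2, 'russian'), (0.1, 'english')]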
Example #6
    def test_tanh_should_return_correct_value_when_given_single_positive_number(
            self):
        expected_value = math.tanh(9)
        actual_value = ActivationFunctions.tanh(9)

        self.assertTrue(abs(actual_value - expected_value) < 1e-10)
Example #7
    def test_tanh_should_return_correct_value_when_given_zero(self):
        expected_value = math.tanh(0)
        actual_value = ActivationFunctions.tanh(0)

        self.assertTrue(abs(actual_value - expected_value) < 1e-10)
    def __perform_back_propagation__(self, serialized_example,
                                     serialized_label,
                                     forward_propagation_results):
        letter_pos_to_h0 = forward_propagation_results[
            'letter_pos_to_hidden_state']
        letter_pos_to_h1 = forward_propagation_results[
            'letter_pos_to_layer_2_values']
        letter_pos_to_h2 = forward_propagation_results[
            'letter_pos_to_hypothesis']
        letter_pos_to_loss = forward_propagation_results['letter_pos_to_loss']

        # The gradients of the loss w.r.t. W0, W1, and W2, accumulated
        # across all letter positions.
        dL_dW0 = np.zeros((self.hidden_dimensions, self.hidden_dimensions))
        dL_dW1 = np.zeros((self.hidden_dimensions, self.input_dimensions + 1))
        dL_dW2 = np.zeros((self.output_dimensions, self.hidden_dimensions + 1))

        num_chars = len(serialized_example)

        for j in range(num_chars - 1, -1, -1):
            X = serialized_example[j]
            X_with_bias = np.r_[[self.layer_1_bias], X]

            # This is a 1D array with "self.hidden_dimensions" elements
            h0 = letter_pos_to_h0[j]

            # This is a 1D array with "self.hidden_dimensions" elements
            h1 = letter_pos_to_h1[j]

            # Adding the bias
            # This is a 1D array with "self.hidden_dimensions + 1" elements
            h1_with_bias = np.r_[[self.layer_2_bias], h1]

            # This is a 1D array with "self.output_dimensions" elements
            h2 = letter_pos_to_h2[j]

            # This is a 1D array with "self.output_dimensions" elements.
            # For softmax followed by cross-entropy, the derivative of the
            # loss with respect to y2 simplifies to (h2 - label).
            dL_dY2 = h2 - serialized_label

            # This is a 1D array with "self.hidden_dimensions + 1" elements
            dL_dH1 = np.dot(dL_dY2.T, self.W2)
            dL_dY1 = np.multiply(
                dL_dH1,
                ActivationFunctions.tanh_derivative_given_tanh_val(
                    h1_with_bias))

            # Drop the bias component: the bias unit is a constant input to
            # layer 2, not an output of y1, so no gradient flows back to it.
            # dL_dY1 now has "self.hidden_dimensions" elements.
            dL_dY1 = dL_dY1[1:]

            # Accumulate the weight gradients as outer products.
            dL_dW0 += np.dot(np.array([dL_dY1]).T, np.array([h0]))
            dL_dW1 += np.dot(np.array([dL_dY1]).T, np.array([X_with_bias]))
            dL_dW2 += np.dot(np.array([dL_dY2]).T, np.array([h1_with_bias]))

        # Add the L2 regularization gradient (lambda * W for each matrix).
        dL_dW0 += self.l2_lambda * self.W0
        dL_dW1 += self.l2_lambda * self.W1
        dL_dW2 += self.l2_lambda * self.W2

        # Update the momentum velocities.
        self.W0_velocity = (self.momentum * self.W0_velocity) + (self.alpha * dL_dW0)
        self.W1_velocity = (self.momentum * self.W1_velocity) + (self.alpha * dL_dW1)
        self.W2_velocity = (self.momentum * self.W2_velocity) + (self.alpha * dL_dW2)

        # Update weights
        self.W0 -= self.W0_velocity
        self.W1 -= self.W1_velocity
        self.W2 -= self.W2_velocity
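The shortcut dL_dY2 = h2 - serialized_label used above is the standard softmax-plus-cross-entropy identity. A self-contained finite-difference check of that identity, independent of the class:

import numpy as np

def check_softmax_cross_entropy_gradient():
    rng = np.random.default_rng(0)
    y2 = rng.normal(size=5)
    label = np.zeros(5)
    label[2] = 1.0  # one-hot target

    def loss(y):
        exps = np.exp(y - np.max(y))
        h2 = exps / np.sum(exps)
        return -np.sum(label * np.log(h2))

    # Analytic gradient from the identity dL/dy2 = h2 - label.
    exps = np.exp(y2 - np.max(y2))
    analytic = exps / np.sum(exps) - label

    # Central finite differences, one coordinate at a time.
    eps = 1e-6
    numeric = np.zeros_like(y2)
    for i in range(len(y2)):
        bump = np.zeros_like(y2)
        bump[i] = eps
        numeric[i] = (loss(y2 + bump) - loss(y2 - bump)) / (2 * eps)

    print(np.max(np.abs(analytic - numeric)))

check_softmax_cross_entropy_gradient()

A printed maximum difference on the order of 1e-9 or smaller confirms the analytic shortcut.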