def __perform_forward_propagation__(self, serialized_example, serialized_label): num_chars = len(serialized_example) # Stores the hidden state for each letter position. letter_pos_to_h0 = np.zeros((num_chars + 1, self.hidden_dimensions)) # Stores the layer 2 values for each letter position letter_pos_to_h1 = np.zeros((num_chars, self.hidden_dimensions)) # Stores the hypothesis for each letter position letter_pos_to_h2 = np.zeros((num_chars, self.output_dimensions)) # The hidden state for the first letter position is all 0s. letter_pos_to_h0[0] = np.zeros(self.hidden_dimensions) # The loss for each letter position letter_pos_to_loss = np.zeros((num_chars, )) for j in range(num_chars): # The inputs X = serialized_example[j] X_with_bias = np.r_[[ self.layer_1_bias ], X] # <- We add a bias to the input. It is now a 28 element array h0 = letter_pos_to_h0[j] y1 = np.dot(self.W1, X_with_bias) + np.dot(self.W0, h0) h1 = ActivationFunctions.tanh(y1) # Adding the bias h1_with_bias = np.r_[[self.layer_2_bias], h1] y2 = np.dot(self.W2, h1_with_bias) h2 = ActivationFunctions.softmax(y2) # Update the dictionaries letter_pos_to_h1[j] = h1 letter_pos_to_h2[j] = h2 letter_pos_to_h0[j + 1] = h1 letter_pos_to_loss[j] = LossFunctions.cross_entropy( h2, serialized_label) return { 'letter_pos_to_loss': letter_pos_to_loss, 'letter_pos_to_hidden_state': letter_pos_to_h0, 'letter_pos_to_layer_2_values': letter_pos_to_h1, 'letter_pos_to_hypothesis': letter_pos_to_h2 }
def test_softmax(self):
    """Softmax of [2.0, 1.0, 0.1] should be approximately [0.7, 0.2, 0.1]."""
    inputs = [2.0, 1.0, 0.1]
    expected = np.array([0.7, 0.2, 0.1])
    actual = ActivationFunctions.softmax(inputs)
    # Loose tolerance: the expected values are rounded to one decimal.
    for got, want in zip(actual, expected):
        self.assertTrue(abs(got - want) < 0.1)
def test_tanh_should_return_correct_values_when_given_zeros_in_array(self):
    """tanh over an array of zeros should return zeros elementwise."""
    # NOTE: range(-100, -1) yields 99 values; kept as-is to preserve the
    # original fixture size.
    expected_values = [math.tanh(0) for i in range(-100, -1)]
    actual_values = ActivationFunctions.tanh(
        np.array([0 for i in range(-100, -1)]))
    # FIX: assertEquals is a deprecated alias (removed in Python 3.12);
    # use assertEqual instead.
    self.assertEqual(len(expected_values), len(actual_values))
    for i in range(0, len(expected_values)):
        self.assertTrue(
            abs(actual_values[i] - expected_values[i]) < 0.0000000001)
def test_tanh_should_return_correct_values_when_given_positive_numbers_in_2D_array(
        self):
    """tanh over a 10x199 matrix of positive ints matches math.tanh per cell."""
    expected_values = [[math.tanh(i) for i in range(1, 200)]
                      for j in range(0, 10)]
    actual_values = ActivationFunctions.tanh(
        np.array([[i for i in range(1, 200)] for j in range(0, 10)]))
    # FIX: assertEquals is a deprecated alias (removed in Python 3.12);
    # use assertEqual instead.
    self.assertEqual(len(expected_values), len(actual_values))
    for i in range(0, 10):
        self.assertEqual(len(expected_values[i]), len(actual_values[i]))
        for j in range(0, len(expected_values[i])):
            self.assertTrue(
                abs(actual_values[i][j] - expected_values[i][j]) <
                0.0000000001)
def predict(self, name):
    """Predict label probabilities for *name*.

    Serializes the name, runs the forward pass, and returns a list of
    (probability, label) tuples sorted most-probable first.

    Args:
        name: the raw name string to classify.

    Returns:
        list of (probability, label) tuples, descending by probability.

    Raises:
        Exception: if the forward pass produced no hypothesis
            (e.g. an empty name).
    """
    # Serialize the name to a num_chars x 27 matrix
    example = self.serializer.serialize_example(name)
    # Prediction needs no target, so pass an all-zero label vector.
    label = np.zeros((self.output_dimensions, ))
    forward_propagation_results = self.__perform_forward_propagation__(
        example, label)
    letter_pos_to_hypothesis = forward_propagation_results[
        'letter_pos_to_hypothesis']
    if len(letter_pos_to_hypothesis) == 0:
        raise Exception('Hypothesis cannot be obtained')
    # BUG FIX: the forward pass already applies softmax when producing
    # 'letter_pos_to_hypothesis', so applying softmax a second time here
    # distorted the probabilities. Use the final letter position's
    # hypothesis directly.
    hypothesis = letter_pos_to_hypothesis[-1]
    formatted_hypothesis = []
    for k in range(self.output_dimensions):
        formatted_hypothesis.append(
            (hypothesis[k], self.serializer.index_to_label[k]))
    # Sort by probability (tuple's first element), highest first.
    formatted_hypothesis.sort(reverse=True)
    return formatted_hypothesis
def test_tanh_should_return_correct_value_when_given_single_positive_number(
        self):
    """A single positive scalar should match math.tanh."""
    want = math.tanh(9)
    got = ActivationFunctions.tanh(9)
    self.assertTrue(abs(got - want) < 0.0000000001)
def test_tanh_should_return_correct_value_when_given_zero(self):
    """tanh(0) should agree with math.tanh(0) (i.e. 0)."""
    want = math.tanh(0)
    got = ActivationFunctions.tanh(0)
    self.assertTrue(abs(got - want) < 0.0000000001)
def __perform_back_propagation__(self, serialized_example, serialized_label,
                                 forward_propagation_results):
    """Accumulate gradients from the forward-pass results and apply a
    momentum SGD weight update (with L2 regularization) in place.

    Args:
        serialized_example: per-letter input vectors, same as the forward
            pass consumed.
        serialized_label: target vector; the output-layer error is
            h2 - serialized_label at every letter position.
        forward_propagation_results: dict produced by
            __perform_forward_propagation__.

    Side effects:
        Mutates self.W0/W1/W2 and self.W0_velocity/W1_velocity/W2_velocity.
        Returns None.

    NOTE(review): each position's gradient uses only that position's local
    error — no gradient term flows from position j+1's hidden state back
    into position j (no accumulated dL/dh0). Confirm whether this
    truncated form is intentional.
    """
    letter_pos_to_h0 = forward_propagation_results[
        'letter_pos_to_hidden_state']
    letter_pos_to_h1 = forward_propagation_results[
        'letter_pos_to_layer_2_values']
    letter_pos_to_h2 = forward_propagation_results[
        'letter_pos_to_hypothesis']
    letter_pos_to_loss = forward_propagation_results['letter_pos_to_loss']
    # The loss gradients w.r.t W0, W1, W2 (accumulated over all positions).
    dL_dW0 = np.zeros((self.hidden_dimensions, self.hidden_dimensions))
    dL_dW1 = np.zeros((self.hidden_dimensions, self.input_dimensions + 1))
    dL_dW2 = np.zeros((self.output_dimensions, self.hidden_dimensions + 1))
    num_chars = len(serialized_example)
    # Walk the letter positions in reverse, as in backprop through time.
    for j in range(num_chars - 1, -1, -1):
        X = serialized_example[j]
        X_with_bias = np.r_[[self.layer_1_bias], X]
        # This is a 1D array with "self.hidden_dimensions" elements:
        # the hidden state that fed position j.
        h0 = letter_pos_to_h0[j]
        # This is a 1D array with "self.hidden_dimensions" elements:
        # position j's layer-1 (tanh) activation.
        h1 = letter_pos_to_h1[j]
        # Adding the bias.
        # This is a 1D array with "self.hidden_dimensions + 1" elements.
        h1_with_bias = np.r_[[self.layer_2_bias], h1]
        # This is a 1D array with "self.output_dimensions" elements:
        # position j's softmax output.
        h2 = letter_pos_to_h2[j]
        # Softmax + cross-entropy gradient w.r.t. y2 simplifies to
        # (hypothesis - label). 1D, "self.output_dimensions" elements.
        dL_dY2 = h2 - serialized_label
        # Backprop through W2 into layer 1 (includes the bias slot).
        # 1D, "self.hidden_dimensions + 1" elements.
        dL_dH1 = np.dot(dL_dY2.T, self.W2)
        # Through the tanh nonlinearity, using the cached tanh values.
        # NOTE(review): h1_with_bias[0] is the bias constant, not a tanh
        # output — its derivative slot is discarded just below, so this
        # only matters for that dropped element.
        dL_dY1 = np.multiply(
            dL_dH1,
            ActivationFunctions.tanh_derivative_given_tanh_val(
                h1_with_bias))
        # We are removing the bias value.
        # So now it is a "self.hidden_dimensions" elements
        dL_dY1 = dL_dY1[1:]
        # Outer products accumulate the per-position weight gradients.
        dL_dW0 += np.dot(np.array([dL_dY1]).T, np.array([h0]))
        dL_dW1 += np.dot(np.array([dL_dY1]).T, np.array([X_with_bias]))
        dL_dW2 += np.dot(np.array([dL_dY2]).T,
                         np.array([h1_with_bias]))
    # Add L2 regularization to the accumulated gradients.
    # NOTE(review): this penalizes the bias columns of W1/W2 as well —
    # confirm that is intended.
    dL_dW0 += self.l2_lambda * self.W0
    dL_dW1 += self.l2_lambda * self.W1
    dL_dW2 += self.l2_lambda * self.W2
    # Add the velocity (classical momentum: v = momentum*v + alpha*grad).
    self.W0_velocity = (self.momentum * self.W0_velocity) + (self.alpha *
                                                             dL_dW0)
    self.W1_velocity = (self.momentum * self.W1_velocity) + (self.alpha *
                                                             dL_dW1)
    self.W2_velocity = (self.momentum * self.W2_velocity) + (self.alpha *
                                                             dL_dW2)
    # Update weights by descending along the velocity.
    self.W0 -= self.W0_velocity
    self.W1 -= self.W1_velocity
    self.W2 -= self.W2_velocity