def train_network(self, epochs):
    for n in range(epochs):
        for data_row, true_value in zip(self.training_data, self.training_true):
            # Calculate the output of the middle layer of the network
            update = sym_sigmoid(
                np.matmul(data_row, self.mid_weights) + self.mid_bias)

            # Calculate the output of the network (including the output
            # bias, which is trained below)
            output = np.dot(update, self.out_weights) + self.out_bias

            # Calculate the error of the network
            error = true_value - output

            # Update the final layer weights using gradient descent
            out_weights_grad = update * -error
            self.out_weights -= self.alpha * out_weights_grad

            # Update the final layer bias using gradient descent
            out_bias_grad = -error
            self.out_bias -= self.alpha * out_bias_grad

            # Update the hidden layer weights & bias using gradient descent
            sig_dev = dev_sym_sigmoid(update)
            weights_grad = -error * np.outer(data_row,
                                             (self.out_weights * sig_dev))
            bias_grad = -error * self.out_weights * sig_dev
            self.mid_weights -= self.alpha * weights_grad
            self.mid_bias -= self.alpha * bias_grad

        if n % 50 == 0:
            # Only calculate the error every 50th epoch
            print(self.calc_error(calc_on='training'))
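# sym_sigmoid and dev_sym_sigmoid are assumed by the training loop above but
# are not defined in this section. A minimal sketch of what they could look
# like, assuming the "symmetric sigmoid" is tanh (a sigmoid rescaled to the
# range (-1, 1)); note the derivative is written in terms of the activation
# value itself, which is why train_network passes it the layer output:
import numpy as np

def sym_sigmoid(x):
    # Symmetric sigmoid: tanh squashes its input into (-1, 1)
    return np.tanh(x)

def dev_sym_sigmoid(activation):
    # d/dx tanh(x) = 1 - tanh(x)**2, expressed via the activation value
    return 1.0 - activation ** 2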
def forward(self, data_row):
    # Calculate the vector of outputs from the hidden layer
    mid_output = sym_sigmoid(
        np.matmul(data_row, self.mid_weights) + self.mid_bias)
    mid_output = mid_output.flatten()

    exponents = np.dot(self.out_weights.T,
                       mid_output) + self.out_bias.flatten()
    return softmax(exponents)
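# softmax is likewise used but not defined here. A standard, numerically
# stable sketch: shifting the scores by their maximum leaves the result
# unchanged but prevents overflow in the exponentials.
import numpy as np

def softmax(scores):
    # Subtract the max for numerical stability, exponentiate, then normalise
    shifted = np.exp(scores - np.max(scores))
    return shifted / np.sum(shifted)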
def train_network(self, epochs):
    for n in range(epochs):
        for i in range(0, len(self.training_data), self.batch_size):
            # Select a sequential mini-batch from the training data and
            # the ground truth
            data_batch = self.training_data[i:i + self.batch_size]
            true_batch = self.training_true[i:i + self.batch_size]

            # Calculate the output of the middle layer of the network
            update = sym_sigmoid(
                np.matmul(data_batch, self.mid_weights) + self.mid_bias)

            # Calculate the output of the network (including the output
            # bias, which is trained below)
            output = np.dot(update, self.out_weights) + self.out_bias

            # Calculate the error of the network for every sample in the batch
            error = true_batch - output

            # Update the final layer weights using gradient descent,
            # summing the per-sample gradients over the batch
            out_weights_grad = np.dot(update.T, -error)
            self.out_weights -= self.alpha * out_weights_grad

            # Update the final layer bias using gradient descent
            out_bias_grad = -np.sum(error)
            self.out_bias -= self.alpha * out_bias_grad

            # Update the hidden layer weights & bias using batch gradient
            # descent: propagate each sample's error back through the output
            # weights and the sigmoid derivative, then sum over the batch
            sig_dev = dev_sym_sigmoid(update)
            weights_grad = np.dot(
                data_batch.T, (-error)[:, None] * sig_dev * self.out_weights)
            bias_grad = np.dot(-error, sig_dev) * self.out_weights

            # Update weights by the gradient
            self.mid_weights -= self.alpha * weights_grad.astype('float64')
            self.mid_bias -= self.alpha * bias_grad.astype('float64')

        if n % 50 == 0:
            # Only calculate the error every 50th epoch
            print(self.calc_error(calc_on='training'))
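# The batched hidden-layer gradient above packs several broadcasts into one
# expression. A small self-contained check, with made-up sizes (B samples,
# F features, H hidden units, a single output), that the batched form equals
# the per-sample outer products of the single-sample version summed over
# the batch:
import numpy as np

B, F, H = 4, 3, 5
rng = np.random.default_rng(0)
data_batch = rng.normal(size=(B, F))
error = rng.normal(size=B)           # per-sample output errors
sig_dev = rng.normal(size=(B, H))    # per-sample sigmoid derivatives
out_weights = rng.normal(size=H)

# Batched form: (F, B) @ (B, H) -> (F, H)
batched = np.dot(data_batch.T, (-error)[:, None] * sig_dev * out_weights)

# Per-sample form, accumulated over the batch
looped = np.zeros((F, H))
for s in range(B):
    looped += -error[s] * np.outer(data_batch[s], out_weights * sig_dev[s])

assert np.allclose(batched, looped)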
def train_network(self, epochs):
    for n in range(epochs):
        for data_row, true_value in zip(self.training_data, self.training_true):
            # Calculate the output of the middle layer of the network
            update = sym_sigmoid(
                np.matmul(data_row, self.mid_weights) + self.mid_bias)
            update = update.flatten()

            # Calculate the output of the network
            output = softmax(
                np.dot(self.out_weights.T, update) + self.out_bias)

            # Calculate the error for use in back propagation; with a
            # softmax output and cross-entropy loss, the gradient with
            # respect to the class scores is simply (output - true_value)
            error = output - true_value

            # Update the final layer weights using gradient descent
            out_weights_grad = np.outer(update, error)
            self.out_weights -= self.alpha * out_weights_grad

            # Update the final layer bias using gradient descent
            self.out_bias -= self.alpha * error

            # Update the hidden layer weights & bias using gradient descent:
            # propagate the error back through the output weights, then
            # through the sigmoid derivative
            sig_dev = dev_sym_sigmoid(update)
            hidden_error = np.dot(error, self.out_weights.T) * sig_dev
            weights_grad = np.outer(data_row, hidden_error)
            bias_grad = hidden_error

            # Update weights by the gradient
            self.mid_weights -= self.alpha * weights_grad.astype('float64')
            self.mid_bias -= self.alpha * bias_grad.astype('float64')

        # Track the cross-entropy on the training set after every epoch
        print(self.calc_cross_entropy(calc_on='training'))
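# The step error = output - true_value leans on the identity that, for a
# softmax output trained with cross-entropy loss, the gradient of the loss
# with respect to the pre-softmax scores is exactly (softmax(scores) - target).
# A quick finite-difference check of that identity on random scores and a
# random one-hot target:
import numpy as np

rng = np.random.default_rng(1)
scores = rng.normal(size=5)
target = np.zeros(5)
target[rng.integers(5)] = 1.0

def stable_softmax(z):
    e = np.exp(z - np.max(z))
    return e / e.sum()

def cross_entropy(z):
    return -np.sum(target * np.log(stable_softmax(z)))

analytic = stable_softmax(scores) - target
eps = 1e-6
numeric = np.array([
    (cross_entropy(scores + eps * np.eye(5)[k])
     - cross_entropy(scores - eps * np.eye(5)[k])) / (2 * eps)
    for k in range(5)])
assert np.allclose(analytic, numeric, atol=1e-6)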
def forward(self, data_row):
    # Calculate the vector of outputs from the hidden layer
    mid_output = sym_sigmoid(
        np.matmul(data_row, self.mid_weights) + self.mid_bias)

    # Combine the hidden outputs into the final prediction, including
    # the output bias that train_network updates
    return np.dot(mid_output, self.out_weights) + self.out_bias
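# calc_error is called by the regression training loops but is not shown in
# this section. A sketch of what it might look like, assuming it reports the
# mean squared error of the forward pass over the chosen split (the method
# name and calc_on argument come from the calls above; the body, and the
# test_data / test_true attributes, are assumptions):
def calc_error(self, calc_on='training'):
    if calc_on == 'training':
        data, true_vals = self.training_data, self.training_true
    else:
        data, true_vals = self.test_data, self.test_true

    # Mean squared error between predictions and the ground truth
    preds = np.array([self.forward(row) for row in data]).flatten()
    return np.mean((np.asarray(true_vals) - preds) ** 2)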