def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter
):
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables for loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Initialize our model
    model = SoftmaxModel(l2_reg_lambda, X_train.shape[0], 10)
    # Initialize weights
    model.w = np.zeros((785, 10))

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward and backward pass
            output = model.forward(X_batch)
            model.backward(X_batch, output, Y_batch)

            # Gradient descent step: update the weights
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously (over the full training set)
            output_train = model.forward(X_train)
            _train_loss = cross_entropy_loss(Y_train, output_train)
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                output_val = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, output_val)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
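For reference, the loop above is plain mini-batch gradient descent on the softmax cross entropy. Assuming one-hot targets Y, predictions Ŷ = softmax(Xw) over a batch of N examples, and L2 coefficient λ (the Task 3 hyperparameter), the gradient that model.backward is expected to leave in model.grad, and the corresponding update, are

    \nabla_w C(w) = -\frac{1}{N} X^\top \big(Y - \hat{Y}\big) + 2\lambda w,
    \qquad
    w \leftarrow w - \alpha \, \nabla_w C(w)

where α is the learning rate. The shapes assumed here (X: N×785, w: 785×10) match the zero-initialization above.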
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter
):
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables for loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Pre-process the images once (appending the bias feature turns 784
    # columns into 785), but only if this has not been done already
    if X_train.shape[1] == 784:
        X_train = pre_process_images(X_train)
    if X_test.shape[1] == 784:
        X_test = pre_process_images(X_test)
    if X_val.shape[1] == 784:
        X_val = pre_process_images(X_val)

    # Initialize our model
    model = SoftmaxModel(l2_reg_lambda)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward and backward pass
            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)
            # Gradient descent step
            model.w -= learning_rate * model.grad

            # Track training loss continuously (on the current batch)
            _train_loss = cross_entropy_loss(Y_batch, y_hat)
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter
):
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables for loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Initialize our model
    model = SoftmaxModel(l2_reg_lambda)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Mini-batch gradient descent: forward pass, backward pass, weight update
            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously (on the current batch, after the update)
            _train_loss = cross_entropy_loss(Y_batch, model.forward(X_batch))
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(num_epochs: int,
          learning_rate: float,
          batch_size: int,
          l2_reg_lambda: float):
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables for loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Initialize our model
    model = SoftmaxModel(l2_reg_lambda)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward pass
            train_outputs = model.forward(X_batch)
            # Backward pass
            model.backward(X_batch, train_outputs, Y_batch)
            # Gradient descent step
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, train_outputs)
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                val_outputs = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_outputs)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
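All four variants above rely on the same helpers, which are defined elsewhere in the assignment code. A minimal sketch of the interface the loops assume follows; the normalization in pre_process_images, the constructor defaults, and the one-hot label shapes are assumptions for illustration, not the assignment's actual implementation.

import numpy as np

def pre_process_images(X: np.ndarray) -> np.ndarray:
    # Hypothetical sketch: normalize, then append a bias feature,
    # turning 784 pixel columns into the 785 the loops expect.
    X = (X - X.mean()) / X.std()
    return np.concatenate([X, np.ones((X.shape[0], 1))], axis=1)

def cross_entropy_loss(targets: np.ndarray, outputs: np.ndarray) -> float:
    # Mean cross entropy for one-hot targets, shape [batch, num_classes].
    return float(-np.mean(np.sum(targets * np.log(outputs + 1e-12), axis=1)))

def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model) -> float:
    # Fraction of examples where the predicted class matches the target.
    preds = model.forward(X).argmax(axis=1)
    return float((preds == targets.argmax(axis=1)).mean())

class SoftmaxModel:
    def __init__(self, l2_reg_lambda: float,
                 num_features: int = 785, num_classes: int = 10):
        self.l2_reg_lambda = l2_reg_lambda
        self.w = np.zeros((num_features, num_classes))
        self.grad = None

    def forward(self, X: np.ndarray) -> np.ndarray:
        # Softmax over class scores, shifted for numerical stability.
        z = X @ self.w
        z -= z.max(axis=1, keepdims=True)
        e = np.exp(z)
        return e / e.sum(axis=1, keepdims=True)

    def backward(self, X: np.ndarray, outputs: np.ndarray,
                 targets: np.ndarray) -> None:
        # Gradient of the mean cross entropy plus the L2 penalty term.
        n = X.shape[0]
        self.grad = -X.T @ (targets - outputs) / n \
            + 2 * self.l2_reg_lambda * self.w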
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform the forward pass, backward pass and gradient descent step here.
    The function is called once for every batch (see trainer.py) to
    perform the train step. It returns the mean loss value, which is
    then automatically logged in our variable self.train_history.

    Args:
        X_batch: one batch of images
        Y_batch: one batch of labels
    Returns:
        loss value (float) on the batch
    """
    # Forward and backward pass on the current batch
    output = self.model.forward(X_batch)
    self.model.backward(X_batch, output, Y_batch)
    # Gradient descent step
    self.model.w = self.model.w - self.learning_rate * self.model.grad
    # Mean cross entropy loss on the batch
    loss = cross_entropy_loss(Y_batch, output)
    return loss
def validation_step(self):
    """
    Perform a validation step to evaluate the model at the current step
    on the validation set. Also calculates the current accuracy of the
    model on the train set.

    Returns:
        loss (float): cross entropy loss over the whole validation set
        accuracy_train (float): accuracy over the whole train set
        accuracy_val (float): accuracy over the whole validation set
    """
    # NO NEED TO CHANGE THIS FUNCTION
    logits = self.model.forward(self.X_val)
    loss = cross_entropy_loss(self.Y_val, logits)

    accuracy_train = calculate_accuracy(
        self.X_train, self.Y_train, self.model)
    accuracy_val = calculate_accuracy(
        self.X_val, self.Y_val, self.model)
    return loss, accuracy_train, accuracy_val
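For context, these two methods are driven by a loop in trainer.py. A hypothetical sketch of that loop follows; the attribute names (batch_size, train_history, validation_history) and the validate-five-times-per-epoch schedule are assumptions inferred from the code above, not the actual trainer.py.

def train_loop(self, num_epochs: int):
    # Hypothetical sketch of the loop that drives the two steps above.
    global_step = 0
    num_batches = self.X_train.shape[0] // self.batch_size
    for epoch in range(num_epochs):
        for step in range(num_batches):
            start = step * self.batch_size
            end = start + self.batch_size
            loss = self.train_step(
                self.X_train[start:end], self.Y_train[start:end])
            self.train_history["loss"][global_step] = loss
            # Validate a few times per epoch
            if global_step % max(num_batches // 5, 1) == 0:
                val_loss, acc_train, acc_val = self.validation_step()
                self.validation_history["loss"][global_step] = val_loss
                self.validation_history["accuracy"][global_step] = acc_val
                self.train_history["accuracy"][global_step] = acc_train
            global_step += 1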
# Hyperparameters
num_epochs = 50
learning_rate = .3
batch_size = 128
l2_reg_lambda = 0.001

model, train_loss, val_loss, train_accuracy, val_accuracy = train(
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    batch_size=batch_size,
    l2_reg_lambda=l2_reg_lambda)

print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(X_train)))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(X_val)))
print("Final Test Cross Entropy Loss:",
      cross_entropy_loss(Y_test, model.forward(X_test)))

print("Final Train accuracy:", calculate_accuracy(X_train, Y_train, model))
print("Final Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
print("Final Test accuracy:", calculate_accuracy(X_test, Y_test, model))

# Plot loss
# plt.ylim([0.01, .2])
utils.plot_loss(train_loss, "Training Loss")
utils.plot_loss(val_loss, "Validation Loss")
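utils.plot_loss is assumed to take one of the dictionaries above, mapping global step to loss, and add a labeled curve to the current figure. A minimal matplotlib stand-in under that assumption:

import matplotlib.pyplot as plt

def plot_loss(loss_dict: dict, label: str):
    # Minimal sketch; the real utils.plot_loss may smooth or style differently.
    steps = sorted(loss_dict.keys())
    plt.plot(steps, [loss_dict[s] for s in steps], label=label)
    plt.xlabel("Number of gradient steps")
    plt.ylabel("Cross entropy loss")
    plt.legend()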