import typing
import numpy as np


def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables for loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    global_step = 0
    for epoch in range(num_epochs):
        # Task 3a: shuffle the training set before each epoch
        if use_shuffle:
            shuffle_in_unison(X_train, Y_train)
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward pass, backward pass, then one gradient descent step
            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)
            model.ws[0] -= learning_rate * model.grads[0]
            model.ws[1] -= learning_rate * model.grads[1]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                _train_loss = cross_entropy_loss(Y_batch, y_hat)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
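The signature above accepts use_momentum and momentum_gamma, but the body only performs plain gradient descent, and shuffle_in_unison is called without being shown. Below is a minimal sketch of both pieces, assuming the classic heavy-ball update (v <- gamma * v + grad) and a velocities list initialized to zeros before training; sgd_step is an illustrative helper name, not the assignment's actual code.

import numpy as np


def shuffle_in_unison(X: np.ndarray, Y: np.ndarray):
    # Assumed helper: apply the same random permutation to both arrays
    # (in place), so that image/label pairs stay aligned after shuffling.
    permutation = np.random.permutation(X.shape[0])
    X[:] = X[permutation]
    Y[:] = Y[permutation]


def sgd_step(ws, grads, velocities, learning_rate,
             use_momentum, momentum_gamma):
    # Illustrative sketch: one parameter update over all weight matrices.
    # With momentum, the velocity accumulates an exponentially decaying
    # sum of past gradients:  v <- gamma * v + grad;  w <- w - lr * v
    for i in range(len(ws)):
        if use_momentum:
            velocities[i] = momentum_gamma * velocities[i] + grads[i]
            ws[i] -= learning_rate * velocities[i]
        else:
            ws[i] -= learning_rate * grads[i]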
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py)
    to perform the train step. The function returns the mean
    loss value which is then automatically logged in our variable
    self.train_history.

    Args:
        X_batch: one batch of images
        Y_batch: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # Task 3b
    # Forward step (retrieving the predictions)
    outputs = self.model.forward(X_batch)
    # Backward step
    self.model.backward(X_batch, outputs, Y_batch)
    # Updating the weights
    self.model.w -= self.model.grad * self.learning_rate
    # Computing the loss
    loss = cross_entropy_loss(Y_batch, outputs)
    return loss
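cross_entropy_loss is called throughout this section but never defined here. A minimal sketch, assuming one-hot encoded targets and softmax outputs of shape [batch size, num classes]; the eps guard against log(0) is an illustrative addition, not taken from the original.

import numpy as np


def cross_entropy_loss(targets: np.ndarray, outputs: np.ndarray) -> float:
    # targets: one-hot labels, shape [batch size, num classes]
    # outputs: softmax probabilities, same shape
    # Mean cross entropy over the batch: -1/N * sum(y * log(y_hat))
    eps = 1e-12  # assumed guard against log(0), not in the original
    return float(-np.mean(np.sum(targets * np.log(outputs + eps), axis=1)))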
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py)
    to perform the train step. The function returns the mean
    loss value which is then automatically logged in our variable
    self.train_history.

    Args:
        X_batch: one batch of images
        Y_batch: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # Perform forward pass to get outputs (predictions)
    Yhat_batch = self.model.forward(X_batch)
    # Perform backward pass to get the gradient
    self.model.backward(X_batch, Yhat_batch, Y_batch)
    # Update weights with one gradient descent step
    self.model.w = self.model.w - self.learning_rate * self.model.grad
    # Calculate cross entropy loss
    loss = cross_entropy_loss(Y_batch, Yhat_batch)
    return loss
def validation_step(self):
    """
    Perform a validation step to evaluate the model at the current step
    on the validation set. Also calculates the current accuracy of the
    model on the train set.

    Returns:
        loss (float): cross entropy loss over the whole validation set
        accuracy_train (float): accuracy over the whole train set
        accuracy_val (float): accuracy over the whole validation set
    """
    # NO NEED TO CHANGE THIS FUNCTION
    logits = self.model.forward(self.X_val)
    loss = cross_entropy_loss(self.Y_val, logits)

    accuracy_train = calculate_accuracy(self.X_train, self.Y_train, self.model)
    accuracy_val = calculate_accuracy(self.X_val, self.Y_val, self.model)
    return loss, accuracy_train, accuracy_val
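calculate_accuracy is likewise used in these snippets but not defined in this section. A minimal sketch, assuming one-hot targets and that model.forward returns per-class probabilities:

import numpy as np


def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model) -> float:
    # Predicted class = argmax over the model's per-class outputs;
    # accuracy is the fraction of rows where it matches the one-hot
    # target's argmax.
    outputs = model.forward(X)
    predictions = outputs.argmax(axis=1)
    labels = targets.argmax(axis=1)
    return float((predictions == labels).mean())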
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py)
    to perform the train step. The function returns the mean
    loss value which is then automatically logged in our variable
    self.train_history.

    Args:
        X_batch: one batch of images
        Y_batch: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # Forward pass, backward pass, then one gradient descent step
    output = self.model.forward(X_batch)
    self.model.backward(X_batch, output, Y_batch)
    self.model.w = self.model.w - self.learning_rate * self.model.grad
    loss = cross_entropy_loss(Y_batch, output)
    return loss
model = SoftmaxModel(l2_reg_lambda)

# Train model
trainer = SoftmaxTrainer(
    model, learning_rate, batch_size, shuffle_dataset,
    X_train, Y_train, X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(X_train)))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(X_val)))
print("Final Train accuracy:", calculate_accuracy(X_train, Y_train, model))
print("Final Validation accuracy:", calculate_accuracy(X_val, Y_val, model))

plt.ylim([0.2, .6])
utils.plot_loss(train_history["loss"], "Training Loss", npoints_to_average=10)
utils.plot_loss(val_history["loss"], "Validation Loss")
plt.legend()
plt.xlabel("Number of Training Steps")
plt.ylabel("Cross Entropy Loss - Average")
# plt.savefig("task3b_softmax_train_loss.png")
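The driver above assumes hyperparameters, datasets, and plotting utilities defined earlier in the script. A minimal sketch of that setup, with illustrative values only (the assignment's actual values and data-loading call may differ and are not shown here):

import matplotlib.pyplot as plt
import utils

# Illustrative hyperparameters; the assignment's actual values may differ.
num_epochs = 50
learning_rate = 0.01
batch_size = 128
shuffle_dataset = True
l2_reg_lambda = 0.0

# X_train, Y_train, X_val, Y_val are assumed to be flattened image
# arrays with one-hot encoded labels, loaded and preprocessed earlier
# in the script.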