def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels.
            # X_train is assumed to be pre-processed once, up front
            # (as in the setup script), so forward and backward see
            # the same inputs.
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            # model.grad already holds the mean gradient over the batch;
            # collapsing it with np.sum would break the update
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, outputs)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
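# NOTE: The train() functions in this file call BinaryModel.forward/backward
# and read model.grad, but the class itself is defined elsewhere. The sketch
# below is a minimal, assumed implementation consistent with those call
# sites (sigmoid forward, mean cross-entropy gradient); the actual
# assignment class may differ.
import numpy as np

class BinaryModel:
    def __init__(self, l2_reg_lambda: float = 0.0):
        self.l2_reg_lambda = l2_reg_lambda
        self.w = np.zeros((785, 1))  # 784 pixels + 1 bias term
        self.grad = None

    def forward(self, X: np.ndarray) -> np.ndarray:
        # Sigmoid of the affine score; X is assumed to already
        # carry the bias column appended by pre_process_images
        return 1.0 / (1.0 + np.exp(-X @ self.w))

    def backward(self, X: np.ndarray, outputs: np.ndarray,
                 targets: np.ndarray) -> None:
        # Mean gradient of the cross-entropy loss over the batch, plus
        # the L2 term used in Task 3. Matches weight updates of the form
        # model.w = model.w - learning_rate * model.grad.
        n = X.shape[0]
        self.grad = -X.T @ (targets - outputs) / n \
            + 2 * self.l2_reg_lambda * self.w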
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    predictions = model.forward(X)
    num_predictions = predictions.shape[0]
    correct_predictions = np.sum(predictions.round() == targets)
    accuracy = correct_predictions / num_predictions
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Perform predictions
    Yhat = model.forward(X)
    # Calculate accuracy as the fraction of correct predictions
    accuracy = (Yhat.round() == targets).mean()
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    y_pred = model.forward(X)
    # Binarize the predictions with a threshold of 0.5
    y_pred_bin = np.where(y_pred >= 0.5, 1, 0)
    accuracy = np.sum(y_pred_bin == targets) / targets.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    tot_preds = X.shape[0]  # total number of predictions
    # Absolute error between target and rounded prediction
    num_errors = np.sum(np.abs(targets - model.forward(X).round()))
    accuracy = (tot_preds - num_errors) / tot_preds
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Computes the classification accuracy of the neural network on the
    given images
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    outputs = model.forward(X)
    # |target - output| < 0.5 holds exactly for the correct classifications,
    # so this counts correct (not false) classifications
    correct_classifications_count = (np.absolute(targets - outputs) < 0.5).sum()
    accuracy = correct_classifications_count / X.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    lgts = model.forward(X)
    # Threshold the sigmoid outputs at 0.5
    lgts = [(1 if l >= 0.5 else 0) for l in lgts]
    # Accuracy is the fraction of predictions that match their target
    accuracy = (1 / targets.shape[0]) * np.sum(
        [(1 if t == l else 0) for (t, l) in zip(targets, lgts)])
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    output = model.forward(X)
    rounded_output = np.around(output)
    # A correct prediction leaves a difference of exactly zero
    difference = targets - rounded_output
    num_correct = len(np.where(difference == 0)[0])
    accuracy = num_correct / len(output)
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    # First compute the predictions
    outputs = model.forward(X)
    # Convert the predictions into 0 and 1, using 0.5 as the threshold,
    # then compute the accuracy
    accuracy = np.sum(
        np.where(outputs > 0.5, 1, 0) == targets) / targets.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    output = model.forward(X)
    predictions = np.where(output >= 0.5, 1, 0)
    correct_pred = np.count_nonzero(predictions == targets)
    total_pred = output.shape[0]
    accuracy = correct_pred / total_pred
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    outputs = model.forward(X)
    N = targets.shape[0]
    binary_threshold = 0.5
    correct_outputs = 0
    for i in range(N):
        if (targets[i] == 1 and outputs[i] >= binary_threshold) or (
                targets[i] == 0 and outputs[i] < binary_threshold):
            correct_outputs += 1
    return correct_outputs / N
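# All the calculate_accuracy variants above compute the same quantity
# (up to tie-breaking at an output of exactly 0.5): the fraction of
# sigmoid outputs that, thresholded at 0.5, match their target. A compact
# vectorized reference for cross-checking them; the name
# calculate_accuracy_reference is introduced here, not part of the
# assignment.
def calculate_accuracy_reference(X: np.ndarray, targets: np.ndarray,
                                 model: BinaryModel) -> float:
    # Threshold at 0.5 and average the elementwise matches
    return float(((model.forward(X) >= 0.5) == targets).mean())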
batch_size = 128
shuffle_dataset = False
early_stopping = True

# Load dataset
category1, category2 = 2, 3
X_train, Y_train, X_val, Y_val = utils.load_binary_dataset(
    category1, category2)
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)

# ANY PARTS OF THE CODE BELOW THIS CAN BE CHANGED.

# Initialize model
model = BinaryModel()

# Train model
trainer = LogisticTrainer(
    model, learning_rate, batch_size, shuffle_dataset, early_stopping,
    X_train, Y_train, X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

# Plot and print any information you want
print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(X_train)))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(X_val)))
print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
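# A possible way to visualize the returned histories. This assumes, as in
# the train() functions in this file, that each history is a dict mapping
# global step to loss; the output filename is arbitrary.
import matplotlib.pyplot as plt

plt.plot(list(train_history.keys()), list(train_history.values()),
         label="Training loss")
plt.plot(list(val_history.keys()), list(val_history.values()),
         label="Validation loss")
plt.xlabel("Gradient steps")
plt.ylabel("Cross entropy loss")
plt.legend()
plt.savefig("binary_train_loss.png")
plt.show()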
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test, early_stopping_step
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    # Early stopping variable initialization
    last_loss = float("inf")
    already_failed = 0

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # The mini-batch gradient descent algorithm for m batches and a single epoch.
            model.backward(X_batch, model.forward(X_batch), Y_batch)
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, model.forward(X_batch))
            train_loss[global_step] = _train_loss[0, 0]
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss[0, 0]

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping criteria: record the step at which the
                # validation loss has failed to improve more than 20 times
                if _val_loss[0, 0] > last_loss and already_failed > 20:
                    if early_stopping_step == 0:
                        early_stopping_step = global_step
                # The loss failed to improve this round
                elif _val_loss[0, 0] > last_loss:
                    already_failed += 1
                # The loss improved this round, reset the counter
                else:
                    last_loss = _val_loss[0, 0]
                    already_failed = 0
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda, X_train.shape[0])

    # Initialize weights
    model.w = np.zeros((785, 1))

    # For early stopping
    is_val_loss_increasing = [False] * num_increases

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward and backward pass
            output = model.forward(X_batch)
            model.backward(X_batch, output, Y_batch)

            # Update weights
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously
            output_train = model.forward(X_train)
            _train_loss = cross_entropy_loss(Y_train, output_train)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                output_val = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, output_val)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            # Early stopping
            stopping = False
            if with_stopping and global_step > 0:
                stopping = early_stopping(num_increases,
                                          is_val_loss_increasing, val_loss,
                                          global_step, num_steps_per_val)
            if with_stopping and stopping:
                break
            global_step += 1
        if with_stopping and stopping:
            print('Epoch =', epoch)
            break
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(num_epochs: int, learning_rate: float, batch_size: int,
          l2_reg_lambda: float):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    global_step = 0
    last_val_loss = 1
    best_val_loss = 1
    best_weights = None
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward pass
            train_outputs = model.forward(X_batch)
            # Backward propagation
            model.backward(X_batch, train_outputs, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, train_outputs)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                val_outputs = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_outputs)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
        # Compute validation loss for early stopping after each epoch
        val_outputs = model.forward(X_val)
        _val_loss = cross_entropy_loss(Y_val, val_outputs)
        if _val_loss <= best_val_loss:
            # Copy the weights: model.w is updated in place above, so
            # keeping a bare reference would silently track later updates
            best_weights = model.w.copy()
            best_val_loss = _val_loss
        if _val_loss > last_val_loss:
            model.w = best_weights
            break
        last_val_loss = _val_loss
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, outputs)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping: count how many of the last ten validation
                # checks failed to improve on the one before them
                counter = 0
                for i in range(10):
                    current = global_step - i * num_steps_per_val
                    previous = global_step - (i + 1) * num_steps_per_val
                    # Only compare once enough validation points have been
                    # recorded; keys are multiples of num_steps_per_val
                    if (len(val_accuracy) > 10
                            and val_accuracy[current] <= val_accuracy[previous]):
                        counter += 1
                # When the counter is 9 or greater, early stopping kicks in
                if counter > 8:
                    print("Stopping early at step: " + str(global_step) +
                          " in epoch " + str(epoch))
                    return model, train_loss, val_loss, train_accuracy, val_accuracy
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
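# The train() variants above implement early stopping in different ad-hoc
# ways: a failure counter, per-epoch best-weight tracking, and a window
# over validation accuracy. A compact patience-based helper expressing the
# common idea; should_stop_early and its patience parameter are
# illustrative names introduced here, not part of the assignment.
def should_stop_early(val_losses: list, patience: int = 10) -> bool:
    # Stop once the last `patience` validation losses all fail to
    # improve on the best loss seen before them
    if len(val_losses) <= patience:
        return False
    best_before = min(val_losses[:-patience])
    return min(val_losses[-patience:]) >= best_before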