def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    """Train `model` with mini-batch gradient descent.

    Fix: the original accepted `use_shuffle`, `use_momentum` and
    `momentum_gamma` but silently ignored all three; they are now honored.
    Behavior is byte-for-byte unchanged when both flags are False.

    Args:
        model: SoftmaxModel exposing forward / backward / ws / grads.
        datasets: [X_train, Y_train, X_val, Y_val, X_test, Y_test].
        num_epochs: number of full passes over the training set.
        learning_rate: SGD step size.
        batch_size: mini-batch size (trailing partial batch is dropped).
        use_shuffle: re-shuffle the training set at the start of each epoch.
        use_momentum: apply classic momentum to the weight updates.
        momentum_gamma: momentum decay factor (only used with momentum).
    Returns:
        (model, train_loss, val_loss, train_accuracy, val_accuracy) where
        the four dicts are keyed by global step.
    """
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5  # validate 5x per epoch
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    # One velocity buffer per weight matrix; stays all-zero unless momentum is on.
    velocities = [np.zeros_like(w) for w in model.ws]
    global_step = 0
    for epoch in range(num_epochs):
        if use_shuffle:
            # Draw a fresh permutation of the training examples each epoch.
            perm = np.random.permutation(X_train.shape[0])
            X_train, Y_train = X_train[perm], Y_train[perm]
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]
            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            for i in range(len(model.ws)):
                if use_momentum:
                    # Classic momentum: v <- gamma*v - lr*grad; w <- w + v.
                    # NOTE(review): confirm this matches the exact momentum
                    # formulation required by the assignment spec.
                    velocities[i] = (momentum_gamma * velocities[i]
                                     - learning_rate * model.grads[i])
                    model.ws[i] = model.ws[i] + velocities[i]
                else:
                    model.ws[i] = model.ws[i] - learning_rate * model.grads[i]
            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss
                # Train loss is measured on the current batch only (cheap).
                _train_loss = cross_entropy_loss(Y_batch, outputs)
                train_loss[global_step] = _train_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train_and_evaluate(
        neurons_per_layer: int,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_improved_sigmoid: bool,
        use_improved_weight_init: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):
    """Build a SoftmaxModel, train it, print final metrics, return histories.

    Fixes:
    - The original printed metrics using X_train/Y_train/... without ever
      unpacking `datasets`, which is a NameError unless those names leak in
      as globals; the datasets are now unpacked locally.
    - `use_shift` is no longer forwarded to `train` — the `train` defined in
      this file does not accept that keyword, so forwarding it raises
      TypeError. NOTE(review): the parameter is kept (currently unused) for
      call-site compatibility; confirm against the intended `train` signature.

    Returns:
        (train_loss, val_loss, train_accuracy, val_accuracy) dicts keyed
        by global step.
    """
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets
    model = SoftmaxModel(neurons_per_layer,
                         use_improved_sigmoid,
                         use_improved_weight_init)
    model, train_loss, val_loss, train_accuracy, val_accuracy = train(
        model,
        datasets,
        num_epochs=num_epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        use_shuffle=use_shuffle,
        use_momentum=use_momentum,
        momentum_gamma=momentum_gamma)
    print("----------", use_shuffle, use_improved_sigmoid,
          use_improved_weight_init, use_momentum, momentum_gamma, "----------")
    print("Final Train Cross Entropy Loss:",
          cross_entropy_loss(Y_train, model.forward(X_train)))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model.forward(X_val)))
    print("Final Test Cross Entropy Loss:",
          cross_entropy_loss(Y_test, model.forward(X_test)))
    print("Final Train accuracy:", calculate_accuracy(X_train, Y_train, model))
    print("Final Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
    print("Final Test accuracy:", calculate_accuracy(X_test, Y_test, model))
    return train_loss, val_loss, train_accuracy, val_accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Predicted class per row is the index of the largest output;
    # accuracy is the mean agreement with the one-hot target index.
    predicted_classes = model.forward(X).argmax(axis=1)
    true_classes = targets.argmax(axis=1)
    return (true_classes == predicted_classes).mean()
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Fix: removed the unused local `num_classes`.
    predictions = np.argmax(model.forward(X), axis=1)
    # Count rows where the predicted class index matches the target index.
    accuracy = np.count_nonzero(
        predictions == np.argmax(targets, axis=1)) / X.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Run a forward pass, then compare per-row argmax of the outputs
    # against the per-row argmax of the one-hot targets.
    outputs = model.forward(X)
    matches = np.argmax(outputs, axis=1) == np.argmax(targets, axis=1)
    return matches.mean()
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Fix: the original materialized a full one-hot prediction matrix and
    # compared entire rows with `.all(1)`. For one-hot `targets` (as the
    # docstring states) this is equivalent to comparing argmax indices
    # directly, which avoids the extra [batch, 10] allocation.
    logits = model.forward(X)
    accuracy = np.mean(logits.argmax(1) == targets.argmax(1))
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Fix: removed the dead no-op statement `result.size` that computed a
    # value and discarded it.
    targets_indices = np.argmax(targets, axis=1)
    outputs_indices = np.argmax(model.forward(X), axis=1)
    result = np.equal(targets_indices, outputs_indices)
    # Fraction of rows where prediction and target agree.
    accuracy = (result.sum()) / result.size
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Fix: dropped the redundant `np.array(...)` copy of the forward-pass
    # output (it is already an ndarray per this file's other variants) and
    # the stale TODO marker.
    y_hat = model.forward(X)
    y_predicted_position = np.argmax(y_hat, axis=1)
    y_position = np.argmax(targets, axis=1)
    # Count matching class indices and normalize by the batch size.
    accuracy = np.count_nonzero(
        y_position == y_predicted_position) / X.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Fixes: the original shadowed the builtin `sum`, and counted correct
    # predictions indirectly as N - count_nonzero(argmax_out - argmax_tgt).
    # Counting equalities directly is equivalent and clearer.
    outputs = model.forward(X)
    max_outputs = np.argmax(outputs, axis=1)
    max_targets = np.argmax(targets, axis=1)
    num_correct = np.count_nonzero(max_outputs == max_targets)
    accuracy = num_correct / outputs.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Fix: replaced the Python-level list comprehension over zipped argmax
    # indices (plus a dead `accuracy = 0.0`) with a single vectorized
    # comparison — same result, computed at C speed.
    logits = model.forward(X)
    logits_max = np.argmax(logits, axis=1)
    targets_max = np.argmax(targets, axis=1)
    accuracy = np.sum(logits_max == targets_max) / targets.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Fix: the original re-encoded the predictions as a one-hot matrix via
    # `one_hot_encode` and counted non-zeros of the elementwise product with
    # `targets`. For one-hot targets that equals a direct comparison of the
    # argmax indices, which drops the extra dependency and allocation.
    output = model.forward(X)
    predictions = np.argmax(output, axis=1)
    correct_pred = np.count_nonzero(predictions == np.argmax(targets, axis=1))
    total_pred = output.shape[0]
    accuracy = correct_pred / total_pred
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Forward pass, then count per-row argmax agreements and normalize.
    outputs = model.forward(X)
    hits = np.argmax(outputs, axis=1) == np.argmax(targets, axis=1)
    return np.sum(hits) / outputs.shape[0]
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Predict with a forward pass, then take the fraction of rows whose
    # most-likely class matches the one-hot target's class index.
    predictions = model.forward(X)
    correct = np.sum(predictions.argmax(1) == targets.argmax(1))
    return correct / targets.shape[0]
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # BUG FIX: the original rounded each output row with `np.around` and
    # required exact equality with the one-hot target. A row whose largest
    # probability is below 0.5 rounds to all zeros (and two values >= 0.5
    # round to two ones), so correct argmax predictions were counted as
    # wrong. Accuracy is now the standard argmax comparison. The original
    # also mutated y_hat in place; this version does not.
    number_of_predictions = X.shape[0]
    y_hat = model.forward(X)
    predicted = np.argmax(y_hat, axis=1)
    expected = np.argmax(targets, axis=1)
    number_of_rights = np.count_nonzero(predicted == expected)
    accuracy = number_of_rights / number_of_predictions
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Fixes: replaced the per-row Python loop with one vectorized comparison,
    # and replaced the fragile `np.where(targets[i] == 1)[0][0]` lookup
    # (IndexError on a row with no 1) with argmax, which is equivalent for
    # well-formed one-hot rows.
    outputs = model.forward(X)
    N = targets.shape[0]
    correct = np.count_nonzero(
        np.argmax(outputs, axis=1) == np.argmax(targets, axis=1))
    return correct / N
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Fix: the original looped over rows in Python and abused `accuracy` as
    # an integer hit counter. One vectorized argmax comparison gives the
    # same count at C speed.
    predictions = model.forward(X)
    num_correct = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(targets, axis=1))
    return num_correct / X.shape[0]
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Forward pass through the network.
    outputs = model.forward(X)
    # Reduce predictions and one-hot targets to class indices
    # (fresh names — the original rebound its own parameters here).
    predicted_idx = outputs.argmax(axis=1)
    target_idx = targets.argmax(axis=1)
    # Count agreements and divide by the number of examples.
    return np.count_nonzero(np.equal(target_idx, predicted_idx)) / X.shape[0]
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Fix: replaced the per-row Python loop with a vectorized lookup that
    # preserves the original semantics exactly — a prediction counts as
    # correct when the target row has a 1 at the predicted index.
    outputs = model.forward(X)
    nb_predictions = outputs.shape[0]
    predicted_idx = np.argmax(outputs, axis=1)
    nb_correct_predictions = np.count_nonzero(
        targets[np.arange(nb_predictions), predicted_idx] == 1)
    accuracy = nb_correct_predictions / nb_predictions
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Fix: replaced the Python-level `(1 if l == t else 0)` comprehension
    # over zipped argmax arrays (plus a dead `accuracy = 0`) with a single
    # vectorized comparison — identical result, no interpreter loop.
    lgts = model.forward(X)
    lgts_max = np.argmax(lgts, axis=1)
    targets_max = np.argmax(targets, axis=1)
    accuracy = np.sum(lgts_max == targets_max) / targets.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Fix: replaced the row-by-row Python loop and manual counter with a
    # single vectorized argmax comparison.
    logits = model.forward(X)
    numberOfPredictions = logits.shape[0]
    correctPredictions = np.count_nonzero(
        np.argmax(logits, axis=1) == np.argmax(targets, axis=1))
    accuracy = correctPredictions / numberOfPredictions
    return accuracy
Y_test = one_hot_encode(Y_test, 10) # Hyperparameters num_epochs = 20 learning_rate = .1 batch_size = 32 neurons_per_layer = [64, 10] momentum_gamma = .9 # Task 3 hyperparameter # Settings for task 3. Keep all to false for task 2. use_shuffle = False use_improved_sigmoid = False use_improved_weight_init = False use_momentum = False model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid, use_improved_weight_init) model, train_loss, val_loss, train_accuracy, val_accuracy = train( model, [X_train, Y_train, X_val, Y_val, X_test, Y_test], num_epochs=num_epochs, learning_rate=learning_rate, batch_size=batch_size, use_shuffle=use_shuffle, use_momentum=use_momentum, momentum_gamma=momentum_gamma) print("Final Train Cross Entropy Loss:", cross_entropy_loss(Y_train, model.forward(X_train))) print("Final Validation Cross Entropy Loss:", cross_entropy_loss(Y_val, model.forward(X_val))) print("Final Test Cross Entropy Loss:",
X_test = pre_process_images(X_test, mean=mu, std=sigma) # Hyperparameters num_epochs = 20 learning_rate = .1 batch_size = 32 neurons_per_layer = [64, 10] momentum_gamma = .9 # Task 3 hyperparameter # Settings for task 3. Keep all to false for task 2. use_shuffle = False use_improved_sigmoid = False use_improved_weight_init = False use_momentum = False model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid, use_improved_weight_init) # Initial the weight to randomly sampled weights between [-1, 1] for layer_idx, w in enumerate(model.ws): model.ws[layer_idx] = np.random.uniform(-1, 1, size=w.shape) model, train_loss, val_loss, train_accuracy, val_accuracy = train( model, [X_train, Y_train, X_val, Y_val, X_test, Y_test], num_epochs=num_epochs, learning_rate=learning_rate, batch_size=batch_size, use_shuffle=use_shuffle, use_momentum=use_momentum, momentum_gamma=momentum_gamma) print("Final Train Cross Entropy Loss:",
# Experiment configuration. NOTE(review): learning_rate, batch_size,
# num_epochs, neurons_per_layer, model_name, train_history and val_history
# are not defined in this chunk — presumably set earlier in the enclosing
# script or loop; confirm against the full file.
momentum_gamma = .9
shuffle_data = True
# TODO: Change if combination of all improvements is working
use_improved_sigmoid = True
use_improved_weight_init = True
use_momentum = False

# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

model = SoftmaxModel(neurons_per_layer,
                     use_improved_sigmoid,
                     use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train, X_val, Y_val,
)
current_train_history, current_val_history = trainer.train(num_epochs)
# Store this configuration's curves under its name for later comparison.
train_history[model_name] = current_train_history
val_history[model_name] = current_val_history
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """Return the classification accuracy of `model` on the batch X.

    Args:
        X: images of shape [batch size, 785]
        targets: one-hot labels of shape [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Fraction of rows where the predicted class (argmax of the model
        output) equals the target class (argmax of the one-hot row).
    """
    # Forward pass, then compare per-row class indices.
    outputs = model.forward(X)
    return np.mean(np.argmax(targets, axis=1) == np.argmax(outputs, axis=1))
# Baseline configuration (all task-3 improvements off, shuffling on).
# NOTE(review): learning_rate, batch_size and num_epochs are not defined in
# this chunk — presumably set earlier in the enclosing script; confirm.
neurons_per_layer = [64, 10]
momentum_gamma = .9  # Task 3 hyperparameter
shuffle_data = True
use_improved_sigmoid = False
use_improved_weight_init = False
use_momentum = False

# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

model = SoftmaxModel(neurons_per_layer,
                     use_improved_sigmoid,
                     use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train, X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

# Example created in assignment text - Comparing with and without shuffling.
# NOTE(review): these three flags are assigned False here and then
# immediately reassigned True below (before any use) — the first block is
# dead and could be removed; left as-is in this documentation-only pass.
use_improved_sigmoid = False
use_improved_weight_init = False
use_momentum = False

# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

######1nd model - network from task 3######
use_improved_sigmoid = True
use_improved_weight_init = True
use_momentum = True
model = SoftmaxModel(neurons_per_layer,
                     use_improved_sigmoid,
                     use_improved_weight_init)
# NOTE(review): neurons_per_layer, learning_rate, batch_size, shuffle_data
# and num_epochs are presumably defined earlier in the enclosing script.
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train, X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

######2nd model - network from task 4d ######
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    """Train `model` with mini-batch gradient descent and early stopping.

    Tracks train/validation loss and accuracy 5 times per epoch, keeps the
    weights that achieved the best validation loss, and stops early after
    30 consecutive validation checks without improvement (restoring the
    best weights).

    Fix: removed commented-out dead code from the tracking section; the
    executable logic is otherwise unchanged.

    NOTE(review): `use_shuffle`, `use_momentum` and `momentum_gamma` are
    accepted but not used by this variant — confirm whether they should be
    implemented here.

    Returns:
        (model, train_loss, val_loss, train_accuracy, val_accuracy) where
        the four dicts are keyed by global step.
    """
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    # Early stop variables: snapshots of the best weights plus a patience
    # counter and the best validation loss seen so far.
    early_stopped_weight_j = np.zeros(
        (model.ws[0].shape[0], model.ws[0].shape[1]))
    early_stopped_weight_k = np.zeros(
        (model.ws[1].shape[0], model.ws[1].shape[1]))
    early_stop_counter = 0
    best_loss = float("inf")
    global_step = 0
    for epoch in tqdm(range(num_epochs)):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            # Update the weights. This REBINDS model.ws[i] to a new array,
            # so the "best weight" references saved below are not mutated
            # by later updates.
            model.ws[0] = model.ws[0] - learning_rate * model.grads[0]
            model.ws[1] = model.ws[1] - learning_rate * model.grads[1]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                # Test the validation data on the network
                outputs_validation = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, outputs_validation)
                val_loss[global_step] = _val_loss

                # Track training loss over the entire X_Train and not only
                # the current batch, once every validation epoch.
                outputs_training = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, outputs_training)
                train_loss[global_step] = _train_loss

                # Early stop implementation
                # If the loss does not reduce compared to best loss,
                # increment counter. Otherwise, set the counter to 0 and
                # update best loss (and snapshot the weights).
                if _val_loss >= best_loss:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
                    best_loss = _val_loss
                    early_stopped_weight_j = model.ws[0]
                    early_stopped_weight_k = model.ws[1]

                # If 30 times in a row a new best loss was not achieved,
                # restore the best weights and stop the program.
                if early_stop_counter == 30:
                    print(
                        "The cross entropy loss for validation data increased too much, thus triggering "
                        "the early stop at step : " + str(global_step) +
                        " and epoch : " + str(epoch))
                    model.ws[0] = early_stopped_weight_j
                    model.ws[1] = early_stopped_weight_k
                    return model, train_loss, val_loss, train_accuracy, val_accuracy

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
# All task-3 improvements enabled.
shuffle_data = True
use_improved_sigmoid = True
use_improved_weight_init = True
use_momentum = True

# Load dataset and standardize with the TRAINING set mean/std (the same
# statistics are applied to the validation set to avoid leakage).
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
mean = X_train.mean()
sd = X_train.std()
X_train = pre_process_images(X_train, mean, sd)
X_val = pre_process_images(X_val, mean, sd)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

# NOTE(review): neurons_per_layer, learning_rate, batch_size, momentum_gamma
# and num_epochs are presumably defined earlier in the enclosing script.
model = SoftmaxModel(neurons_per_layer,
                     use_improved_sigmoid,
                     use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train, X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)
#two hidden layers
if __name__ == "__main__":
    # Simple test on one-hot encoding
    Y = np.zeros((1, 1), dtype=int)
    Y[0, 0] = 3
    Y = one_hot_encode(Y, 10)
    assert Y[0, 3] == 1 and Y.sum() == 1, \
        f"Expected the vector to be [0,0,0,1,0,0,0,0,0,0], but got {Y}"

    # Load a subset of MNIST (0.1 presumably selects 10% of the data —
    # confirm against utils.load_full_mnist) and standardize with the
    # training-set mean/std.
    X_train, Y_train, *_ = utils.load_full_mnist(0.1)
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_train = pre_process_images(X_train, mean, std)
    Y_train = one_hot_encode(Y_train, 10)
    assert X_train.shape[1] == 785,\
        f"Expected X_train to have 785 elements per image. Shape was: {X_train.shape}"

    # Modify your network here
    neurons_per_layer = [64, 64, 10]
    use_improved_sigmoid = True
    use_improved_weight_init = True
    model = SoftmaxModel(neurons_per_layer,
                         use_improved_sigmoid,
                         use_improved_weight_init)

    # Gradient approximation check for 100 images
    X_train = X_train[:100]
    Y_train = Y_train[:100]
    # Randomize the weights in [-1, 1] before the numerical-gradient check,
    # overriding SoftmaxModel's own initialization.
    for layer_idx, w in enumerate(model.ws):
        model.ws[layer_idx] = np.random.uniform(-1, 1, size=w.shape)

    gradient_approximation_test(model, X_train, Y_train)
# Configuration with all improvements plus early stopping enabled.
early_stopping = True
use_improved_sigmoid = True
use_improved_weight_init = True
use_momentum = True

# Load dataset and standardize with the TRAINING set mean/std (the same
# statistics are applied to the validation set to avoid leakage).
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
mean = np.mean(X_train)
std = np.std(X_train)
X_train = pre_process_images(X_train, mean, std)
X_val = pre_process_images(X_val, mean, std)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

# NOTE(review): neurons_per_layer_small, learning_rate, batch_size,
# momentum_gamma, shuffle_data and num_epochs are presumably defined
# earlier in the enclosing script; confirm. This SoftmaxTrainer variant
# also takes an `early_stopping` argument, unlike the calls elsewhere
# in this file.
model = SoftmaxModel(neurons_per_layer_small,
                     use_improved_sigmoid,
                     use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data, early_stopping,
    X_train, Y_train, X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)