def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    """Mini-batch gradient-descent training loop for a SoftmaxModel.

    Args:
        model: network to train; model.ws entries are rebound in place and
            model.forward / model.backward are called every step.
        datasets: [X_train, Y_train, X_val, Y_val, X_test, Y_test].
        num_epochs: number of full passes over the training set.
        learning_rate: SGD step size. NOTE: overridden to 0.02 below when
            use_momentum is True, ignoring the argument.
        batch_size: samples per gradient step (trailing partial batch is
            dropped by the integer division below).
        use_shuffle: reshuffle training examples after every epoch.
        use_momentum: add a gamma-weighted previous-gradient term to the step.
        momentum_gamma: weight of that previous-gradient term.

    Returns:
        (model, train_loss, val_loss, train_accuracy, val_accuracy); the four
        dicts map global_step -> metric value, recorded 5 times per epoch.
    """
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    # Validate 5 times per epoch, i.e. every 20% of the training set.
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking dicts, keyed by global_step.
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    if use_momentum:
        # Hard-coded task hyperparameter: momentum runs always use lr = 0.02.
        learning_rate = 0.02
    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]
            # NOTE(review): this aliases the current grads *list*; it only
            # holds the previous step's gradients if model.backward rebinds
            # model.grads to fresh arrays instead of mutating them in place
            # -- confirm against SoftmaxModel.backward.
            prev_grads = model.grads
            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            for i in range(len(model.ws)):
                if use_momentum:
                    # Step along gradient + gamma * previous gradient.
                    model.ws[i] = model.ws[i] - learning_rate * (model.grads[i] + momentum_gamma * prev_grads[i])
                else:
                    # Plain SGD step.
                    model.ws[i] = model.ws[i] - learning_rate * model.grads[i]
            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset.
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss
                _train_loss = cross_entropy_loss(Y_train, model.forward(X_train))
                train_loss[global_step] = _train_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
        # Shuffle training examples after each epoch (helper keeps X and Y
        # aligned under the same permutation).
        if use_shuffle:
            X_train, Y_train = unison_shuffled_copies(X_train, Y_train)
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    """Plain mini-batch SGD training loop for a two-layer SoftmaxModel.

    Args:
        model: network to train; ws[-1] and ws[-2] are updated in place.
        datasets: [X_train, Y_train, X_val, Y_val, X_test, Y_test].
        num_epochs: number of full passes over the training set.
        learning_rate: SGD step size.
        batch_size: samples per gradient step (trailing partial batch dropped).
        use_shuffle / use_momentum / momentum_gamma: accepted for signature
            compatibility but NOT used by this implementation.

    Returns:
        (model, train_loss, val_loss, train_accuracy, val_accuracy); the four
        dicts map global_step -> metric value, recorded 5 times per epoch.
    """
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    # Validate 5 times per epoch (every 20% of the training set).
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking dicts, keyed by global_step.
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]
            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            # Update weights. NOTE(review): only the last two weight
            # matrices are updated -- assumes a single-hidden-layer network;
            # any additional layers would stay frozen. Confirm model depth.
            model.ws[-1] = model.ws[-1] - learning_rate * model.grads[-1]
            model.ws[-2] = model.ws[-2] - learning_rate * model.grads[-2]
            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset.
            if (global_step % num_steps_per_val) == 0:
                _outputs_train = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, _outputs_train)
                train_loss[global_step] = _train_loss
                _outputs_val = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, _outputs_val)
                val_loss[global_step] = _val_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train_and_evaluate(
        neurons_per_layer: typing.List[int],
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_improved_sigmoid: bool,
        use_improved_weight_init: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):
    """Construct a SoftmaxModel, train it, and print final metrics.

    Args:
        neurons_per_layer: units per layer, e.g. [64, 10].
        datasets: [X_train, Y_train, X_val, Y_val, X_test, Y_test],
            forwarded to train().
        num_epochs / learning_rate / batch_size: training hyperparameters.
        use_shuffle / use_momentum / momentum_gamma: Task 3 "tricks of the
            trade" flags forwarded to train().
        use_improved_sigmoid / use_improved_weight_init: model construction
            flags.
        use_shift: forwarded to train(). NOTE(review): requires a train()
            variant that accepts a use_shift keyword.

    Returns:
        (train_loss, val_loss, train_accuracy, val_accuracy) dicts from
        train().

    NOTE(review): the print statements read X_train, Y_train, X_val, Y_val,
    X_test, Y_test that are not parameters -- they must exist at module
    level; verify they match the arrays inside `datasets`.
    """
    model = SoftmaxModel(neurons_per_layer,
                         use_improved_sigmoid,
                         use_improved_weight_init)
    model, train_loss, val_loss, train_accuracy, val_accuracy = train(
        model,
        datasets,
        num_epochs=num_epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        use_shuffle=use_shuffle,
        use_momentum=use_momentum,
        momentum_gamma=momentum_gamma,
        use_shift=use_shift)
    # Banner identifying which flag combination produced the results below.
    print("----------", use_shuffle, use_improved_sigmoid,
          use_improved_weight_init, use_momentum, momentum_gamma,
          "----------")
    print("Final Train Cross Entropy Loss:",
          cross_entropy_loss(Y_train, model.forward(X_train)))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model.forward(X_val)))
    print("Final Test Cross Entropy Loss:",
          cross_entropy_loss(Y_test, model.forward(X_test)))
    print("Final Train accuracy:", calculate_accuracy(X_train, Y_train, model))
    print("Final Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
    print("Final Test accuracy:", calculate_accuracy(X_test, Y_test, model))
    return train_loss, val_loss, train_accuracy, val_accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Predicted class is the argmax over the 10 output scores per sample;
    # accuracy is the fraction of samples where it matches the label.
    outputs = model.forward(X)
    predicted = np.argmax(outputs, axis=1)
    expected = np.argmax(targets, axis=1)
    return np.mean(expected == predicted)
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of samples whose predicted class
        (argmax of the model output) matches the one-hot target class.
    """
    # Fix: removed the unused `num_classes` local and replaced the
    # count_nonzero / shape division with an equivalent np.mean over the
    # boolean match vector.
    predictions = np.argmax(model.forward(X), axis=1)
    labels = np.argmax(targets, axis=1)
    return float(np.mean(predictions == labels))
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Run the forward pass, reduce both outputs and one-hot targets to
    # class indices, and average the elementwise matches.
    class_predictions = np.argmax(model.forward(X), axis=1)
    class_labels = np.argmax(targets, axis=1)
    accuracy = np.mean(class_predictions == class_labels)
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of samples whose predicted class
        matches the one-hot target class.
    """
    # Fix: previously this materialised a full one-hot prediction matrix
    # and compared all 10 columns per row. For one-hot targets that is
    # equivalent to comparing the argmax indices directly, without the
    # extra [batch, 10] allocation.
    logits = model.forward(X)
    accuracy = np.mean(logits.argmax(1) == targets.argmax(1))
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of correctly classified samples.
    """
    # Fix: removed the bare no-op statement `result.size` and folded the
    # sum/size division into np.mean over the boolean match vector.
    targets_indices = np.argmax(targets, axis=1)
    outputs_indices = np.argmax(model.forward(X), axis=1)
    return float(np.mean(outputs_indices == targets_indices))
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of samples whose predicted class
        matches the one-hot target class.
    """
    # Fix: dropped the redundant np.array() wrap around the forward output
    # (model.forward already yields an ndarray per its use with np.argmax)
    # and replaced count_nonzero / shape with an equivalent mean.
    y_hat = model.forward(X)
    y_predicted_position = np.argmax(y_hat, axis=1)
    y_position = np.argmax(targets, axis=1)
    return float(np.mean(y_position == y_predicted_position))
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of correctly classified samples.
    """
    # Fix: removed the dead `accuracy = 0.0` initialiser and replaced the
    # Python-level list comprehension (1-if-equal, zip, sum) with a single
    # vectorised comparison -- same result, O(n) in C instead of Python.
    logits = model.forward(X)
    logits_max = np.argmax(logits, axis=1)
    targets_max = np.argmax(targets, axis=1)
    return float(np.mean(logits_max == targets_max))
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Boolean vector: True where the model's top class equals the target's.
    matches = np.argmax(model.forward(X), axis=1) == np.argmax(targets, axis=1)
    # Mean of booleans == correct / total.
    return matches.mean()
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Forward pass, then compare predicted class index against the target
    # class index for each sample and average the hit count.
    predicted_idx = np.argmax(model.forward(X), axis=1)
    target_idx = np.argmax(targets, axis=1)
    num_correct = np.sum(predicted_idx == target_idx)
    return num_correct / targets.shape[0]
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of correctly classified samples.
    """
    # Fix: the original shadowed the builtin `sum` and counted matches
    # indirectly as total - count_nonzero(argmax difference). A direct
    # equality comparison expresses the same thing without the detour.
    outputs = model.forward(X)
    max_outputs = np.argmax(outputs, axis=1)
    max_targets = np.argmax(targets, axis=1)
    return float(np.mean(max_outputs == max_targets))
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of correctly classified samples.
    """
    # Fix: the original re-encoded predictions with one_hot_encode and
    # counted non-zeros of targets * predictions. For one-hot targets
    # (shape [batch, 10] per the docstring) that equals a direct argmax
    # comparison, without the helper dependency or the extra matrices.
    # Also removed the dead `accuracy = 0` initialiser.
    output = model.forward(X)
    correct_pred = np.sum(np.argmax(output, axis=1) == np.argmax(targets, axis=1))
    total_pred = output.shape[0]
    return correct_pred / total_pred
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of samples whose predicted class
        (argmax of the model output) matches the one-hot target.
    """
    # BUG FIX: the original rounded the output probabilities with
    # np.around and required exact equality with the one-hot target.
    # Any sample whose maximum probability is <= 0.5 rounds to an
    # all-zero row and is counted as wrong even when its argmax is the
    # correct class (it also mutated y_hat in place). Standard accuracy
    # compares argmax indices instead.
    number_of_predictions = X.shape[0]
    y_hat = model.forward(X)
    number_of_rights = np.sum(
        np.argmax(y_hat, axis=1) == np.argmax(targets, axis=1))
    return number_of_rights / number_of_predictions
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of correctly classified samples.
    """
    # Fix: replaced the per-sample Python loop (and the counter that was
    # misleadingly named `accuracy` while holding an integer count) with a
    # single vectorised argmax comparison -- identical result.
    predictions = model.forward(X)
    num_correct = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(targets, axis=1))
    return num_correct / X.shape[0]
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of correctly classified samples.
    """
    # Fix: the original looked up the label with
    # np.where(targets[i] == 1)[0][0], which raises IndexError for any row
    # without an exact 1.0 (e.g. float or smoothed labels); np.argmax is
    # robust and equivalent for one-hot rows. Also vectorised the
    # per-sample Python loop.
    outputs = model.forward(X)
    N = targets.shape[0]
    correct_outputs = np.sum(
        np.argmax(outputs, axis=1) == np.argmax(targets, axis=1))
    return correct_outputs / N
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Forward pass, then reduce scores and one-hot labels to class indices.
    predicted_classes = model.forward(X).argmax(axis=1)
    true_classes = targets.argmax(axis=1)
    # Count matching entries and normalise by the batch size.
    correct = np.count_nonzero(predicted_classes == true_classes)
    return correct / X.shape[0]
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of correctly classified samples.
    """
    # Fix: replaced the per-row Python loop (enumerate + per-row argmax +
    # targets[row][index] lookup) with one vectorised comparison. For
    # one-hot targets, "targets[row][argmax(output)] == 1" is exactly
    # "argmax(output) == argmax(target)".
    outputs = model.forward(X)
    nb_predictions = outputs.shape[0]
    nb_correct_predictions = np.sum(
        np.argmax(outputs, axis=1) == np.argmax(targets, axis=1))
    return nb_correct_predictions / nb_predictions
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of correctly classified samples.
    """
    # Fix: removed the dead `accuracy = 0` initialiser and replaced the
    # Python list comprehension (1-if-equal over zip) with an equivalent
    # vectorised mean of the boolean match vector.
    lgts = model.forward(X)
    lgts_max = np.argmax(lgts, axis=1)
    targets_max = np.argmax(targets, axis=1)
    return float(np.mean(lgts_max == targets_max))
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of correctly classified samples.
    """
    # Fix: replaced the per-sample Python loop (camelCase counters,
    # per-row argmax) with one vectorised argmax comparison -- identical
    # result, PEP 8 naming.
    logits = model.forward(X)
    number_of_predictions = logits.shape[0]
    correct_predictions = np.sum(
        np.argmax(logits, axis=1) == np.argmax(targets, axis=1))
    return correct_predictions / number_of_predictions
model2, learning_rate, batch_size, shuffle_data, X_train, Y_train, X_val, Y_val, ) train_history2, val_history2 = trainer2.train(num_epochs) print("model from 4e") print("Train accuracy:", calculate_accuracy(X_train, Y_train, model2)) print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model2)) print("Final Validation Cross Entropy Loss:", cross_entropy_loss(Y_val, model2.forward(X_val))) #Plotting training/validation - loss/accuracy comparing the two models: plt.figure(figsize=(20, 12)) plt.subplot(1, 2, 1) plt.ylim([0., .9]) utils.plot_loss(train_history2["loss"], "Train - 10 hidden layers") utils.plot_loss(train_history1["loss"], "Train - 2 hidden layers") utils.plot_loss(train_history["loss"], "Train - 1 hidden layer") utils.plot_loss(val_history2["loss"], "Validation - 10 hidden layers") utils.plot_loss(val_history1["loss"], "Validation - 2 hidden layers") utils.plot_loss(val_history["loss"], "Validation - 1 hidden layer") #similar legend as accuracy plot: plt.legend() plt.xlabel("Number of Training Steps") plt.ylabel("Training/Validation Loss")
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    """SGD training loop for a two-layer SoftmaxModel with early stopping.

    Early stopping: validation loss is checked 5 times per epoch; after 30
    consecutive checks without a new best validation loss, training aborts
    and the best-seen weights are restored.

    Args:
        model: network to train; ws[0] and ws[1] are rebound each step.
        datasets: [X_train, Y_train, X_val, Y_val, X_test, Y_test].
        num_epochs: maximum number of passes over the training set.
        learning_rate: SGD step size.
        batch_size: samples per gradient step (trailing partial batch dropped).
        use_shuffle / use_momentum / momentum_gamma: accepted for signature
            compatibility but NOT used by this implementation.

    Returns:
        (model, train_loss, val_loss, train_accuracy, val_accuracy); dicts
        map global_step -> metric value.
    """
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    # Validate 5 times per epoch (every 20% of the training set).
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking dicts, keyed by global_step.
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    # Early stop variables: snapshots of the best-seen weights (ws[0]=hidden
    # layer, ws[1]=output layer), a patience counter, and the best val loss.
    early_stopped_weight_j = np.zeros(
        (model.ws[0].shape[0], model.ws[0].shape[1]))
    early_stopped_weight_k = np.zeros(
        (model.ws[1].shape[0], model.ws[1].shape[1]))
    early_stop_counter = 0
    best_loss = float("inf")
    global_step = 0
    for epoch in tqdm(range(num_epochs)):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]
            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            # Update the weights. NOTE(review): only ws[0] and ws[1] are
            # updated -- assumes a single-hidden-layer network. The
            # rebinding (rather than in-place mutation) is what keeps the
            # early-stop snapshots below valid.
            model.ws[0] = model.ws[0] - learning_rate * model.grads[0]
            model.ws[1] = model.ws[1] - learning_rate * model.grads[1]
            # (Deliberately disabled: per-batch training-loss tracking.)
            #outputs_training = model.forward(X_train)
            #_train_loss = cross_entropy_loss(Y_batch, outputs)
            #train_loss[global_step] = _train_loss
            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset.
            if (global_step % num_steps_per_val) == 0:
                # Evaluate the validation set.
                outputs_validation = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, outputs_validation)
                val_loss[global_step] = _val_loss
                # Training loss over the full X_train (not just the batch),
                # computed once per validation check.
                outputs_training = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, outputs_training)
                train_loss[global_step] = _train_loss
                # Early stop bookkeeping: no improvement -> bump counter;
                # improvement -> reset counter and snapshot the weights.
                if _val_loss >= best_loss:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
                    best_loss = _val_loss
                    early_stopped_weight_j = model.ws[0]
                    early_stopped_weight_k = model.ws[1]
                # After 30 checks in a row without a new best loss, restore
                # the best weights and stop.
                if early_stop_counter == 30:
                    print(
                        "The cross entropy loss for validation data increased too much, thus triggering "
                        "the early stop at step : " + str(global_step) +
                        " and epoch : " + str(epoch))
                    model.ws[0] = early_stopped_weight_j
                    model.ws[1] = early_stopped_weight_k
                    return model, train_loss, val_loss, train_accuracy, val_accuracy
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
use_momentum = False model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid, use_improved_weight_init) model, train_loss, val_loss, train_accuracy, val_accuracy = train( model, [X_train, Y_train, X_val, Y_val, X_test, Y_test], num_epochs=num_epochs, learning_rate=learning_rate, batch_size=batch_size, use_shuffle=use_shuffle, use_momentum=use_momentum, momentum_gamma=momentum_gamma) print("Final Train Cross Entropy Loss:", cross_entropy_loss(Y_train, model.forward(X_train))) print("Final Validation Cross Entropy Loss:", cross_entropy_loss(Y_val, model.forward(X_val))) print("Final Test Cross Entropy Loss:", cross_entropy_loss(Y_test, model.forward(X_test))) print("Final Train accuracy:", calculate_accuracy(X_train, Y_train, model)) print("Final Validation accuracy:", calculate_accuracy(X_val, Y_val, model)) print("Final Test accuracy:", calculate_accuracy(X_test, Y_test, model)) # Execution time calculation end = time.time() time_in_seconds = end - start if (time_in_seconds > 60): print("The process took: " + str(int(time_in_seconds / 60)) + "min " +
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """Return the fraction of samples in X that the model classifies
    correctly, comparing argmax of the model output against argmax of the
    one-hot targets."""
    predicted = model.forward(X).argmax(axis=1)
    actual = targets.argmax(axis=1)
    return np.mean(actual == predicted)
model, learning_rate, batch_size, shuffle_data, X_train, Y_train, X_val, Y_val, ) train_history, val_history = trainer.train(num_epochs) print("32 neurons") print("Train accuracy:", calculate_accuracy(X_train, Y_train, model)) print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model)) print("Final Validation Cross Entropy Loss:", cross_entropy_loss(Y_val, model.forward(X_val))) ###Model with 128 neurons in hidden layer### neurons_per_layer = [128, 10] model1 = SoftmaxModel(neurons_per_layer, use_improved_sigmoid, use_improved_weight_init) trainer1 = SoftmaxTrainer( momentum_gamma, use_momentum, model1, learning_rate, batch_size, shuffle_data, X_train, Y_train, X_val,
# Build and train the model, then report final losses and accuracies.
model = SoftmaxModel(neurons_per_layer,
                     use_improved_sigmoid,
                     use_improved_weight_init)
model, train_loss, val_loss, train_accuracy, val_accuracy = train(
    model,
    [X_train, Y_train, X_val, Y_val, X_test, Y_test],
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    batch_size=batch_size,
    use_shuffle=use_shuffle,
    use_momentum=use_momentum,
    momentum_gamma=momentum_gamma,
    all_tricks=all_tricks)
# Process the results in something readable
print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(X_train)))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(X_val)))
print("Final Test Cross Entropy Loss:",
      cross_entropy_loss(Y_test, model.forward(X_test)))
# BUG FIX: calculate_accuracy expects the raw images as its first argument
# (it calls model.forward itself). Previously the pre-computed logits
# were passed in, so the model ran forward on its own outputs and the
# reported accuracies were meaningless.
print("Final Train accuracy:", calculate_accuracy(X_train, Y_train, model))
print("Final Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
print("Final Test accuracy:", calculate_accuracy(X_test, Y_test, model))
# Build a plot-title suffix listing the enabled tricks.
title_tricks = str()
if use_shuffle:
    title_tricks += "&shuffle"
if use_improved_sigmoid:
    title_tricks += "&impr_sigmoid"
# Simple test on one-hot encoding Y = np.zeros((1, 1), dtype=int) Y[0, 0] = 3 Y = one_hot_encode(Y, 10) assert Y[0, 3] == 1 and Y.sum() == 1, \ f"Expected the vector to be [0,0,0,1,0,0,0,0,0,0], but got {Y}" X_train, Y_train, *_ = utils.load_full_mnist(0.1) X_train = pre_process_images(X_train) Y_train = one_hot_encode(Y_train, 10) assert X_train.shape[1] == 785,\ f"Expected X_train to have 785 elements per image. Shape was: {X_train.shape}" # Modify your network here neurons_per_layer = [64, 64, 10] use_improved_sigmoid = True use_improved_weight_init = True model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid, use_improved_weight_init) logits = model.forward(X_train) np.testing.assert_almost_equal( logits.mean(), 1 / 10, err_msg= "Since the weights are all 0's, the softmax activation should be 1/10") # Gradient approximation check for 100 images X_train = X_train[:100] Y_train = Y_train[:100] gradient_approximation_test(model, X_train, Y_train)
Y_val = one_hot_encode(Y_val, 10) print("Training standard model:\n") model = SoftmaxModel( neurons_per_layer, use_improved_sigmoid, use_improved_weight_init) trainer = SoftmaxTrainer( momentum_gamma, use_momentum, model, learning_rate, batch_size, shuffle_data, X_train, Y_train, X_val, Y_val, ) train_history, val_history = trainer.train(num_epochs) print("Final Train Cross Entropy Loss:", cross_entropy_loss(Y_train, model.forward(X_train))) print("Final Validation Cross Entropy Loss:", cross_entropy_loss(Y_val, model.forward(X_val))) print("Train accuracy:", calculate_accuracy(X_train, Y_train, model)) print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model)) print("\n\n") # Example created in assignment text - Comparing with and without shuffling. # YOU CAN DELETE EVERYTHING BELOW! # model with improved sigmoid use_improved_sigmoid = True print("Training model with improved sigmoid:\n") model_is = SoftmaxModel(
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):
    """SGD training loop with optional shuffling, momentum, pixel-shift
    augmentation, and two-strikes early stopping on validation loss.

    Args:
        model: network to train; every entry of model.ws is updated in place.
        datasets: [X_train, Y_train, X_val, Y_val, X_test, Y_test].
        num_epochs: maximum number of passes over the training set.
        learning_rate: SGD step size.
        batch_size: samples per gradient step (trailing partial batch dropped).
        use_shuffle: reshuffle training examples after each epoch.
        use_momentum: classical momentum update using the previous step's
            update term.
        momentum_gamma: momentum coefficient.
        use_shift: build a randomly shifted copy of the batch (see notes).

    Returns:
        (model, train_loss, val_loss, train_accuracy, val_accuracy); dicts
        map global_step -> metric value.
    """
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    # Validate 5 times per epoch (every 20% of the training set).
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking dicts, keyed by global_step.
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    # Variables for early stopping: stop after two consecutive epochs of
    # rising validation loss, restoring the best-seen weights.
    last_val_loss = 1
    best_val_loss = 1
    best_weights = None
    increased_last_time = False
    # Store last weights update term for momentum (one buffer per layer).
    last_weights_update = []
    for l in range(len(model.ws)):
        last_weights_update.append(np.zeros_like(model.ws[l]))
    global_step = 0
    for epoch in range(num_epochs):
        print("Epoch:", epoch)
        for step in range(num_batches_per_epoch):
            # Random per-sample horizontal shift in [-2, 2] for augmentation.
            shift = np.random.randint(low=-2, high=3, size=batch_size)
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]
            X_local = X_batch
            if use_shift:
                # NOTE(review): np.roll with an array-valued shift and a
                # single axis does not apply per-sample shifts -- confirm
                # this call even runs; also rolling the flat 784-pixel rows
                # wraps pixels across image-row boundaries.
                X_local = np.roll(X_batch[:, :784], shift, axis=1)
                ones = np.ones((X_local.shape[0], 1))
                X_local = np.concatenate((X_local, ones), axis=1)
            # NOTE(review): X_local is never used below -- forward/backward
            # run on the unshifted X_batch, so the augmentation is dead code.
            train_output = model.forward(X_batch)
            model.backward(X_batch, train_output, Y_batch)
            for l in range(len(model.ws)):
                if use_momentum:
                    # Classical momentum: blend previous update with the
                    # current gradient step.
                    update_term = momentum_gamma * last_weights_update[
                        l] - learning_rate * model.grads[l]
                    model.ws[l] += update_term
                    last_weights_update[l] = update_term
                else:
                    model.ws[l] -= learning_rate * model.grads[l]
            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset.
            if (global_step % num_steps_per_val) == 0:
                val_output = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_output)
                val_loss[global_step] = _val_loss
                train_output = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, train_output)
                train_loss[global_step] = _train_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
        # In order to keep labels in the right order, we shuffle an array of
        # indices and then apply this ordering to both inputs and labels.
        if use_shuffle:
            indices = np.arange(X_train.shape[0])
            np.random.shuffle(indices)
            X_train = X_train[indices]
            Y_train = Y_train[indices]
        # Compute validation loss for early stopping (once per epoch).
        val_outputs = model.forward(X_val)
        _val_loss = cross_entropy_loss(Y_val, val_outputs)
        if _val_loss <= best_val_loss:
            # NOTE(review): this stores a reference to the ws list while the
            # update loop above mutates the arrays in place (+=, -=), so
            # best_weights tracks the *current* weights rather than a
            # snapshot -- restoring it below is likely a no-op. Confirm.
            best_weights = model.ws
            best_val_loss = _val_loss
        # Stop only after two consecutive increases in validation loss.
        if _val_loss > last_val_loss:
            if increased_last_time:
                model.ws = best_weights
                break
            else:
                increased_last_time = True
        else:
            increased_last_time = False
        last_val_loss = _val_loss
    return model, train_loss, val_loss, train_accuracy, val_accuracy