def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables for loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Task 3 hyperparameter choice: a lower learning rate is used with momentum
    if use_momentum:
        learning_rate = 0.02

    # Gradients from the previous step, used by the momentum term. They start
    # at zero and are copied after each update; aliasing model.grads directly
    # would make the "previous" gradients identical to the current ones.
    prev_grads = [np.zeros_like(w) for w in model.ws]

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            for i in range(len(model.ws)):
                if use_momentum:
                    model.ws[i] = model.ws[i] - learning_rate * (
                        model.grads[i] + momentum_gamma * prev_grads[i])
                else:
                    model.ws[i] = model.ws[i] - learning_rate * model.grads[i]
            prev_grads = [g.copy() for g in model.grads]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                _train_loss = cross_entropy_loss(Y_train, model.forward(X_train))
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1

        # Shuffle training examples after each epoch
        if use_shuffle:
            X_train, Y_train = unison_shuffled_copies(X_train, Y_train)

    return model, train_loss, val_loss, train_accuracy, val_accuracy
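# For contrast with the gradient-sum momentum in the variant above, here is a
# minimal sketch of the classical velocity formulation of SGD with momentum.
# The function name and its arguments are illustrative, not part of the
# original code.
def sgd_momentum_step(ws, grads, velocities, learning_rate, momentum_gamma):
    """One update: v <- gamma * v - lr * grad, then w <- w + v, per layer."""
    for i in range(len(ws)):
        velocities[i] = momentum_gamma * velocities[i] - learning_rate * grads[i]
        ws[i] = ws[i] + velocities[i]
    return ws, velocities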
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    # Note: use_shuffle, use_momentum and momentum_gamma are accepted for a
    # uniform signature but are not used by this variant.
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables for loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)

            # Update the weights of both layers with plain gradient descent
            model.ws[-1] = model.ws[-1] - learning_rate * model.grads[-1]
            model.ws[-2] = model.ws[-2] - learning_rate * model.grads[-2]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _outputs_train = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, _outputs_train)
                train_loss[global_step] = _train_loss

                _outputs_val = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, _outputs_val)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1

    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables for loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Early stop variables
    early_stopped_weight_j = np.zeros_like(model.ws[0])
    early_stopped_weight_k = np.zeros_like(model.ws[1])
    early_stop_counter = 0
    best_loss = float("inf")

    global_step = 0
    for epoch in tqdm(range(num_epochs)):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)

            # Update the weights
            model.ws[0] = model.ws[0] - learning_rate * model.grads[0]
            model.ws[1] = model.ws[1] - learning_rate * model.grads[1]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                # Run the validation data through the network
                outputs_validation = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, outputs_validation)
                val_loss[global_step] = _val_loss

                # Track training loss over the entire X_train, not only the
                # current batch, once every validation step
                outputs_training = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, outputs_training)
                train_loss[global_step] = _train_loss

                # Early stop implementation:
                # if the loss does not improve on the best loss, increment the
                # counter; otherwise reset the counter and update the best loss
                if _val_loss >= best_loss:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
                    best_loss = _val_loss
                    early_stopped_weight_j = model.ws[0].copy()
                    early_stopped_weight_k = model.ws[1].copy()

                # If no new best loss was achieved 30 validations in a row,
                # restore the best weights and stop training
                if early_stop_counter == 30:
                    print(
                        "The validation cross-entropy loss stopped improving, "
                        "triggering early stopping at step " + str(global_step)
                        + " and epoch " + str(epoch))
                    model.ws[0] = early_stopped_weight_j
                    model.ws[1] = early_stopped_weight_k
                    return model, train_loss, val_loss, train_accuracy, val_accuracy

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1

    return model, train_loss, val_loss, train_accuracy, val_accuracy
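# The early-stopping bookkeeping above can be factored into a small reusable
# helper. This is a sketch; the class name and API are illustrative, not part
# of the original code.
class EarlyStopping:
    def __init__(self, patience=30):
        self.patience = patience
        self.best_loss = float("inf")
        self.counter = 0
        self.best_weights = None

    def step(self, loss, weights):
        """Record one validation loss; return True when training should stop."""
        if loss >= self.best_loss:
            self.counter += 1
        else:
            self.counter = 0
            self.best_loss = loss
            self.best_weights = [w.copy() for w in weights]
        return self.counter >= self.patience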
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables for loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Variables for early stopping
    last_val_loss = float("inf")
    best_val_loss = float("inf")
    best_weights = None
    increased_last_time = False

    # Store the last weight update term for momentum
    last_weights_update = []
    for l in range(len(model.ws)):
        last_weights_update.append(np.zeros_like(model.ws[l]))

    global_step = 0
    for epoch in range(num_epochs):
        print("Epoch:", epoch)
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            X_local = X_batch
            if use_shift:
                # Data augmentation: shift each image a few pixels. Only the
                # 784 pixel columns are rolled; the bias column stays intact.
                shift = np.random.randint(low=-2, high=3, size=X_batch.shape[0])
                X_local = X_batch.copy()
                for i in range(X_local.shape[0]):
                    X_local[i, :784] = np.roll(X_batch[i, :784], shift[i])

            train_output = model.forward(X_local)
            model.backward(X_local, train_output, Y_batch)
            for l in range(len(model.ws)):
                if use_momentum:
                    update_term = (momentum_gamma * last_weights_update[l]
                                   - learning_rate * model.grads[l])
                    model.ws[l] += update_term
                    last_weights_update[l] = update_term
                else:
                    model.ws[l] -= learning_rate * model.grads[l]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                val_output = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_output)
                val_loss[global_step] = _val_loss

                train_output = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, train_output)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1

        # To keep labels in the right order, we shuffle an array of indices
        # and apply the same ordering to both inputs and labels
        if use_shuffle:
            indices = np.arange(X_train.shape[0])
            np.random.shuffle(indices)
            X_train = X_train[indices]
            Y_train = Y_train[indices]

        # Compute validation loss for early stopping: stop once the loss has
        # increased on two consecutive epochs, restoring the best weights
        val_outputs = model.forward(X_val)
        _val_loss = cross_entropy_loss(Y_val, val_outputs)
        if _val_loss <= best_val_loss:
            # Copy, not alias: the weights are updated in place above
            best_weights = [w.copy() for w in model.ws]
            best_val_loss = _val_loss
        if _val_loss > last_val_loss:
            if increased_last_time:
                model.ws = best_weights
                break
            else:
                increased_last_time = True
        else:
            increased_last_time = False
        last_val_loss = _val_loss

    return model, train_loss, val_loss, train_accuracy, val_accuracy
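# A vectorized alternative to the per-sample np.roll loop in the shift
# augmentation above. This is a sketch; it assumes, as that variant does, that
# the first 784 columns hold the image pixels and that any trailing columns
# (such as the bias) should be left untouched.
def roll_rows(X, shifts, width=784):
    out = X.copy()
    rows = np.arange(X.shape[0])[:, None]
    cols = np.arange(width)[None, :]
    # Matches np.roll per row: output column j reads input column (j - shift) mod width
    out[:, :width] = X[rows, (cols - shifts[:, None]) % width]
    return out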
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float,
        all_tricks=False):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Important hyperparameter setting: a lower learning rate with momentum
    if use_momentum:
        learning_rate = 0.02

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables for loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Early stop variables
    early_stopped_weight_j = np.zeros_like(model.ws[0])
    early_stopped_weight_k = np.zeros_like(model.ws[1])
    early_stop_counter = 0
    best_loss = float("inf")

    global_step = 0
    for epoch in tqdm(range(num_epochs)):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Compute the gradient
            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)

            # Update the weights, with or without the Task 3d momentum term;
            # model.delta_w is assumed to hold the previous update per layer
            for layer in range(len(model.neurons_per_layer)):
                if use_momentum:
                    new_weights = (model.ws[layer]
                                   - learning_rate * model.grads[layer]
                                   + momentum_gamma * model.delta_w[layer])
                    model.delta_w[layer] = new_weights - model.ws[layer]
                    model.ws[layer] = new_weights
                else:
                    model.ws[layer] = (model.ws[layer]
                                       - learning_rate * model.grads[layer])

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                # Run the validation data through the network
                outputs_validation = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, outputs_validation)
                val_loss[global_step] = _val_loss

                # Track training loss over the entire X_train, not only the
                # current batch, once every validation step
                outputs_training = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, outputs_training)
                train_loss[global_step] = _train_loss

                # Track the accuracy
                if not all_tricks:
                    train_accuracy[global_step] = calculate_accuracy(
                        X_train, Y_train, model)
                    val_accuracy[global_step] = calculate_accuracy(
                        X_val, Y_val, model)

                # Early stop implementation:
                # if the loss does not improve on the best loss, increment the
                # counter; otherwise reset the counter and update the best loss
                if _val_loss >= best_loss:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
                    best_loss = _val_loss
                    early_stopped_weight_j = model.ws[0].copy()
                    early_stopped_weight_k = model.ws[1].copy()

                # If no new best loss was achieved 30 validations in a row,
                # restore the best weights and stop training
                if early_stop_counter == 30:
                    print(
                        "\nThe validation cross-entropy loss stopped improving, "
                        "triggering early stopping at step " + str(global_step)
                        + " and epoch " + str(epoch))
                    model.ws[0] = early_stopped_weight_j
                    model.ws[1] = early_stopped_weight_k
                    return model, train_loss, val_loss, train_accuracy, val_accuracy

            global_step += 1

        # Task 3a: shuffle training samples after each epoch
        if use_shuffle:
            # Use the shuffle function from sklearn.utils
            X_train, Y_train = shuffle(X_train, Y_train)

    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters
        use_shuffle: bool,
        use_momentum: bool,
        use_early_stopping: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables for loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Exponential moving average of the gradients, one entry per layer
    momentum = [0 for i in range(len(model.grads))]

    # Variables used for early stopping
    mean_val_loss = []
    list_val_losses = []
    global_loss_counter = 2

    global_step = 0
    for epoch in range(num_epochs):
        # Shuffle before the next epoch; shuffle_in_unison is assumed to
        # shuffle both arrays in place with the same permutation
        if use_shuffle:
            shuffle_in_unison(X_train, Y_train)
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)

            if use_momentum:
                # Momentum as an exponential moving average of the gradients
                momentum[0] = ((1 - momentum_gamma) * model.grads[0]
                               + momentum_gamma * momentum[0])
                momentum[1] = ((1 - momentum_gamma) * model.grads[1]
                               + momentum_gamma * momentum[1])
                model.ws[0] -= learning_rate * momentum[0]
                model.ws[1] -= learning_rate * momentum[1]
            else:
                model.ws[0] -= learning_rate * model.grads[0]
                model.ws[1] -= learning_rate * model.grads[1]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                # Training loss is tracked on the current batch only
                _train_loss = cross_entropy_loss(Y_batch, y_hat)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping: average the validation loss over windows of
                # five measurements and stop once the windowed mean increases
                if use_early_stopping:
                    list_val_losses.append(_val_loss)
                    if global_loss_counter % 5 == 0:
                        mean_val_loss.append(np.mean(list_val_losses))
                        list_val_losses = []
                    if global_loss_counter % 10 == 0:
                        if mean_val_loss[0] < mean_val_loss[1]:
                            return model, train_loss, val_loss, train_accuracy, val_accuracy
                        mean_val_loss = []
                    global_loss_counter += 1

            global_step += 1

    return model, train_loss, val_loss, train_accuracy, val_accuracy
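# A minimal usage sketch for the train() variants above. The dataset loader
# and the SoftmaxModel constructor arguments are placeholders, not part of the
# original code; the keyword arguments match the signature of the variant
# directly above.
if __name__ == "__main__":
    datasets = load_datasets()  # hypothetical: returns the six arrays unpacked by train()
    model = SoftmaxModel()      # hypothetical constructor
    model, train_loss, val_loss, train_accuracy, val_accuracy = train(
        model, datasets,
        num_epochs=20, learning_rate=0.01, batch_size=32,
        use_shuffle=True, use_momentum=True, use_early_stopping=True,
        momentum_gamma=0.9)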