def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    if use_momentum:
        learning_rate = 0.02

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Gradients from the previous backward pass, used in the momentum term below
            prev_grads = model.grads
            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            for i in range(len(model.ws)):
                if use_momentum:
                    model.ws[i] = model.ws[i] - learning_rate * (
                        model.grads[i] + momentum_gamma * prev_grads[i])
                else:
                    model.ws[i] = model.ws[i] - learning_rate * model.grads[i]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                _train_loss = cross_entropy_loss(Y_train, model.forward(X_train))
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1

        # Shuffle training examples after each epoch
        if use_shuffle:
            X_train, Y_train = unison_shuffled_copies(X_train, Y_train)
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    if X_train.shape[1] == 784:
        X_train = pre_process_images(X_train)
    if X_test.shape[1] == 784:
        X_test = pre_process_images(X_test)
    if X_val.shape[1] == 784:
        X_val = pre_process_images(X_val)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)
            model.w += -1 * learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, y_hat)
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
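# Several snippets in this section call pre_process_images without defining it.
# Below is a minimal sketch of such a helper, assuming (from the checks on 784 columns
# above and the (785, 1) weight vector in a later snippet) that it rescales the pixels
# and appends a bias column; the exact normalization used originally is not shown here,
# so this is an illustration, not the original implementation.
import numpy as np


def pre_process_images(X: np.ndarray) -> np.ndarray:
    """Hypothetical helper: scale 784-pixel rows to [0, 1] and apply the bias trick."""
    X = X.astype(float) / 255.0            # assumed normalization
    bias = np.ones((X.shape[0], 1))        # constant 1 becomes the 785th column
    return np.concatenate((X, bias), axis=1)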
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            # Update weights
            model.ws[-1] = model.ws[-1] - learning_rate * model.grads[-1]
            model.ws[-2] = model.ws[-2] - learning_rate * model.grads[-2]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _outputs_train = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, _outputs_train)
                train_loss[global_step] = _train_loss

                _outputs_val = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, _outputs_val)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    logits = self.model.forward(X_batch)
    self.model.backward(X_batch, logits, Y_batch)
    loss = cross_entropy_loss(Y_batch, logits)

    # Updating weights
    if self.use_momentum:
        self.model.momentum_update_weights(self.learning_rate,
                                           self.momentum_grads)
        self.momentum_grads = self.momentum_gamma * self.model.grads.copy()
    else:
        self.model.update_weights(self.learning_rate)
    return loss
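# update_weights and momentum_update_weights above are model methods that are not shown
# in this section. Below is a minimal sketch of what they could look like, assuming that
# model.ws and model.grads are parallel lists of numpy arrays and that momentum_grads
# holds momentum_gamma times the previous gradients (as the caller above sets it).
# These are assumptions for illustration, not code from the source.
import numpy as np
from typing import List


class WeightUpdateMixin:
    ws: List[np.ndarray]     # weight matrices (assumed layout)
    grads: List[np.ndarray]  # gradients, same shapes as ws (assumed layout)

    def update_weights(self, learning_rate: float):
        # Plain mini-batch gradient descent on every weight matrix.
        for i in range(len(self.ws)):
            self.ws[i] = self.ws[i] - learning_rate * self.grads[i]

    def momentum_update_weights(self, learning_rate: float,
                                momentum_grads: List[np.ndarray]):
        # Same step, but the decayed previous gradients are added to the current ones.
        for i in range(len(self.ws)):
            self.ws[i] = self.ws[i] - learning_rate * (self.grads[i] + momentum_grads[i])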
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # TODO: Implement this function (task 2c)
    model = self.model
    logits = model.forward(X_batch)
    model.backward(X_batch, logits, Y_batch)

    if (self.use_momentum):
        for i, grad in enumerate(model.grads):
            model.ws[i] -= self.learning_rate * self.previous_grads[i]
            self.previous_grads[i] = grad + self.momentum_gamma * self.previous_grads[i]
    else:
        for i, grad in enumerate(model.grads):
            model.ws[i] -= self.learning_rate * grad

    loss = cross_entropy_loss(Y_batch, logits)  # sol
    return loss
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # TODO: Implement this function (task 2c)
    # The momentum terms dw_1 / dw_2 must persist between batches; as plain locals
    # reset to 0 on every call, the momentum contribution would always be zero,
    # so they are kept on self here (initialized lazily on the first call).
    if not hasattr(self, "dw_1"):
        self.dw_1, self.dw_2 = 0, 0

    logits = self.model.forward(X_batch)
    self.model.backward(X_batch, logits, Y_batch)
    if (self.use_momentum):
        self.dw_1 = np.add(self.model.grads[0], self.momentum_gamma * self.dw_1)
        self.dw_2 = np.add(self.model.grads[1], self.momentum_gamma * self.dw_2)
        self.model.ws[0] = np.add(self.model.ws[0], -self.learning_rate * self.dw_1)
        self.model.ws[1] = np.add(self.model.ws[1], -self.learning_rate * self.dw_2)
    else:
        self.model.ws[0] = np.add(self.model.ws[0],
                                  -self.learning_rate * self.model.grads[0])
        self.model.ws[1] = np.add(self.model.ws[1],
                                  -self.learning_rate * self.model.grads[1])
    loss = cross_entropy_loss(Y_batch, logits)  # sol
    return loss
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # TODO: Implement this function (task 2b)
    # Forward step (retrieving the predictions)
    outputs = self.model.forward(X_batch)
    # Backward step
    self.model.backward(X_batch, outputs, Y_batch)
    # Updating the weights
    self.model.w -= self.model.grad * self.learning_rate
    # Computing the loss
    loss = cross_entropy_loss(Y_batch, outputs)
    return loss
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # TODO: Implement this function (task 2c)
    logits = self.model.forward(X_batch)
    self.model.backward(X_batch, logits, Y_batch)

    # Update weights
    # self.model.ws = self.model.w - self.learning_rate*self.model.grad
    for i in range(len(self.model.ws)):
        if (self.use_momentum):
            self.model.ws[i] = self.model.ws[i] - (
                self.learning_rate * self.model.grads[i] +
                self.momentum_gamma * self.previous_grads[i] * self.learning_rate)
            self.previous_grads[i] = self.model.grads[i] + \
                self.previous_grads[i] * self.momentum_gamma
        else:
            self.model.ws[i] = self.model.ws[i] - \
                self.learning_rate * self.model.grads[i]

    loss = cross_entropy_loss(Y_batch, logits)
    return loss
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # Perform forward pass to get outputs (predictions)
    Yhat_batch = self.model.forward(X_batch)
    # Perform backward pass to get gradient
    self.model.backward(X_batch, Yhat_batch, Y_batch)
    # Update weights in gradient step
    self.model.w = self.model.w - self.learning_rate * self.model.grad
    # Calculate cross entropy loss
    loss = cross_entropy_loss(Y_batch, Yhat_batch)
    return loss
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # TODO: Implement this function (task 2c)
    outputs = self.model.forward(X_batch)
    self.model.backward(X_batch, outputs, Y_batch)

    if self.use_momentum:
        for i in range(len(self.model.ws)):
            # Momentum implementation was a bit unclear in assignment,
            # but from Piazza question @115 I assume this is correct
            self.momentum[i] = self.previous_grads[i] + \
                self.momentum_gamma * self.momentum[i]
            self.model.ws[i] = self.model.ws[i] - \
                self.learning_rate * self.momentum[i]
    else:
        for i in range(len(self.model.ws)):
            self.model.ws[i] = self.model.ws[i] - \
                self.learning_rate * self.model.grads[i]

    self.previous_grads = np.copy(self.model.grads)
    return cross_entropy_loss(Y_batch, outputs)
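# The comment above notes that the momentum formulation was ambiguous. For comparison,
# the variant used by most of the other train_step implementations in this section folds
# the current gradient into the running term instead of the one from the previous batch.
# A standalone sketch of that variant, assuming the velocity list starts as zero arrays
# with the same shapes as the weights (an illustration, not the assignment's reference code):
import numpy as np
from typing import List


def momentum_sgd_step(ws: List[np.ndarray], grads: List[np.ndarray],
                      velocity: List[np.ndarray],
                      learning_rate: float, momentum_gamma: float) -> None:
    """Heavy-ball style update: the velocity mixes the current gradient with the
    decayed previous velocity, and the weights move along the velocity."""
    for i in range(len(ws)):
        velocity[i] = grads[i] + momentum_gamma * velocity[i]
        ws[i] -= learning_rate * velocity[i]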
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # TODO: Implement this function (task 2c)
    output = self.model.forward(X_batch)
    self.model.backward(X_batch, output, Y_batch)

    # If without momentum, do a gradient step on the appropriate weights;
    # otherwise do a momentum gradient step:
    if not self.use_momentum:
        for i, grad in zip(range(len(self.model.ws)), self.model.grads):
            self.model.ws[i] = self.model.ws[i] - self.learning_rate * grad
    else:
        for i, grad in zip(range(len(self.model.ws)), self.model.grads):
            self.vs[i] = grad + self.momentum_gamma * self.vs[i]
            self.model.ws[i] = self.model.ws[i] - self.learning_rate * self.vs[i]

    loss = cross_entropy_loss(Y_batch, output)
    return loss
def train_and_evaluate(
        neurons_per_layer: typing.List[int],
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_improved_sigmoid: bool,
        use_improved_weight_init: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):
    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)
    model, train_loss, val_loss, train_accuracy, val_accuracy = train(
        model,
        datasets,
        num_epochs=num_epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        use_shuffle=use_shuffle,
        use_momentum=use_momentum,
        momentum_gamma=momentum_gamma,
        use_shift=use_shift)

    print("----------", use_shuffle, use_improved_sigmoid,
          use_improved_weight_init, use_momentum, momentum_gamma, "----------")
    print("Final Train Cross Entropy Loss:",
          cross_entropy_loss(Y_train, model.forward(X_train)))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model.forward(X_val)))
    print("Final Test Cross Entropy Loss:",
          cross_entropy_loss(Y_test, model.forward(X_test)))
    print("Final Train accuracy:", calculate_accuracy(X_train, Y_train, model))
    print("Final Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
    print("Final Test accuracy:", calculate_accuracy(X_test, Y_test, model))
    return train_loss, val_loss, train_accuracy, val_accuracy
def validation_step(self):
    """
    Perform a validation step to evaluate the model at the current step
    for the validation set. Also calculates the current accuracy of the
    model on the train set.

    Returns:
        loss (float): cross entropy loss over the whole dataset
        accuracy_ (float): accuracy over the whole dataset
    """
    # NO NEED TO CHANGE THIS FUNCTION
    logits = self.model.forward(self.X_val)
    loss = cross_entropy_loss(self.Y_val, logits)

    accuracy_train = calculate_accuracy(self.X_train, self.Y_train, self.model)
    accuracy_val = calculate_accuracy(self.X_val, self.Y_val, self.model)
    return loss, accuracy_train, accuracy_val
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    out = self.model.forward(X_batch)
    self.model.backward(X_batch, out, Y_batch)
    self.model.w -= self.model.grad * self.learning_rate
    loss = cross_entropy_loss(Y_batch, out)
    return loss
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # TODO: Implement this function (task 2c)
    # Forward and backward pass, then a gradient descent step on every weight matrix
    # (with an optional momentum term), following the same pattern as the other
    # train_step implementations in this section; self.previous_grads is assumed to
    # be initialized to zero arrays in the trainer, as in those snippets.
    logits = self.model.forward(X_batch)
    self.model.backward(X_batch, logits, Y_batch)
    for i in range(len(self.model.ws)):
        if self.use_momentum:
            self.previous_grads[i] = self.model.grads[i] + \
                self.momentum_gamma * self.previous_grads[i]
            self.model.ws[i] -= self.learning_rate * self.previous_grads[i]
        else:
            self.model.ws[i] -= self.learning_rate * self.model.grads[i]

    loss = cross_entropy_loss(Y_batch, logits)  # sol
    return loss
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # TODO: Implement this function (task 2b)
    y = self.model.forward(X_batch)
    self.model.backward(X_batch, y, Y_batch)  # compute the gradient
    delta_W = self.model.grad
    # Perform gradient descent step
    self.model.w = self.model.w - self.learning_rate * delta_W
    loss = cross_entropy_loss(Y_batch, y)
    return loss
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # Perform forward pass to get outputs (predictions)
    Yhat_batch = self.model.forward(X_batch)
    # Perform backward pass to get gradient
    self.model.backward(X_batch, Yhat_batch, Y_batch)

    if self.use_momentum:
        # Update delta w (Formula 6)
        for layer_idx, grads in enumerate(self.model.grads):
            self.previous_grads[layer_idx] = grads + \
                self.momentum_gamma * self.previous_grads[layer_idx]
        # Update weights in gradient step with momentum
        for layer_idx, previous_grads in enumerate(self.previous_grads):
            self.model.ws[layer_idx] = self.model.ws[layer_idx] - \
                self.learning_rate * previous_grads
    else:
        # Update weights in gradient step
        for layer_idx, grads in enumerate(self.model.grads):
            self.model.ws[layer_idx] = self.model.ws[layer_idx] - \
                self.learning_rate * grads

    # Calculate cross entropy loss
    loss = cross_entropy_loss(Y_batch, Yhat_batch)
    return loss
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    logits = self.model.forward(X_batch)
    self.model.backward(X_batch, logits, Y_batch)

    for i, w in enumerate(self.model.ws):
        if self.use_momentum:
            w -= self.previous_grads[i] * self.learning_rate  # t
            self.previous_grads[i] = self.model.grads[i] + \
                self.momentum_gamma * self.previous_grads[i]  # t+1
        else:
            w -= self.model.grads[i] * self.learning_rate

    loss = cross_entropy_loss(Y_batch, logits)
    return loss
def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
    """
    Perform forward, backward and gradient descent step here.
    The function is called once for every batch (see trainer.py) to perform the train step.
    The function returns the mean loss value which is then automatically
    logged in our variable self.train_history.

    Args:
        X: one batch of images
        Y: one batch of labels
    Returns:
        loss value (float) on batch
    """
    # TODO: Implement this function (task 2c)
    outputs = self.model.forward(X_batch)
    self.model.backward(X_batch, outputs, Y_batch)

    if (self.use_momentum):  # FIX THIS
        self.previous_grads[0] = self.model.grads[0] + \
            self.momentum_gamma * self.previous_grads[0]
        self.previous_grads[1] = self.model.grads[1] + \
            self.momentum_gamma * self.previous_grads[1]
        self.model.ws[0] = self.model.ws[0] - \
            self.learning_rate * self.previous_grads[0]
        self.model.ws[1] = self.model.ws[1] - \
            self.learning_rate * self.previous_grads[1]
    else:
        self.model.ws[0] = self.model.ws[0] - \
            self.model.grads[0] * self.learning_rate
        self.model.ws[1] = self.model.ws[1] - \
            self.model.grads[1] * self.learning_rate

    loss = cross_entropy_loss(Y_batch, outputs)
    return loss
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda, X_train.shape[0])

    # Initialize weights and outputs
    model.w = np.zeros((785, 1))

    # For early stopping
    is_val_loss_increasing = [False] * num_increases

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward and backward pass
            output = model.forward(X_batch)
            model.backward(X_batch, output, Y_batch)
            # Update weights
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously
            output_train = model.forward(X_train)
            _train_loss = cross_entropy_loss(Y_train, output_train)
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                output_val = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, output_val)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping
                stopping = False
                if with_stopping == True and global_step > 0:
                    stopping = early_stopping(num_increases,
                                              is_val_loss_increasing, val_loss,
                                              global_step, num_steps_per_val)
                if with_stopping == True and stopping is True:
                    break
            global_step += 1
        if with_stopping == True and stopping is True:
            print('Epoch =', epoch)
            break
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        use_early_stopping: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    momentum = [0 for i in range(len(model.grads))]

    # Variables used for early stopping
    mean_val_loss = []
    list_val_losses = []
    global_loss_counter = 2

    global_step = 0
    for epoch in range(num_epochs):
        # Shuffling before next epoch
        if use_shuffle == True:
            shuffle_in_unison(X_train, Y_train)
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)

            if use_momentum == True:
                momentum[0] = (1 - momentum_gamma) * model.grads[0] + \
                    momentum_gamma * momentum[0]
                momentum[1] = (1 - momentum_gamma) * model.grads[1] + \
                    momentum_gamma * momentum[1]
                model.ws[0] += -1 * learning_rate * (momentum[0])
                model.ws[1] += -1 * learning_rate * (momentum[1])
            else:
                model.ws[0] += -1 * learning_rate * model.grads[0]
                model.ws[1] += -1 * learning_rate * model.grads[1]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                _train_loss = cross_entropy_loss(Y_batch, y_hat)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping
                if use_early_stopping == True:
                    list_val_losses.append(_val_loss)
                    if global_loss_counter % 5 == 0:
                        mean_val_loss.append(np.mean(list_val_losses))
                        list_val_losses = []
                    if global_loss_counter % 10 == 0:
                        if mean_val_loss[0] < mean_val_loss[1]:
                            return model, train_loss, val_loss, train_accuracy, val_accuracy
                        mean_val_loss = []
                    global_loss_counter += 1
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(num_epochs: int, learning_rate: float, batch_size: int,
          l2_reg_lambda: float):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    global_step = 0
    last_val_loss = 1
    best_val_loss = 1
    best_weights = None
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward pass
            train_outputs = model.forward(X_batch)
            # Backward propagation
            model.backward(X_batch, train_outputs, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, train_outputs)
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                val_outputs = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_outputs)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1

        # Compute validation loss for early stopping
        val_outputs = model.forward(X_val)
        _val_loss = cross_entropy_loss(Y_val, val_outputs)
        if _val_loss <= best_val_loss:
            best_weights = model.w
            best_val_loss = _val_loss
        if _val_loss > last_val_loss:
            model.w = best_weights
            break
        last_val_loss = _val_loss
    return model, train_loss, val_loss, train_accuracy, val_accuracy
X_train, Y_train, X_val, Y_val, X_test, Y_test = utils.load_binary_dataset(
    category1, category2, validation_percentage)

# Hyperparameters
num_epochs = 50
learning_rate = 0.2
batch_size = 128
l2_reg_lambda = 0

model, train_loss, val_loss, train_accuracy, val_accuracy = train(
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    batch_size=batch_size,
    l2_reg_lambda=l2_reg_lambda)

print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(pre_process_images(X_train))))
print("Final Test Cross Entropy Loss:",
      cross_entropy_loss(Y_test, model.forward(pre_process_images(X_test))))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(pre_process_images(X_val))))
print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
print("Test accuracy:", calculate_accuracy(X_test, Y_test, model))

# Plot loss
# plt.ylim([0., .4])
utils.plot_loss(train_loss, "Training Loss")
utils.plot_loss(val_loss, "Validation Loss")
plt.legend()
plt.savefig("binary_train_loss.png")
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test, early_stopping_step
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    # Early stopping var init
    last_loss = INT_MAX
    already_failed = 0

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # The mini-batch gradient descent algorithm for m batches and a single epoch.
            model.backward(X_batch, model.forward(X_batch), Y_batch)
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, model.forward(X_batch))
            train_loss[global_step] = _train_loss[0, 0]

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss[0, 0]

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping criteria
                if (_val_loss[0, 0] > last_loss and already_failed > 20):
                    # Stop early
                    # print("Early stopping kicked in at epoch nr.:", epoch + 1)
                    # return model, train_loss, val_loss, train_accuracy, val_accuracy
                    if early_stopping_step == 0:
                        early_stopping_step = global_step
                # Means failed this round
                elif (_val_loss[0, 0] > last_loss):
                    already_failed += 1
                # The loss improved this round, reset counter
                else:
                    last_loss = _val_loss[0, 0]
                    already_failed = 0
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
    model2,
    learning_rate,
    batch_size,
    shuffle_data,
    X_train,
    Y_train,
    X_val,
    Y_val,
)
train_history2, val_history2 = trainer2.train(num_epochs)

print("model from 4e")
print("Train accuracy:", calculate_accuracy(X_train, Y_train, model2))
print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model2))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model2.forward(X_val)))

# Plotting training/validation - loss/accuracy comparing the two models:
plt.figure(figsize=(20, 12))
plt.subplot(1, 2, 1)
plt.ylim([0., .9])
utils.plot_loss(train_history2["loss"], "Train - 10 hidden layers")
utils.plot_loss(train_history1["loss"], "Train - 2 hidden layers")
utils.plot_loss(train_history["loss"], "Train - 1 hidden layer")
utils.plot_loss(val_history2["loss"], "Validation - 10 hidden layers")
utils.plot_loss(val_history1["loss"], "Validation - 2 hidden layers")
utils.plot_loss(val_history["loss"], "Validation - 1 hidden layer")
# Similar legend as accuracy plot:
plt.legend()
plt.xlabel("Number of Training Steps")
plt.ylabel("Training/Validation Loss")
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float,
        all_tricks=False):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Important hyper parameter setting
    if use_momentum:
        learning_rate = 0.02

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Early stop variables
    early_stopped_weight_j = np.zeros(
        (model.ws[0].shape[0], model.ws[0].shape[1]))
    early_stopped_weight_k = np.zeros(
        (model.ws[1].shape[0], model.ws[1].shape[1]))
    early_stop_counter = 0
    best_loss = float("inf")

    global_step = 0
    for epoch in tqdm(range(num_epochs)):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Compute the gradient
            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)

            # Update the weights with or without task3d, momentum gradient
            for layer in range(len(model.neurons_per_layer)):
                if use_momentum:
                    new_weights = model.ws[layer] - learning_rate * model.grads[layer] + \
                        momentum_gamma * model.delta_w[layer]
                    model.delta_w[layer] = new_weights - model.ws[layer]
                    model.ws[layer] = new_weights
                else:
                    model.ws[layer] = model.ws[layer] - \
                        learning_rate * model.grads[layer]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                # Test the validation data on the network
                outputs_validation = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, outputs_validation)
                val_loss[global_step] = _val_loss

                # Track training loss over the entire X_Train and not only the current batch
                # once every validation epoch
                outputs_training = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, outputs_training)
                train_loss[global_step] = _train_loss

                # Track the accuracy
                if not all_tricks:
                    train_accuracy[global_step] = calculate_accuracy(
                        outputs_training, Y_train, model)
                    val_accuracy[global_step] = calculate_accuracy(
                        outputs_validation, Y_val, model)

                # Early stop implementation
                # If the loss does not reduce compared to best loss, increment counter
                # Otherwise, set the counter to 0 and update best loss
                if _val_loss >= best_loss:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
                    best_loss = _val_loss
                    early_stopped_weight_j = model.ws[0]
                    early_stopped_weight_k = model.ws[1]

                # If 30 times in a row a new best loss was not achieved, stop the program
                if early_stop_counter == 30:
                    print(
                        "\nThe cross entropy loss for validation data increased too much, thus triggering "
                        "the early stop at step : " + str(global_step) +
                        " and epoch : " + str(epoch))
                    model.ws[0] = early_stopped_weight_j
                    model.ws[1] = early_stopped_weight_k
                    return model, train_loss, val_loss, train_accuracy, val_accuracy
            global_step += 1

        # Task 3a: Shuffle training samples after each epoch
        if use_shuffle:
            # Use the shuffle function from sklearn
            X_train, Y_train = shuffle(X_train, Y_train)
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Early stop variables
    early_stopped_weight_j = np.zeros(
        (model.ws[0].shape[0], model.ws[0].shape[1]))
    early_stopped_weight_k = np.zeros(
        (model.ws[1].shape[0], model.ws[1].shape[1]))
    early_stop_counter = 0
    best_loss = float("inf")

    global_step = 0
    for epoch in tqdm(range(num_epochs)):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            # Update the weights
            model.ws[0] = model.ws[0] - learning_rate * model.grads[0]
            model.ws[1] = model.ws[1] - learning_rate * model.grads[1]

            # Track training loss continuously over the entire X_Train and not only the current batch
            # outputs_training = model.forward(X_train)
            # _train_loss = cross_entropy_loss(Y_batch, outputs)
            # train_loss[global_step] = _train_loss

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                # Test the validation data on the network
                outputs_validation = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, outputs_validation)
                val_loss[global_step] = _val_loss

                # Track training loss over the entire X_Train and not only the current batch
                # once every validation epoch
                outputs_training = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, outputs_training)
                train_loss[global_step] = _train_loss

                # Early stop implementation
                # If the loss does not reduce compared to best loss, increment counter
                # Otherwise, set the counter to 0 and update best loss
                if _val_loss >= best_loss:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
                    best_loss = _val_loss
                    early_stopped_weight_j = model.ws[0]
                    early_stopped_weight_k = model.ws[1]

                # If 30 times in a row a new best loss was not achieved, stop the program
                if early_stop_counter == 30:
                    print(
                        "The cross entropy loss for validation data increased too much, thus triggering "
                        "the early stop at step : " + str(global_step) +
                        " and epoch : " + str(epoch))
                    model.ws[0] = early_stopped_weight_j
                    model.ws[1] = early_stopped_weight_k
                    return model, train_loss, val_loss, train_accuracy, val_accuracy

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Variables for early stopping
    last_val_loss = 1
    best_val_loss = 1
    best_weights = None
    increased_last_time = False

    # Store last weights update term for momentum
    last_weights_update = []
    for l in range(len(model.ws)):
        last_weights_update.append(np.zeros_like(model.ws[l]))

    global_step = 0
    for epoch in range(num_epochs):
        print("Epoch:", epoch)
        for step in range(num_batches_per_epoch):
            shift = np.random.randint(low=-2, high=3, size=batch_size)
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            X_local = X_batch
            if use_shift:
                X_local = np.roll(X_batch[:, :784], shift, axis=1)
                ones = np.ones((X_local.shape[0], 1))
                X_local = np.concatenate((X_local, ones), axis=1)

            # Note: X_local is computed above but not used below; the forward and
            # backward passes still operate on the unshifted X_batch.
            train_output = model.forward(X_batch)
            model.backward(X_batch, train_output, Y_batch)

            for l in range(len(model.ws)):
                if use_momentum:
                    update_term = momentum_gamma * last_weights_update[l] - \
                        learning_rate * model.grads[l]
                    model.ws[l] += update_term
                    last_weights_update[l] = update_term
                else:
                    model.ws[l] -= learning_rate * model.grads[l]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                val_output = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_output)
                val_loss[global_step] = _val_loss

                train_output = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, train_output)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1

        # In order to keep labels in the right order, we shuffle an array of indices
        # and then apply this ordering to both inputs and labels
        if use_shuffle:
            indices = np.arange(X_train.shape[0])
            np.random.shuffle(indices)
            X_train = X_train[indices]
            Y_train = Y_train[indices]

        # Compute validation loss for early stopping
        val_outputs = model.forward(X_val)
        _val_loss = cross_entropy_loss(Y_val, val_outputs)
        if _val_loss <= best_val_loss:
            best_weights = model.ws
            best_val_loss = _val_loss
        if _val_loss > last_val_loss:
            if increased_last_time:
                model.ws = best_weights
                break
            else:
                increased_last_time = True
        else:
            increased_last_time = False
        last_val_loss = _val_loss
    return model, train_loss, val_loss, train_accuracy, val_accuracy
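# A note on the shift augmentation above: when np.roll gets a vector of shifts together
# with a scalar axis, numpy broadcasts them and accumulates all shifts on that axis, so
# every image in the batch is rolled by the sum of the sampled shifts rather than each
# image by its own shift. If per-sample shifting is the intent (an assumption about the
# snippet above, not a confirmed reading), a standalone per-row variant could look like:
import numpy as np


def shift_images_per_sample(X_pixels: np.ndarray, shifts: np.ndarray) -> np.ndarray:
    """Roll each row (one flattened image) by its own shift."""
    shifted = np.empty_like(X_pixels)
    for row, s in enumerate(shifts):
        shifted[row] = np.roll(X_pixels[row], int(s))
    return shifted

# Possible usage with the same sampling as in the loop above:
# shifts = np.random.randint(low=-2, high=3, size=X_batch.shape[0])
# X_local = shift_images_per_sample(X_batch[:, :784], shifts)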
# ANY PARTS OF THE CODE BELOW THIS CAN BE CHANGED.

# Initialize model
model = BinaryModel()

# Train model
trainer = LogisticTrainer(
    model,
    learning_rate,
    batch_size,
    shuffle_dataset,
    early_stopping,
    X_train,
    Y_train,
    X_val,
    Y_val,
)
train_history, val_history = trainer.train(num_epochs)

# Plot and print everything you want of information
print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(X_train)))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(X_val)))
print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))

# Plot loss for first model (task 2b)
plt.ylim([0., .2])
utils.plot_loss(train_history["loss"], "Training Loss", npoints_to_average=10)
utils.plot_loss(val_history["loss"], "Validation Loss")
plt.legend()
plt.xlabel("Number of Training Steps")
plt.ylabel("Cross Entropy Loss - Average")
plt.savefig("task2b_binary_train_loss.png")
plt.show()
    learning_rate,
    batch_size,
    shuffle_data,
    X_train,
    Y_train,
    X_val,
    Y_val,
)
train_history_naked, val_history_naked = trainer_naked.train(num_epochs)

print("just basic")
print("Train accuracy:", calculate_accuracy(X_train, Y_train, model_naked))
print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model_naked))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model_naked.forward(X_val)))
print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model_naked.forward(X_train)))

###### 2nd model - improved weights ######
use_improved_sigmoid = False
use_improved_weight_init = True
use_momentum = False

model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                     use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma,
    use_momentum,
    model,
    learning_rate,
    batch_size,