def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    # Pre-process the images once, only if they are still raw (784 features).
    if X_train.shape[1] == 784:
        X_train = pre_process_images(X_train)
    if X_test.shape[1] == 784:
        X_test = pre_process_images(X_test)
    if X_val.shape[1] == 784:
        X_val = pre_process_images(X_val)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward pass, backward pass and gradient descent update
            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, y_hat)
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
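
# A minimal sketch of what the pre_process_images helper used above might look
# like. It is an assumption based on the shape checks in train (784 raw pixel
# features in, 785 features out after the bias trick), not the actual
# implementation; the normalization constant is a placeholder.
import numpy as np


def pre_process_images(X: np.ndarray) -> np.ndarray:
    """Normalize pixel values and append a bias column of ones (784 -> 785 features)."""
    X = X.astype(float) / 255.0  # assumed normalization to [0, 1]
    bias = np.ones((X.shape[0], 1))
    return np.concatenate([X, bias], axis=1)
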
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test, early_stopping_step
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    # Early stopping variable initialization
    last_loss = float("inf")
    already_failed = 0

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # The mini-batch gradient descent algorithm for m batches and a single epoch.
            model.backward(X_batch, model.forward(X_batch), Y_batch)
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, model.forward(X_batch))
            train_loss[global_step] = _train_loss[0, 0]

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss[0, 0]
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping criteria: the loss has failed to improve more
                # than 20 validation checks in a row.
                if _val_loss[0, 0] > last_loss and already_failed > 20:
                    # Stop early: record the step instead of returning, so the
                    # full training curves can still be plotted.
                    # print("Early stopping kicked in at epoch nr.:", epoch + 1)
                    # return model, train_loss, val_loss, train_accuracy, val_accuracy
                    if early_stopping_step == 0:
                        early_stopping_step = global_step
                # The loss failed to improve this round
                elif _val_loss[0, 0] > last_loss:
                    already_failed += 1
                # The loss improved this round, reset the counter
                else:
                    last_loss = _val_loss[0, 0]
                    already_failed = 0
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
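
# A hedged sketch of the binary cross_entropy_loss helper these train functions
# call. The formula (mean binary cross-entropy) follows from logistic
# regression; the exact return type differs between versions (this sketch
# returns a scalar, while the version above indexes a [0, 0] element of an
# array), and the clipping constant is an assumption.
import numpy as np


def cross_entropy_loss(targets: np.ndarray, outputs: np.ndarray) -> float:
    """Mean binary cross-entropy: -1/N * sum(y*ln(y_hat) + (1-y)*ln(1-y_hat))."""
    eps = 1e-12  # assumed clipping constant to avoid log(0)
    outputs = np.clip(outputs, eps, 1 - eps)
    return float(-np.mean(
        targets * np.log(outputs) + (1 - targets) * np.log(1 - outputs)))
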
def train(num_epochs: int, learning_rate: float, batch_size: int,
          l2_reg_lambda: float):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    global_step = 0
    last_val_loss = 1
    best_val_loss = 1
    best_weights = None
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward pass
            train_outputs = model.forward(X_batch)
            # Backward propagation
            model.backward(X_batch, train_outputs, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, train_outputs)
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                val_outputs = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_outputs)
                val_loss[global_step] = _val_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1

        # Compute validation loss for early stopping (checked once per epoch)
        val_outputs = model.forward(X_val)
        _val_loss = cross_entropy_loss(Y_val, val_outputs)
        if _val_loss <= best_val_loss:
            # Keep a copy of the best weights so later updates do not overwrite them
            best_weights = model.w.copy()
            best_val_loss = _val_loss
        if _val_loss > last_val_loss:
            model.w = best_weights
            break
        last_val_loss = _val_loss
    return model, train_loss, val_loss, train_accuracy, val_accuracy
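
# A rough sketch of the calculate_accuracy helper, assuming the model outputs
# probabilities in [0, 1] and the labels are 0/1 column vectors; the 0.5
# decision threshold is an assumption, not taken from the original code.
import numpy as np


def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model) -> float:
    """Fraction of examples whose thresholded prediction matches the target."""
    predictions = (model.forward(X) >= 0.5).astype(int)
    return float(np.mean(predictions == targets))
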
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda, X_train.shape[0])

    # Initialize weights and outputs
    model.w = np.zeros((785, 1))

    # For early stopping: one flag per validation check that may increase
    is_val_loss_increasing = [False] * num_increases

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward and backward pass
            output = model.forward(X_batch)
            model.backward(X_batch, output, Y_batch)

            # Update weights
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously (computed over the full training set)
            output_train = model.forward(X_train)
            _train_loss = cross_entropy_loss(Y_train, output_train)
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                output_val = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, output_val)
                val_loss[global_step] = _val_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping
                stopping = False
                if with_stopping and global_step > 0:
                    stopping = early_stopping(num_increases, is_val_loss_increasing,
                                              val_loss, global_step, num_steps_per_val)
                if with_stopping and stopping:
                    break
            global_step += 1
        if with_stopping and stopping:
            print('Epoch =', epoch)
            break
    return model, train_loss, val_loss, train_accuracy, val_accuracy
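
# The early_stopping helper called above is not shown in this version. The
# sketch below is one plausible implementation that matches its call signature:
# record whether the latest validation loss increased and stop once the last
# num_increases checks were all increases. Using is_val_loss_increasing as a
# rolling flag buffer is an assumption.
def early_stopping(num_increases, is_val_loss_increasing, val_loss,
                   global_step, num_steps_per_val):
    """Return True if the validation loss increased for the last num_increases checks."""
    previous_step = global_step - num_steps_per_val
    if previous_step not in val_loss:
        return False
    # Shift the flag buffer and record whether this check was an increase
    is_val_loss_increasing.pop(0)
    is_val_loss_increasing.append(val_loss[global_step] > val_loss[previous_step])
    return all(is_val_loss_increasing)
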
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward pass, backward pass and weight update
            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, outputs)
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping: count how many of the last 10 validation
                # checks failed to improve the validation accuracy
                counter = 0
                for i in range(10):
                    current = global_step - i * num_steps_per_val
                    previous = global_step - (i + 1) * num_steps_per_val
                    if (len(val_accuracy) > 8 * num_steps_per_val
                            and val_accuracy[current] <= val_accuracy[previous]):
                        counter += 1
                # When counter is 9 or greater, early stopping kicks in
                if counter > 8:
                    print("Stopping early at step: " + str(global_step) +
                          " in epoch " + str(epoch))
                    return model, train_loss, val_loss, train_accuracy, val_accuracy
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
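
# A hedged sketch of the BinaryModel class assumed by all of the train
# functions above: forward applies the logistic sigmoid to X @ w, and backward
# stores the mean cross-entropy gradient plus an L2 term in self.grad, so that
# train can do w -= learning_rate * grad. Constructor signatures differ between
# versions (one also passes the training-set size), so this is illustrative only.
import numpy as np


class BinaryModel:
    def __init__(self, l2_reg_lambda: float, num_features: int = 785):
        self.l2_reg_lambda = l2_reg_lambda
        self.w = np.zeros((num_features, 1))
        self.grad = None

    def forward(self, X: np.ndarray) -> np.ndarray:
        """Sigmoid of the linear model: y_hat = 1 / (1 + exp(-X @ w))."""
        return 1.0 / (1.0 + np.exp(-X @ self.w))

    def backward(self, X: np.ndarray, outputs: np.ndarray, targets: np.ndarray) -> None:
        """Mean gradient of the cross-entropy loss, plus the L2 regularization term."""
        self.grad = -X.T @ (targets - outputs) / X.shape[0] \
            + 2 * self.l2_reg_lambda * self.w
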