def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    """Train ``model`` with plain mini-batch gradient descent.

    Each step slices one consecutive batch out of the training set, runs
    ``model.forward`` / ``model.backward`` on it and subtracts
    ``learning_rate * grad`` from every entry of ``model.ws``.

    Args:
        model: Object exposing ``forward``, ``backward``, ``ws`` and
            ``grads``; ``ws`` and ``grads`` are indexed in parallel.
        datasets: Six arrays, unpacked as
            ``X_train, Y_train, X_val, Y_val, X_test, Y_test``. The test
            split is unpacked but not used here.
        num_epochs: Number of passes over the training set.
        learning_rate: Step size of the gradient update.
        batch_size: Number of rows per mini-batch. Trailing rows that do
            not fill a whole batch are never visited.
        use_shuffle: Accepted for interface compatibility; not used.
        use_momentum: Accepted for interface compatibility; not used.
        momentum_gamma: Accepted for interface compatibility; not used.

    Returns:
        ``(model, train_loss, val_loss, train_accuracy, val_accuracy)``
        where the last four are dicts keyed by the global step at which
        the value was recorded. ``train_loss`` is measured on the current
        mini-batch only, while both accuracies use the full splits.
    """
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    # Clamp to 1: with fewer than 5 batches per epoch the integer division
    # yields 0 and the modulo below would raise ZeroDivisionError.
    num_steps_per_val = max(1, num_batches_per_epoch // 5)

    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # The mini-batch gradient descent algorithm
            model.backward(X_batch, model.forward(X_batch), Y_batch)
            # Update layer by layer instead of stacking all gradients with
            # np.array(...): stacking fails when the layers have different
            # shapes, and it silently replaced the weight container with a
            # single ndarray.
            for layer, grad in enumerate(model.grads):
                model.ws[layer] = model.ws[layer] - learning_rate * grad

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                _train_loss = cross_entropy_loss(
                    Y_batch, model.forward(X_batch))
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):
    """Train ``model`` with mini-batch gradient descent and early stopping.

    Each step slices one consecutive batch out of the training set, runs
    ``model.forward`` / ``model.backward`` on it and updates every entry of
    ``model.ws`` in place, optionally with a momentum term. After every
    epoch the training set is optionally reshuffled and the validation loss
    is evaluated; training stops once that loss has risen in two
    consecutive epochs, and the weights of the best epoch are restored.

    Args:
        model: Object exposing ``forward``, ``backward``, ``ws`` and
            ``grads``; ``ws`` and ``grads`` are indexed in parallel.
        datasets: Six arrays, unpacked as
            ``X_train, Y_train, X_val, Y_val, X_test, Y_test``. The test
            split is unpacked but not used here.
        num_epochs: Maximum number of passes over the training set.
        learning_rate: Step size of the gradient update.
        batch_size: Number of rows per mini-batch. Trailing rows that do
            not fill a whole batch are not visited within an epoch.
        use_shuffle: Reorder the training rows (inputs and labels together)
            after every epoch.
        use_momentum: Add ``momentum_gamma`` times the previous update to
            the current one.
        momentum_gamma: Weight of the previous update; only read when
            ``use_momentum`` is true.
        use_shift: Circularly shift the first 784 columns of every training
            row by a random offset in [-2, 2] before the forward pass.

    Returns:
        ``(model, train_loss, val_loss, train_accuracy, val_accuracy)``
        where the last four are dicts keyed by the global step at which
        the value was recorded; all four are measured on the full splits.
    """
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    # Clamp to 1: with fewer than 5 batches per epoch the integer division
    # yields 0 and the modulo below would raise ZeroDivisionError.
    num_steps_per_val = max(1, num_batches_per_epoch // 5)
    # Number of leading columns affected by use_shift. Hard-coded in the
    # original; presumably a flattened 28x28 image followed by a bias
    # column -- TODO confirm against the data loader.
    num_shift_columns = 784

    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Variables for early stopping. Starting from infinity means the first
    # epoch always records a best snapshot and never counts as an increase,
    # whatever the scale of the loss.
    last_val_loss = float("inf")
    best_val_loss = float("inf")
    best_weights = None
    increased_last_time = False

    # Store last weights update term for momentum
    last_weights_update = [np.zeros_like(w) for w in model.ws]

    global_step = 0
    for epoch in range(num_epochs):
        print("Epoch:", epoch)
        for step in range(num_batches_per_epoch):
            # Drawn even when use_shift is off so the global RNG stream, and
            # therefore the shuffle order under a fixed seed, does not
            # depend on the flag.
            shift = np.random.randint(low=-2, high=3, size=batch_size)
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            X_local = X_batch
            if use_shift:
                # Per-row circular shift: output column j of row i reads
                # input column (j - shift[i]) mod width, i.e. what np.roll
                # does for a single row. Columns past the shifted range are
                # carried over untouched.
                pixels = X_batch[:, :num_shift_columns]
                width = pixels.shape[1]
                source_cols = (np.arange(width) - shift[:, np.newaxis]) % width
                shifted = np.take_along_axis(pixels, source_cols, axis=1)
                X_local = np.concatenate(
                    (shifted, X_batch[:, num_shift_columns:]), axis=1)

            # Train on the (possibly shifted) batch; the original computed
            # the shifted batch but then ignored it.
            train_output = model.forward(X_local)
            model.backward(X_local, train_output, Y_batch)

            for layer, grad in enumerate(model.grads):
                if use_momentum:
                    update_term = (
                        momentum_gamma * last_weights_update[layer]
                        - learning_rate * grad)
                    model.ws[layer] += update_term
                    last_weights_update[layer] = update_term
                else:
                    model.ws[layer] -= learning_rate * grad

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                val_output = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_output)
                val_loss[global_step] = _val_loss

                train_output = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, train_output)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1

        # In order to keep labels in the right order, we shuffle an array of
        # indices and then apply this ordering to both inputs and labels
        if use_shuffle:
            indices = np.arange(X_train.shape[0])
            np.random.shuffle(indices)
            X_train = X_train[indices]
            Y_train = Y_train[indices]

        # Compute validation loss for early stopping
        val_outputs = model.forward(X_val)
        _val_loss = cross_entropy_loss(Y_val, val_outputs)
        if _val_loss <= best_val_loss:
            # Snapshot copies: the updates above mutate the weight arrays in
            # place, so keeping a reference would track the current weights
            # instead of the best ones.
            best_weights = [w.copy() for w in model.ws]
            best_val_loss = _val_loss
        if _val_loss > last_val_loss:
            if increased_last_time:
                # Second consecutive increase: roll back and stop.
                if best_weights is not None:
                    for layer, best in enumerate(best_weights):
                        model.ws[layer] = best
                break
            increased_last_time = True
        else:
            increased_last_time = False
        last_val_loss = _val_loss
    return model, train_loss, val_loss, train_accuracy, val_accuracy