def train(model, forward, backward, update, alpha, num_epochs, batch_size):
    """Train a simple MLP on the Toronto Face dataset with mini-batch SGD.

    :param model: Dictionary of model weights.
    :param forward: Forward prop function.
    :param backward: Backward prop function.
    :param update: Update weights function.
    :param alpha: Learning rate.
    :param num_epochs: Number of epochs to run training for.
    :param batch_size: Mini-batch size, -1 for full batch.
    :return: A tuple (model, stats) WHERE
        model: the trained weights dictionary (updated in place), and
        stats: dict of per-epoch (epoch, value) lists under the keys
        "train_ce", "valid_ce", "train_acc", "valid_acc".
    """
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, \
        target_test = load_data("data/toronto_face.npz")
    rnd_idx = np.arange(inputs_train.shape[0])
    train_ce_list = []
    valid_ce_list = []
    train_acc_list = []
    valid_acc_list = []
    num_train_cases = inputs_train.shape[0]
    if batch_size == -1:
        batch_size = num_train_cases
    # Ceil so a final partial batch is still visited.
    num_steps = int(np.ceil(num_train_cases / batch_size))
    for epoch in range(num_epochs):
        # Reshuffle the training set at the start of every epoch.
        np.random.shuffle(rnd_idx)
        inputs_train = inputs_train[rnd_idx]
        target_train = target_train[rnd_idx]
        for step in range(num_steps):
            # Slice out the current mini-batch (the last one may be short).
            start = step * batch_size
            end = min(num_train_cases, (step + 1) * batch_size)
            x = inputs_train[start:end]
            t = target_train[start:end]
            # Forward pass.
            var = forward(model, x)
            prediction = softmax(var["y"])
            # Mean cross entropy and accuracy over the batch.
            train_ce = -np.sum(t * np.log(prediction)) / float(x.shape[0])
            train_acc = (np.argmax(prediction, axis=1) == np.argmax(
                t, axis=1)).astype("float").mean()
            # Gradient of softmax + cross-entropy w.r.t. the logits,
            # averaged over the batch.
            error = (prediction - t) / float(x.shape[0])
            # Backward prop.
            backward(model, error, var)
            # Update weights.
            update(model, alpha)
        valid_ce, valid_acc = evaluate(
            inputs_valid, target_valid, model, forward, batch_size=batch_size)
        # Record the last batch's training stats and this epoch's
        # validation stats for plotting.
        train_ce_list.append((epoch, train_ce))
        train_acc_list.append((epoch, train_acc))
        valid_ce_list.append((epoch, valid_ce))
        valid_acc_list.append((epoch, valid_acc))
    # BUG FIX: both plots previously used number=1, so the cross-entropy
    # figure was overwritten by the accuracy figure. Use distinct figures.
    display_plot(train_ce_list, valid_ce_list, "Cross Entropy", number=0)
    display_plot(train_acc_list, valid_acc_list, "Accuracy", number=1)
    train_ce, train_acc = evaluate(
        inputs_train, target_train, model, forward, batch_size=batch_size)
    valid_ce, valid_acc = evaluate(
        inputs_valid, target_valid, model, forward, batch_size=batch_size)
    test_ce, test_acc = evaluate(
        inputs_test, target_test, model, forward, batch_size=batch_size)
    print("CE: Train %.5f Validation %.5f Test %.5f" %
          (train_ce, valid_ce, test_ce))
    print("Acc: Train {:.5f} Validation {:.5f} Test {:.5f}".format(
        train_acc, valid_acc, test_acc))
    stats = {
        "train_ce": train_ce_list,
        "valid_ce": valid_ce_list,
        "train_acc": train_acc_list,
        "valid_acc": valid_acc_list
    }
    return model, stats
# NOTE(review): this redefines `train` and shadows the earlier definition in
# this file — only this version is callable after import. Consider renaming
# one of them (e.g. train_with_confusion_plots) in a follow-up.
def train(model, forward, backward, update, alpha, num_epochs, batch_size,
          confidence_threshold=0.5):
    """Train a simple MLP on the Toronto Face dataset with mini-batch SGD,
    plotting any training image the network is not confident about.

    :param model: Dictionary of model weights.
    :param forward: Forward prop function.
    :param backward: Backward prop function.
    :param update: Update weights function.
    :param alpha: Learning rate.
    :param num_epochs: Number of epochs to run training for.
    :param batch_size: Mini-batch size, -1 for full batch.
    :param confidence_threshold: Max softmax probability below which a
        training image is shown as "low confidence" (default 0.5, matching
        the previous hard-coded value).
    :return: A tuple (model, stats) WHERE
        model: the trained weights dictionary (updated in place), and
        stats: dict of per-epoch (epoch, value) lists under the keys
        "train_ce", "valid_ce", "train_acc", "valid_acc".
    """
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, \
        target_test = load_data("data/toronto_face.npz")
    rnd_idx = np.arange(inputs_train.shape[0])
    train_ce_list = []
    valid_ce_list = []
    train_acc_list = []
    valid_acc_list = []
    num_train_cases = inputs_train.shape[0]
    if batch_size == -1:
        batch_size = num_train_cases
    # Ceil so a final partial batch is still visited.
    num_steps = int(np.ceil(num_train_cases / batch_size))
    for epoch in range(num_epochs):
        # Reshuffle the training set at the start of every epoch.
        np.random.shuffle(rnd_idx)
        inputs_train = inputs_train[rnd_idx]
        target_train = target_train[rnd_idx]
        for step in range(num_steps):
            # Slice out the current mini-batch (the last one may be short).
            start = step * batch_size
            end = min(num_train_cases, (step + 1) * batch_size)
            x = inputs_train[start:end]
            t = target_train[start:end]
            # Forward pass.
            var = forward(model, x)
            prediction = softmax(var["y"])
            # Plot images where the network isn't confident about the output.
            # NOTE: plt.show() blocks until each figure is closed; assumes
            # inputs are flattened 48x48 grayscale faces — TODO confirm.
            for i in range(x.shape[0]):
                if np.amax(prediction[i]) < confidence_threshold:
                    plt.imshow(x[i].reshape(48, 48), cmap='gray')
                    plt.show()
                    print(
                        f'The model prediction is {np.argmax(prediction[i]) == np.argmax(t[i])}'
                    )
            # Mean cross entropy and accuracy over the batch.
            train_ce = -np.sum(t * np.log(prediction)) / float(x.shape[0])
            train_acc = (np.argmax(prediction, axis=1) == np.argmax(
                t, axis=1)).astype("float").mean()
            print(("Epoch {:3d} Step {:2d} Train CE {:.5f} "
                   "Train Acc {:.5f}").format(epoch, step, train_ce,
                                              train_acc))
            # Gradient of softmax + cross-entropy w.r.t. the logits,
            # averaged over the batch.
            error = (prediction - t) / float(x.shape[0])
            # Backward prop.
            backward(model, error, var)
            # Update weights.
            update(model, alpha)
        valid_ce, valid_acc = evaluate(
            inputs_valid, target_valid, model, forward, batch_size=batch_size)
        print(("Epoch {:3d} "
               "Validation CE {:.5f} "
               "Validation Acc {:.5f}\n").format(epoch, valid_ce, valid_acc))
        # Record the last batch's training stats and this epoch's
        # validation stats for plotting.
        train_ce_list.append((epoch, train_ce))
        train_acc_list.append((epoch, train_acc))
        valid_ce_list.append((epoch, valid_ce))
        valid_acc_list.append((epoch, valid_acc))
    display_plot(train_ce_list, valid_ce_list, "Cross Entropy", number=0)
    display_plot(train_acc_list, valid_acc_list, "Accuracy", number=1)
    train_ce, train_acc = evaluate(
        inputs_train, target_train, model, forward, batch_size=batch_size)
    valid_ce, valid_acc = evaluate(
        inputs_valid, target_valid, model, forward, batch_size=batch_size)
    test_ce, test_acc = evaluate(
        inputs_test, target_test, model, forward, batch_size=batch_size)
    print("CE: Train %.5f Validation %.5f Test %.5f" %
          (train_ce, valid_ce, test_ce))
    print("Acc: Train {:.5f} Validation {:.5f} Test {:.5f}".format(
        train_acc, valid_acc, test_acc))
    stats = {
        "train_ce": train_ce_list,
        "valid_ce": valid_ce_list,
        "train_acc": train_acc_list,
        "valid_acc": valid_acc_list
    }
    return model, stats