def check_dL_dW1(x, y, W1, b1, W2, b2, delta=1e-7):
    '''
        Approximate the gradient of the loss L w.r.t. the weights W1 numerically
        (gradient check).

        Every entry W1[i, j] is perturbed by `delta` in turn and the
        forward-difference quotient
            (L(W1 + delta * E_ij) - L(W1)) / delta
        is stored in the matching entry of the result.

        Input:
            x, y: passed unchanged to `forward` and `sr.compute_L`.
            W1: the weights being checked; must expose a 2-D `.shape`.
            b1, W2, b2: remaining parameters, passed unchanged to `forward`.
            delta: a small number for gradient check, a float scalar.
        Output:
            dL_dW1: the approximated gradient of the loss L w.r.t. the
                    weights W1, a numpy matrix with the same shape as W1.
    '''
    h, p = W1.shape
    dL_dW1 = np.asmatrix(np.zeros((h, p)))

    # The unperturbed loss does not depend on (i, j), so evaluate it once
    # instead of once per weight.
    # NOTE(review): this relies on `forward` being deterministic and not
    # mutating its arguments -- confirm against its definition.
    _, _, _, a2 = forward(x, W1, b1, W2, b2)
    base_loss = sr.compute_L(a2, y)

    # A single perturbation matrix is reused; only one entry is non-zero at
    # any time and it is reset after each evaluation.
    d = np.asmatrix(np.zeros((h, p)))
    for i in range(h):
        for j in range(p):
            d[i, j] = delta
            _, _, _, a2_shifted = forward(x, W1 + d, b1, W2, b2)
            dL_dW1[i, j] = (sr.compute_L(a2_shifted, y) - base_loss) / delta
            d[i, j] = 0.0
    return dL_dW1
def compute_acc(X, Y, W):
    """Return the fraction of rows of X whose predicted class matches Y.

    Predictions are `sr.compute_a(sr.compute_z(X, W, 0))`; the predicted and
    the reference class of a row are the argmax along axis 1 of the
    prediction and of Y respectively.

    Input:
        X: the samples; `X.shape[0]` is used as the number of samples.
        Y: the reference labels, one row per sample (argmax of a row is
           taken as its class).
        W: the weights passed to `sr.compute_z`.
    Output:
        acc: number of matching rows divided by `X.shape[0]`.
    """
    y_hat = sr.compute_a(sr.compute_z(X, W, 0))

    # Flatten both index vectors to 1-D.  np.argmax on an np.matrix returns
    # an (n, 1) matrix while on an ndarray it returns shape (n,); comparing
    # the two directly would broadcast to (n, n) and inflate the count.
    predicted = np.asarray(np.argmax(y_hat, axis=1)).ravel()
    expected = np.asarray(np.argmax(Y, axis=1)).ravel()

    n_correct = (expected == predicted).sum()
    return n_correct / X.shape[0]


X = np.load('mnist_train_images.npy')
Y = np.load('mnist_train_labels.npy')
# W = train(X, Y, batch_size=75, epsilon=0.07, n_epochs=2, alpha=.005)
W = train(X, Y)

# Performance evaluation with the validation set.
X_valid = np.load('mnist_validation_images.npy')
Y_valid = np.load('mnist_validation_labels.npy')
acc = compute_acc(X_valid, Y_valid, W) * 100
# unreg_loss = compute_L_unreg(X_valid, Y_valid, W)
# NOTE(review): `sr.compute_L` is called here with (X, Y, W); its definition
# is not visible from this script -- confirm this matches its signature.
unreg_loss = sr.compute_L(X_valid, Y_valid, W)
print("Validation Loss: %.2f" % unreg_loss)
print("Validation Accuracy: %.2f %%." % acc)

# NOTE(review): `L` is not assigned anywhere in the visible part of this
# script; presumably it is the per-mini-batch loss history produced during
# training -- confirm it is defined at module level before this point.
plt.plot(L)
plt.title("Loss over each iteration (mini-batch)")
plt.show()