def compute_acc(X, Y, W):
    '''Compute the accuracy of the model: the fraction of instances in X
       whose predicted class (argmax of the activations) matches the
       one-hot label in Y.'''
    y_hat = sr.compute_a(sr.compute_z(X, W, 0))  # forward pass: logits -> activations
    y_hat_indices = np.argmax(y_hat, axis=1)     # predicted class of each instance
    y_indices = np.argmax(Y, axis=1)             # true class of each instance
    correct = (y_indices == y_hat_indices)       # boolean vector of correct predictions
    acc = correct.sum() / X.shape[0]
    return acc
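# A minimal usage sketch for compute_acc (assumptions: numpy is imported as np,
# sr is the problem-1 softmax-regression module, and sr.compute_z / sr.compute_a
# accept a whole feature matrix with one instance per row; the toy data below is
# illustrative only):
#
#     X = np.array([[1.0, 0.0], [0.0, 1.0]])  # 2 instances, p = 2 features
#     Y = np.array([[1, 0], [0, 1]])          # one-hot labels, c = 2 classes
#     W = np.eye(2)                           # weights that separate this toy data
#     acc = compute_acc(X, Y, W)              # -> 1.0, if sr.compute_z computes X W + b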
def compute_z2(a1, W2, b2):
    '''
    Compute the linear logit values of a data instance in the second layer.
    z2 = W2 a1 + b2
    Input:
        a1: the non-linear activations of the first layer, a float numpy vector of shape h by 1.
        W2: the weight matrix of the 2nd layer, a float numpy matrix of shape (c by h). Here c is the number of classes.
        b2: the bias values of the 2nd layer, a float numpy vector of shape c by 1.
    Output:
        z2: the linear logits of the 2nd layer, a float numpy vector of shape c by 1.
    '''
    # reuse the problem-1 linear-logit function: z2 = W2 a1 + b2
    z2 = sr.compute_z(a1, W2, b2)
    return z2
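# A shape sketch for compute_z2 (a hypothetical example; it assumes
# sr.compute_z(a1, W2, b2) computes W2 a1 + b2 on column vectors):
#
#     a1 = np.ones((3, 1))          # h = 3 first-layer activations
#     W2 = np.zeros((4, 3))         # c = 4 classes
#     b2 = np.zeros((4, 1))
#     z2 = compute_z2(a1, W2, b2)   # expected shape: (4, 1)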
def compute_z1(x, W1, b1):
    '''
    Compute the linear logit values of a data instance in the first layer.
    z1 = W1 x + b1
    Input:
        x: the feature vector of a data instance, a float numpy vector of shape p by 1. Here p is the number of features/dimensions.
        W1: the weight matrix of the first layer, a float numpy matrix of shape (h by p). Here h is the number of outputs in the first layer.
        b1: the bias values of the first layer, a float numpy vector of shape h by 1.
    Output:
        z1: the linear logits, a float numpy vector of shape h by 1.
    Hint: you could reuse the functions in problem 1, for example sr.function_name()
    '''
    # reuse the problem-1 linear-logit function: z1 = W1 x + b1
    z1 = sr.compute_z(x, W1, b1)
    return z1
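# A shape sketch chaining the two layers (hypothetical; a1 would come from the
# first-layer activation function, which is not defined in this section, so an
# identity pass-through stands in for it here):
#
#     x = np.ones((5, 1))           # p = 5 features
#     W1 = np.zeros((3, 5)); b1 = np.zeros((3, 1))
#     z1 = compute_z1(x, W1, b1)    # expected shape: (3, 1)
#     a1 = z1                       # placeholder for the real activation
#     W2 = np.zeros((4, 3)); b2 = np.zeros((4, 1))
#     z2 = compute_z2(a1, W2, b2)   # expected shape: (4, 1)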