def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Compute (one_hot(action) - softmax(logits)) outer ob_1, where ob_1 is the
    observation passed through ``include_bias`` and logits = ob_1 . theta^T.

    Assuming ``softmax`` is the standard normalized exponential, this is the
    gradient of log softmax(logits)[action] with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer index into the |A| actions
    :return: A matrix of size |A| * (|S|+1), i.e. the same shape as theta
    """
    num_actions = theta.shape[0]

    # One-hot indicator of the selected action, length |A|.
    chosen = np.zeros(num_actions)
    chosen[action] = 1.

    # Observation after include_bias, length |S|+1 per the shapes documented above.
    features = include_bias(ob)

    # (|S|+1) vector times (|S|+1) x |A| matrix -> one score per action, length |A|.
    scores = np.dot(features, theta.T)
    action_probs = softmax(scores)

    # Outer product of a length-|A| vector with a length-(|S|+1) vector.
    return np.outer(chosen - action_probs, features)
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    Compute (one_hot(action) - p) outer ob_1, where
    p = softmax(compute_logits(theta, ob)) and ob_1 = include_bias(ob).

    Assuming ``compute_logits`` returns the linear scores ob_1 . theta^T and
    ``softmax`` is the standard normalized exponential, this is the gradient
    of log p[action] with respect to theta.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer index into the |A| actions
    :return: A matrix of size |A| * (|S|+1), i.e. the same shape as theta
    """
    # One-hot indicator of the selected action, length |A|.
    one_hot = np.zeros(theta.shape[0])
    one_hot[action] = 1

    # Per-action probabilities under the current parameters.
    probs = softmax(compute_logits(theta, ob))

    # Observation after include_bias, length |S|+1 per the shapes documented above.
    ob_1 = include_bias(ob)

    # Outer product of a length-|A| vector with a length-(|S|+1) vector.
    return np.outer(one_hot - probs, ob_1)
# Setup and (partial) training loop for a two-layer network: a sigmoid hidden
# layer followed by a softmax output layer, each with its own bias term.
epochs = 5000

# Parameters are drawn from a standard normal via np.random.randn. No seed is
# set in this span, so the draws differ between runs unless one is set elsewhere.
W0 = np.random.randn(input_dims, hidden_dims)
W1 = np.random.randn(hidden_dims, num_labels)

# Independent copies of the initial weights, taken before any training step.
W0s = W0.copy()
W1s = W1.copy()

# Biases have a single row; presumably they broadcast across the rows of the
# matrix products below -- confirm against the shape of X.
B0 = np.random.randn(1, hidden_dims)
B1 = np.random.randn(1, num_labels)

# Empty accumulators; nothing in the visible span appends to them.
iz = []
losses = []

for i in range(epochs):
    # Forward propagation: hidden activations, then output probabilities.
    a1 = sigmoid(X.dot(W0) + B0)
    a2 = softmax(a1.dot(W1) + B1)
    loss = cross_entropy_loss(y_onehot, a2)

    # Backpropagation.
    # Output-layer delta comes straight from cross_entropy_grad. The original
    # note ("how much we missed times nothing -- the beauty of cross entropy")
    # presumably means that with softmax + cross-entropy no extra
    # activation-derivative factor is needed here -- confirm against
    # cross_entropy_grad.
    l2_delta = cross_entropy_grad(y_onehot, a2)

    # How much did each hidden (l1) value contribute to the l2 loss,
    # according to the weights W1?
    l1_loss = l2_delta.dot(W1.T)

    # Scale by the sigmoid gradient evaluated from the hidden activations:
    # where the unit was confident, the update is damped ("were we really
    # sure? if so, don't change too much").
    # NOTE(review): sigmoid_grad is passed the activation a1, not the
    # pre-activation -- confirm that is what sigmoid_grad expects.
    l1_delta = l1_loss * sigmoid_grad(a1)
# Setup and (partial) training loop for a two-layer network without bias
# terms: a sigmoid hidden layer followed by a softmax output layer.

# Seed NumPy's global RNG so the weight draws below are reproducible; the
# order of the randn calls therefore matters.
np.random.seed(1)
epochs = 5000

# Layer sizes are hard-coded here: 4 inputs -> 8 hidden units -> num_labels outputs.
W0 = np.random.randn(4, 8)
W1 = np.random.randn(8, num_labels)

# Independent copies of the initial weights, taken before any training step.
W0s = W0.copy()
W1s = W1.copy()

# Empty accumulators; nothing in the visible span appends to them.
iz = []
losses = []

for i in range(epochs):
    # Forward propagation: hidden activations, then output probabilities.
    a1 = sigmoid(X.dot(W0))
    a2 = softmax(a1.dot(W1))
    loss = cross_entropy_loss(y_onehot, a2)

    # Backpropagation.
    # Output-layer delta comes straight from cross_entropy_grad. The original
    # note ("how much we missed times nothing -- the beauty of cross entropy")
    # presumably means that with softmax + cross-entropy no extra
    # activation-derivative factor is needed here -- confirm against
    # cross_entropy_grad.
    l2_delta = cross_entropy_grad(y_onehot, a2)

    # How much did each hidden (l1) value contribute to the l2 loss,
    # according to the weights W1?
    l1_loss = l2_delta.dot(W1.T)

    # Scale by the sigmoid gradient evaluated from the hidden activations:
    # where the unit was confident, the update is damped ("were we really
    # sure? if so, don't change too much").
    # NOTE(review): sigmoid_grad is passed the activation a1, not the
    # pre-activation -- confirm that is what sigmoid_grad expects.
    l1_delta = l1_loss * sigmoid_grad(a1)