Example #1
import numpy as np

# softmax and include_bias are helper functions assumed to be defined
# elsewhere in the source; a sketch of both follows this example.
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    e_a = np.zeros(theta.shape[0])  # one-hot indicator for the action, size |A|
    e_a[action] = 1.
    ob_1 = include_bias(ob)  # observation with a bias entry, size |S|+1
    logits = ob_1.dot(theta.T)  # (|S|+1,) . (|S|+1, |A|) -> (|A|,)
    # grad log pi(a|s) = outer(e_a - softmax(logits), ob_1), shape |A| x (|S|+1)
    return np.outer(e_a - softmax(logits), ob_1)
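
This is the score-function gradient of a linear softmax policy: with ob_1 the bias-augmented observation, grad_theta log pi_theta(a|s) = (e_a - softmax(theta . ob_1)) ob_1^T. For reference, a minimal sketch of the two helpers the snippet assumes; these are hypothetical reconstructions, and the originals may differ (for example in where the bias entry is placed):

def softmax(x):
    # numerically stable softmax over the last axis
    x = x - np.max(x, axis=-1, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=-1, keepdims=True)

def include_bias(ob):
    # append a constant 1 so the bias column is folded into theta
    return np.append(ob, 1.)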
Example #2
def cartpole_get_grad_logp_action(theta, ob, action):
    """
    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A matrix of size |A| * (|S|+1)
    """
    # one-hot indicator for the chosen action
    a = np.zeros(theta.shape[0])
    a[action] = 1.
    # current action probabilities under the policy
    p = softmax(compute_logits(theta, ob))
    ob_1 = include_bias(ob)
    # same outer-product gradient as in Example #1
    return np.outer(a - p, ob_1)
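
Example #2 reduces to Example #1 once compute_logits multiplies theta by the bias-augmented observation. A plausible definition, consistent with how both snippets use it (hypothetical, not from the original source):

def compute_logits(theta, ob):
    # theta: (|A|, |S|+1) -> vector of |A| unnormalized log-probabilities
    return theta.dot(include_bias(ob))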
Example #3
# numpy is assumed imported as np; X, y_onehot, input_dims, hidden_dims
# and num_labels are assumed defined earlier in the source. sigmoid,
# softmax, cross_entropy_loss, cross_entropy_grad and sigmoid_grad are
# sketched after the last example.
epochs = 5000
lr = 0.1  # assumed learning rate; not shown in the original snippet
W0 = np.random.randn(input_dims, hidden_dims)
W1 = np.random.randn(hidden_dims, num_labels)
W0s = W0.copy()  # keep copies of the initial weights
W1s = W1.copy()
B0 = np.random.randn(1, hidden_dims)
B1 = np.random.randn(1, num_labels)

iz = []
losses = []

for i in range(epochs):

    # forward propagate
    a1 = sigmoid(X.dot(W0) + B0)
    a2 = softmax(a1.dot(W1) + B1)
    loss = cross_entropy_loss(y_onehot, a2)

    # backpropagation

    # how much we missed times nothing
    # the beauty of cross entropy
    l2_delta = cross_entropy_grad(y_onehot, a2)

    # how much did each l1 value contribute to the l2 loss
    # (according to the weights)?
    l1_loss = l2_delta.dot(W1.T)

    # in what direction is the target a1?
    # were we really sure? if so, don't change too much.
    l1_delta = l1_loss * sigmoid_grad(a1)

    # (assumed) vanilla gradient-descent step; the update code is
    # truncated in the original snippet
    W1 -= lr * a1.T.dot(l2_delta)
    B1 -= lr * l2_delta.sum(axis=0, keepdims=True)
    W0 -= lr * X.T.dot(l1_delta)
    B0 -= lr * l1_delta.sum(axis=0, keepdims=True)

    iz.append(i)
    losses.append(loss)
# --- second run: fixed seed, hard-coded layer sizes, no bias terms ---
np.random.seed(1)

epochs = 5000
W0 = np.random.randn(4, 8)
W1 = np.random.randn(8, num_labels)
W0s = W0.copy()
W1s = W1.copy()

iz = []
losses = []

for i in range(epochs):

    # forward propagate
    a1 = sigmoid(X.dot(W0))
    a2 = softmax(a1.dot(W1))
    loss = cross_entropy_loss(y_onehot, a2)

    # backpropagation

    # how much we missed times nothing
    # the beauty of cross entropy
    l2_delta = cross_entropy_grad(y_onehot, a2)

    # how much did each l1 value contribute to the l2 loss
    # (according to the weights)?
    l1_loss = l2_delta.dot(W1.T)

    # in what direction is the target a1?
    # were we really sure? if so, don't change too much.
    l1_delta = l1_loss * sigmoid_grad(a1)

    # (assumed) gradient-descent step, as in the first run
    W1 -= lr * a1.T.dot(l2_delta)
    W0 -= lr * X.T.dot(l1_delta)

    iz.append(i)
    losses.append(loss)
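
Both training loops rely on helpers that are not shown. Below is a minimal sketch consistent with how they are used; the original definitions may differ. In particular, cross_entropy_grad is assumed to return the gradient with respect to the softmax logits (probs - y), which is exactly why l2_delta needs no extra factor ("how much we missed times nothing"):

def sigmoid(z):
    return 1. / (1. + np.exp(-z))

def sigmoid_grad(a):
    # derivative of the sigmoid, written in terms of its output a
    return a * (1. - a)

def softmax(z):
    # numerically stable softmax over the last axis
    z = z - np.max(z, axis=-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

def cross_entropy_loss(y_onehot, probs):
    # mean negative log-likelihood of the true classes
    return -np.mean(np.sum(y_onehot * np.log(probs + 1e-12), axis=1))

def cross_entropy_grad(y_onehot, probs):
    # gradient of the mean cross-entropy w.r.t. the softmax logits
    return (probs - y_onehot) / len(y_onehot)

The iz and losses lists collected in each loop are presumably meant for plotting the training curve afterwards.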