def cartpole_get_action(theta, ob, rng=np.random):
    """
    Sample an action for the given observation under parameters ``theta``.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param rng: Random source forwarded unchanged to ``weighted_sample``;
        defaults to the ``np.random`` module
    :return: An integer
    """
    # Score the observation first, then hand the scores to the sampler.
    action_logits = compute_logits(theta, ob)
    return weighted_sample(action_logits, rng=rng)
def cartpole_get_action(theta, ob, rng=np.random):
    """
    Pick an action by sampling from logits computed from the observation.

    :param theta: Parameter array; its transpose is multiplied onto the
        bias-extended observation (presumably |A| * (|S|+1) -- confirm)
    :param ob: Observation; passed through ``include_bias`` before the product
        (presumably a vector of size |S| -- confirm)
    :param rng: Random source forwarded unchanged to ``weighted_sample``;
        defaults to the ``np.random`` module
    :return: Whatever ``weighted_sample`` yields (presumably an action
        index -- confirm)
    """
    # logits = include_bias(ob) . theta^T, fed straight into the sampler.
    return weighted_sample(include_bias(ob).dot(theta.T), rng=rng)