def test_V_pi():
    """Check policy evaluation of the uniform random policy.

    Builds the MDP returned by ``build_SB_example35`` (presumably the
    gridworld of Sutton & Barto, Example 3.5 -- confirm against the
    builder), evaluates the uniform random policy with ``calculate_V_pi``
    and compares the resulting state values, rounded to one decimal, with
    the 25 reference values listed below.
    """
    mdp = build_SB_example35()

    # Uniform random policy: the same probability for every entry along
    # axis 1 of P. Assumes axis 0 of P indexes states and axis 1 indexes
    # actions -- TODO confirm against the MDP definition.
    n_states, n_actions = mdp.P.shape[0], mdp.P.shape[1]
    policy = np.ones((n_states, n_actions)) / n_actions

    V_pi = calculate_V_pi(mdp.P, mdp.reward, policy, mdp.discount)

    # Reference values are given to one decimal place only, hence the
    # rounding of the computed values before the comparison.
    expected = np.array([
        3.3, 8.8, 4.4, 5.3, 1.5,
        1.5, 3.0, 2.3, 1.9, 0.5,
        0.1, 0.7, 0.7, 0.4, -0.4,
        -1.0, -0.4, -0.4, -0.6, -1.2,
        -1.9, -1.3, -1.2, -1.4, -2.0,
    ])
    assert np.allclose(np.round(V_pi, 1), expected)
def test_cakeworld_mdp():
    """Numerical test for cake world mdp.

    This numerical test ensures that calculations from implemented MDP
    represent those that are obtained from calculations in the paper.
    """
    epsilon = 0.1
    discount = 0.99
    # Build the MDP from the same locals that feed the closed-form expected
    # value below, so the two cannot drift apart if a parameter is edited.
    built_mdp = build_cake_world_mdp(epsilon=epsilon, discount=discount)

    # Uniform policy over a 2x2 table; the checked value should not depend
    # on this choice (see the comment on the expected value).
    eval_policy = np.array([[0.5, 0.5], [0.5, 0.5]])
    calc_v_pi = analytic.calculate_V_pi(
        built_mdp.P, built_mdp.reward, eval_policy, built_mdp.discount)

    # Value of "Bad State" is independent of policy.
    expected_value = -2.0 * (1 + epsilon) / discount
    calculated_value = calc_v_pi[1]
    assert np.isclose(calculated_value, expected_value)