def test_ddp_beta_0():
    """Check every solver on a 3-state, 2-action problem with beta = 0.

    The product-form instance and both state-action-pair conversions
    (default and ``sparse=False``) are solved with each method and compared
    against a fixed expected policy and value vector.  For an instance whose
    ``_sparse`` flag is truthy, ``'lp'`` is expected to raise
    ``NotImplementedError`` instead of returning a result.
    """
    num_states, num_actions = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    # Every entry of Q is 1/num_states.
    Q = np.full((num_states, num_actions, num_states), 1 / num_states)
    beta = 0

    expected_sigma = [1, 0, 1]
    expected_v = [1, 1, 1]
    v_init = [0, 0, 0]

    product_ddp = DiscreteDP(R, Q, beta)
    sa_ddp = product_ddp.to_sa_pair_form()
    dense_sa_ddp = product_ddp.to_sa_pair_form(sparse=False)

    for ddp in (product_ddp, sa_ddp, dense_sa_ddp):
        for method in ('vi', 'pi', 'mpi', 'lp'):
            if method == 'lp' and ddp._sparse:
                # Only the raised exception is checked for this combination.
                assert_raises(NotImplementedError, ddp.solve,
                              method=method, v_init=v_init)
                continue

            result = ddp.solve(method=method, v_init=v_init)
            assert_array_equal(result.sigma, expected_sigma)
            assert_array_equal(result.v, expected_v)
def test_ddp_to_sa_and_to_product():
    """Round-trip a DiscreteDP between product and state-action-pair forms.

    A 3-state, 2-action instance with one ``-inf`` reward is converted to
    sa-pair form (default and ``sparse=False``) and back to product form.
    The test checks that ``R``, ``Q`` and ``beta`` of every converted
    instance match the expected data, and that the 'pi', 'vi' and 'mpi'
    solutions ("v", "sigma", "num_iter") agree with those of the original
    instance.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [-np.inf, 1]])
    Q = np.empty((n, m, n))
    Q[:] = 1/n
    Q[0, 0, 0] = 0
    Q[0, 0, 1] = 2/n
    beta = 0.95

    # Expected sa-pair data: the five finite entries of R in row-major
    # order, and the matching five rows of Q.
    sparse_R = np.array([0, 1, 1, 0, 1])
    _Q = np.full((5, 3), 1/3)
    _Q[0, 0] = 0
    _Q[0, 1] = 2/n
    sparse_Q = sparse.coo_matrix(_Q)

    ddp = DiscreteDP(R, Q, beta)
    ddp_sa = ddp.to_sa_pair_form()
    ddp_sa2 = ddp_sa.to_sa_pair_form()
    ddp_sa3 = ddp.to_sa_pair_form(sparse=False)
    ddp2 = ddp_sa.to_product_form()
    ddp3 = ddp_sa2.to_product_form()
    ddp4 = ddp.to_product_form()

    # make sure conversion worked
    for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
        assert_allclose(ddp_s.R, sparse_R)
        # assert_allclose doesn't work on sparse matrices, so bound the
        # largest absolute entry of the difference instead.  The comparison
        # must be asserted; as a bare expression its result is discarded.
        assert np.max(np.abs(sparse_Q - ddp_s.Q)) < 1e-15
        assert_allclose(ddp_s.beta, beta)

    # these two will have probability 0 in state 2, action 0 b/c
    # of the infeasibility in R
    funky_Q = np.empty((n, m, n))
    funky_Q[:] = 1/n
    funky_Q[0, 0, 0] = 0
    funky_Q[0, 0, 1] = 2/n
    funky_Q[2, 0, :] = 0
    for ddp_f in [ddp2, ddp3]:
        assert_allclose(ddp_f.R, ddp.R)
        assert_allclose(ddp_f.Q, funky_Q)
        assert_allclose(ddp_f.beta, ddp.beta)

    # this one is just the original one.
    assert_allclose(ddp4.R, ddp.R)
    assert_allclose(ddp4.Q, ddp.Q)
    assert_allclose(ddp4.beta, ddp.beta)

    # Every converted instance must reproduce the original's solution.
    for method in ["pi", "vi", "mpi"]:
        sol1 = ddp.solve(method=method)
        for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
            sol2 = ddp_other.solve(method=method)
            for k in ["v", "sigma", "num_iter"]:
                assert_allclose(sol1[k], sol2[k])
def test_ddp_to_sa_and_to_product():
    """Verify conversions between product form and sa-pair form.

    Builds a 3-state, 2-action DiscreteDP whose reward for state 2,
    action 0 is ``-inf``, converts it to state-action-pair form (default
    and ``sparse=False``) and back to product form, and checks that:

    * ``R``, ``Q`` and ``beta`` of each sa-pair instance equal the expected
      five-pair data;
    * product-form instances rebuilt from sa-pair form match the original,
      except that ``Q[2, 0, :]`` is all zeros;
    * 'pi', 'vi' and 'mpi' give the same "v", "sigma" and "num_iter" for
      every instance.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [-np.inf, 1]])
    Q = np.empty((n, m, n))
    Q[:] = 1 / n
    Q[0, 0, 0] = 0
    Q[0, 0, 1] = 2 / n
    beta = 0.95

    # Expected sa-pair arrays (the pair with reward -inf is absent).
    sparse_R = np.array([0, 1, 1, 0, 1])
    _Q = np.full((5, 3), 1 / 3)
    _Q[0, 0] = 0
    _Q[0, 1] = 2 / n
    sparse_Q = sparse.coo_matrix(_Q)

    ddp = DiscreteDP(R, Q, beta)
    ddp_sa = ddp.to_sa_pair_form()
    ddp_sa2 = ddp_sa.to_sa_pair_form()
    ddp_sa3 = ddp.to_sa_pair_form(sparse=False)
    ddp2 = ddp_sa.to_product_form()
    ddp3 = ddp_sa2.to_product_form()
    ddp4 = ddp.to_product_form()

    # make sure conversion worked
    for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
        assert_allclose(ddp_s.R, sparse_R)
        # assert_allclose doesn't work on sparse matrices; check the maximum
        # absolute difference by hand.  Without `assert` the comparison
        # is evaluated and thrown away, so Q would go unchecked.
        max_abs_diff = np.max(np.abs(sparse_Q - ddp_s.Q))
        assert max_abs_diff < 1e-15
        assert_allclose(ddp_s.beta, beta)

    # these two will have probability 0 in state 2, action 0 b/c
    # of the infeasibility in R
    funky_Q = np.empty((n, m, n))
    funky_Q[:] = 1 / n
    funky_Q[0, 0, 0] = 0
    funky_Q[0, 0, 1] = 2 / n
    funky_Q[2, 0, :] = 0
    for ddp_f in [ddp2, ddp3]:
        assert_allclose(ddp_f.R, ddp.R)
        assert_allclose(ddp_f.Q, funky_Q)
        assert_allclose(ddp_f.beta, ddp.beta)

    # this one is just the original one.
    assert_allclose(ddp4.R, ddp.R)
    assert_allclose(ddp4.Q, ddp.Q)
    assert_allclose(ddp4.beta, ddp.beta)

    # Solutions must agree across all representations.
    for method in ["pi", "vi", "mpi"]:
        sol1 = ddp.solve(method=method)
        for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
            sol2 = ddp_other.solve(method=method)
            for k in ["v", "sigma", "num_iter"]:
                assert_allclose(sol1[k], sol2[k])
def test_ddp_beta_1_not_implemented_error():
    """With beta = 1, policy evaluation and the solvers must raise.

    For the product-form instance and both sa-pair conversions,
    ``evaluate_policy`` and each of the three named solution methods are
    expected to raise ``NotImplementedError``.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.full((n, m, n), 1/n)
    beta = 1

    product_ddp = DiscreteDP(R, Q, beta)
    instances = (
        product_ddp,
        product_ddp.to_sa_pair_form(),
        product_ddp.to_sa_pair_form(sparse=False),
    )
    solver_names = (
        'value_iteration',
        'policy_iteration',
        'modified_policy_iteration',
    )

    for ddp in instances:
        assert_raises(NotImplementedError, ddp.evaluate_policy, np.zeros(n))
        for solver_name in solver_names:
            # The bound method is called with no arguments by assert_raises.
            solver = getattr(ddp, solver_name)
            assert_raises(NotImplementedError, solver)
def test_ddp_beta_1_not_implemented_error():
    """Expect ``NotImplementedError`` from every entry point when beta = 1.

    Covers ``evaluate_policy`` plus ``value_iteration``,
    ``policy_iteration`` and ``modified_policy_iteration`` on the
    product-form instance and on its two sa-pair conversions.
    """
    num_states, num_actions = 3, 2
    rewards = np.array([[0, 1], [1, 0], [0, 1]])
    transitions = np.empty((num_states, num_actions, num_states))
    transitions[:] = 1 / num_states
    discount = 1

    base = DiscreteDP(rewards, transitions, discount)
    sa_default = base.to_sa_pair_form()
    sa_dense = base.to_sa_pair_form(sparse=False)

    method_names = ['value_iteration', 'policy_iteration',
                    'modified_policy_iteration']

    for ddp in [base, sa_default, sa_dense]:
        zero_policy = np.zeros(num_states)
        assert_raises(NotImplementedError, ddp.evaluate_policy, zero_policy)
        for name in method_names:
            assert_raises(NotImplementedError, getattr(ddp, name))
def test_ddp_beta_0():
    """Solve a beta = 0 problem with 'vi', 'pi' and 'mpi'.

    Both the product-form instance and its sa-pair conversion must return
    the fixed expected policy ``[1, 0, 1]`` and values ``[1, 1, 1]``.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.full((n, m, n), 1/n)
    beta = 0

    expected_sigma = [1, 0, 1]
    expected_v = [1, 1, 1]
    v_init = [0, 0, 0]

    product_ddp = DiscreteDP(R, Q, beta)
    sa_pair_ddp = product_ddp.to_sa_pair_form()

    for ddp in (product_ddp, sa_pair_ddp):
        for method in ('vi', 'pi', 'mpi'):
            result = ddp.solve(method=method, v_init=v_init)
            assert_array_equal(result.sigma, expected_sigma)
            assert_array_equal(result.v, expected_v)
def test_ddp_beta_0():
    """Compare solver output against fixed targets when beta = 0.

    Each of 'vi', 'pi' and 'mpi' is run, starting from a zero ``v_init``,
    on the product-form instance and on its sa-pair form; ``sigma`` and
    ``v`` of every result are compared with the expected arrays.
    """
    num_states, num_actions = 3, 2
    rewards = np.array([[0, 1], [1, 0], [0, 1]])
    transitions = np.empty((num_states, num_actions, num_states))
    transitions[:] = 1 / num_states
    discount = 0

    sigma_target = [1, 0, 1]
    v_target = [1, 1, 1]
    start = [0, 0, 0]

    original = DiscreteDP(rewards, transitions, discount)
    converted = original.to_sa_pair_form()

    solver_codes = ['vi', 'pi', 'mpi']
    for ddp in [original, converted]:
        for code in solver_codes:
            solution = ddp.solve(method=code, v_init=start)
            assert_array_equal(solution.sigma, sigma_target)
            assert_array_equal(solution.v, v_target)