Beispiel #1
0
def test_ddp_beta_0():
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.empty((n, m, n))
    Q[:] = 1 / n
    beta = 0
    sigma_star = [1, 0, 1]
    v_star = [1, 1, 1]
    v_init = [0, 0, 0]

    ddp0 = DiscreteDP(R, Q, beta)
    ddp1 = ddp0.to_sa_pair_form()
    ddp2 = ddp0.to_sa_pair_form(sparse=False)
    methods = ['vi', 'pi', 'mpi', 'lp']

    for ddp in [ddp0, ddp1, ddp2]:
        for method in methods:
            if method == 'lp' and ddp._sparse:
                assert_raises(NotImplementedError,
                              ddp.solve,
                              method=method,
                              v_init=v_init)
            else:
                res = ddp.solve(method=method, v_init=v_init)
                assert_array_equal(res.sigma, sigma_star)
                assert_array_equal(res.v, v_star)
Beispiel #2
0
def test_ddp_to_sa_and_to_product():
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [-np.inf, 1]])
    Q = np.empty((n, m, n))
    Q[:] = 1/n
    Q[0, 0, 0] = 0
    Q[0, 0, 1] = 2/n
    beta = 0.95

    sparse_R = np.array([0, 1, 1, 0, 1])
    _Q = np.full((5, 3), 1/3)
    _Q[0, 0] = 0
    _Q[0, 1] = 2/n
    sparse_Q = sparse.coo_matrix(_Q)

    ddp = DiscreteDP(R, Q, beta)
    ddp_sa = ddp.to_sa_pair_form()
    ddp_sa2 = ddp_sa.to_sa_pair_form()
    ddp_sa3 = ddp.to_sa_pair_form(sparse=False)
    ddp2 = ddp_sa.to_product_form()
    ddp3 = ddp_sa2.to_product_form()
    ddp4 = ddp.to_product_form()

    # make sure conversion worked
    for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
        assert_allclose(ddp_s.R, sparse_R)
        # allcose doesn't work on sparse
        np.max(np.abs((sparse_Q - ddp_s.Q))) < 1e-15
        assert_allclose(ddp_s.beta, beta)

    # these two will have probability 0 in state 2, action 0 b/c
    # of the infeasiability in R
    funky_Q = np.empty((n, m, n))
    funky_Q[:] = 1/n
    funky_Q[0, 0, 0] = 0
    funky_Q[0, 0, 1] = 2/n
    funky_Q[2, 0, :] = 0
    for ddp_f in [ddp2, ddp3]:
        assert_allclose(ddp_f.R, ddp.R)
        assert_allclose(ddp_f.Q, funky_Q)
        assert_allclose(ddp_f.beta, ddp.beta)

    # this one is just the original one.
    assert_allclose(ddp4.R, ddp.R)
    assert_allclose(ddp4.Q, ddp.Q)
    assert_allclose(ddp4.beta, ddp.beta)

    for method in ["pi", "vi", "mpi"]:
        sol1 = ddp.solve(method=method)
        for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
            sol2 = ddp_other.solve(method=method)

            for k in ["v", "sigma", "num_iter"]:
                assert_allclose(sol1[k], sol2[k])
Beispiel #3
0
def test_ddp_to_sa_and_to_product():
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [-np.inf, 1]])
    Q = np.empty((n, m, n))
    Q[:] = 1 / n
    Q[0, 0, 0] = 0
    Q[0, 0, 1] = 2 / n
    beta = 0.95

    sparse_R = np.array([0, 1, 1, 0, 1])
    _Q = np.full((5, 3), 1 / 3)
    _Q[0, 0] = 0
    _Q[0, 1] = 2 / n
    sparse_Q = sparse.coo_matrix(_Q)

    ddp = DiscreteDP(R, Q, beta)
    ddp_sa = ddp.to_sa_pair_form()
    ddp_sa2 = ddp_sa.to_sa_pair_form()
    ddp_sa3 = ddp.to_sa_pair_form(sparse=False)
    ddp2 = ddp_sa.to_product_form()
    ddp3 = ddp_sa2.to_product_form()
    ddp4 = ddp.to_product_form()

    # make sure conversion worked
    for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
        assert_allclose(ddp_s.R, sparse_R)
        # allcose doesn't work on sparse
        np.max(np.abs((sparse_Q - ddp_s.Q))) < 1e-15
        assert_allclose(ddp_s.beta, beta)

    # these two will have probability 0 in state 2, action 0 b/c
    # of the infeasiability in R
    funky_Q = np.empty((n, m, n))
    funky_Q[:] = 1 / n
    funky_Q[0, 0, 0] = 0
    funky_Q[0, 0, 1] = 2 / n
    funky_Q[2, 0, :] = 0
    for ddp_f in [ddp2, ddp3]:
        assert_allclose(ddp_f.R, ddp.R)
        assert_allclose(ddp_f.Q, funky_Q)
        assert_allclose(ddp_f.beta, ddp.beta)

    # this one is just the original one.
    assert_allclose(ddp4.R, ddp.R)
    assert_allclose(ddp4.Q, ddp.Q)
    assert_allclose(ddp4.beta, ddp.beta)

    for method in ["pi", "vi", "mpi"]:
        sol1 = ddp.solve(method=method)
        for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
            sol2 = ddp_other.solve(method=method)

            for k in ["v", "sigma", "num_iter"]:
                assert_allclose(sol1[k], sol2[k])
Beispiel #4
0
def test_ddp_beta_1_not_implemented_error():
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.empty((n, m, n))
    Q[:] = 1/n
    beta = 1

    ddp0 = DiscreteDP(R, Q, beta)
    ddp1 = ddp0.to_sa_pair_form()
    ddp2 = ddp0.to_sa_pair_form(sparse=False)

    solution_methods = \
        ['value_iteration', 'policy_iteration', 'modified_policy_iteration']

    for ddp in [ddp0, ddp1, ddp2]:
        assert_raises(NotImplementedError, ddp.evaluate_policy, np.zeros(n))
        for method in solution_methods:
            assert_raises(NotImplementedError, getattr(ddp, method))
Beispiel #5
0
def test_ddp_beta_1_not_implemented_error():
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.empty((n, m, n))
    Q[:] = 1 / n
    beta = 1

    ddp0 = DiscreteDP(R, Q, beta)
    ddp1 = ddp0.to_sa_pair_form()
    ddp2 = ddp0.to_sa_pair_form(sparse=False)

    solution_methods = \
        ['value_iteration', 'policy_iteration', 'modified_policy_iteration']

    for ddp in [ddp0, ddp1, ddp2]:
        assert_raises(NotImplementedError, ddp.evaluate_policy, np.zeros(n))
        for method in solution_methods:
            assert_raises(NotImplementedError, getattr(ddp, method))
Beispiel #6
0
def test_ddp_beta_0():
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.empty((n, m, n))
    Q[:] = 1/n
    beta = 0
    sigma_star = [1, 0, 1]
    v_star = [1, 1, 1]
    v_init = [0, 0, 0]

    ddp0 = DiscreteDP(R, Q, beta)
    ddp1 = ddp0.to_sa_pair_form()
    methods = ['vi', 'pi', 'mpi']

    for ddp in [ddp0, ddp1]:
        for method in methods:
            res = ddp.solve(method=method, v_init=v_init)
            assert_array_equal(res.sigma, sigma_star)
            assert_array_equal(res.v, v_star)
Beispiel #7
0
def test_ddp_beta_0():
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [0, 1]])
    Q = np.empty((n, m, n))
    Q[:] = 1 / n
    beta = 0
    sigma_star = [1, 0, 1]
    v_star = [1, 1, 1]
    v_init = [0, 0, 0]

    ddp0 = DiscreteDP(R, Q, beta)
    ddp1 = ddp0.to_sa_pair_form()
    methods = ['vi', 'pi', 'mpi']

    for ddp in [ddp0, ddp1]:
        for method in methods:
            res = ddp.solve(method=method, v_init=v_init)
            assert_array_equal(res.sigma, sigma_star)
            assert_array_equal(res.v, v_star)