Beispiel #1
0
def test_ddp_to_sa_and_to_product():
    """Check conversions between product and state-action-pair forms.

    Builds a small product-form ``DiscreteDP`` with one infeasible
    state-action pair (reward ``-inf``), converts it back and forth with
    ``to_sa_pair_form`` / ``to_product_form``, and verifies that the
    rewards, transition probabilities, and discount factor match the
    expected values and that every formulation yields the same solution.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [-np.inf, 1]])
    Q = np.empty((n, m, n))
    Q[:] = 1/n
    Q[0, 0, 0] = 0
    Q[0, 0, 1] = 2/n
    beta = 0.95

    # Expected state-action-pair data: only the 5 finite-reward pairs
    # of R remain, one row of transition probabilities per pair.
    sparse_R = np.array([0, 1, 1, 0, 1])
    expected_Q = np.full((5, 3), 1/3)
    expected_Q[0, 0] = 0
    expected_Q[0, 1] = 2/n

    ddp = DiscreteDP(R, Q, beta)
    ddp_sa = ddp.to_sa_pair_form()
    ddp_sa2 = ddp_sa.to_sa_pair_form()
    ddp_sa3 = ddp.to_sa_pair_form(sparse=False)
    ddp2 = ddp_sa.to_product_form()
    ddp3 = ddp_sa2.to_product_form()
    ddp4 = ddp.to_product_form()

    # make sure conversion worked
    for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
        assert_allclose(ddp_s.R, sparse_R)
        # assert_allclose doesn't work on sparse matrices, so densify
        # first; Q may be sparse or dense depending on the `sparse` flag.
        if sparse.issparse(ddp_s.Q):
            Q_dense = ddp_s.Q.toarray()
        else:
            Q_dense = np.asarray(ddp_s.Q)
        # The original comparison was computed but never asserted.
        assert_allclose(Q_dense, expected_Q, rtol=0, atol=1e-15)
        assert_allclose(ddp_s.beta, beta)

    # these two will have probability 0 in state 2, action 0 b/c
    # of the infeasibility in R
    funky_Q = np.empty((n, m, n))
    funky_Q[:] = 1/n
    funky_Q[0, 0, 0] = 0
    funky_Q[0, 0, 1] = 2/n
    funky_Q[2, 0, :] = 0
    for ddp_f in [ddp2, ddp3]:
        assert_allclose(ddp_f.R, ddp.R)
        assert_allclose(ddp_f.Q, funky_Q)
        assert_allclose(ddp_f.beta, ddp.beta)

    # this one is just the original one.
    assert_allclose(ddp4.R, ddp.R)
    assert_allclose(ddp4.Q, ddp.Q)
    assert_allclose(ddp4.beta, ddp.beta)

    # Every formulation must produce the same value function, policy,
    # and iteration count for each solution method.
    for method in ["pi", "vi", "mpi"]:
        sol1 = ddp.solve(method=method)
        for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
            sol2 = ddp_other.solve(method=method)

            for k in ["v", "sigma", "num_iter"]:
                assert_allclose(sol1[k], sol2[k])
Beispiel #2
0
def test_ddp_to_sa_and_to_product():
    """Check conversions between product and state-action-pair forms.

    Builds a small product-form ``DiscreteDP`` with one infeasible
    state-action pair (reward ``-inf``), converts it back and forth with
    ``to_sa_pair_form`` / ``to_product_form``, and verifies that the
    rewards, transition probabilities, and discount factor match the
    expected values and that every formulation yields the same solution.
    """
    n, m = 3, 2
    R = np.array([[0, 1], [1, 0], [-np.inf, 1]])
    Q = np.empty((n, m, n))
    Q[:] = 1 / n
    Q[0, 0, 0] = 0
    Q[0, 0, 1] = 2 / n
    beta = 0.95

    # Expected state-action-pair data: only the 5 finite-reward pairs
    # of R remain, one row of transition probabilities per pair.
    sparse_R = np.array([0, 1, 1, 0, 1])
    expected_Q = np.full((5, 3), 1 / 3)
    expected_Q[0, 0] = 0
    expected_Q[0, 1] = 2 / n

    ddp = DiscreteDP(R, Q, beta)
    ddp_sa = ddp.to_sa_pair_form()
    ddp_sa2 = ddp_sa.to_sa_pair_form()
    ddp_sa3 = ddp.to_sa_pair_form(sparse=False)
    ddp2 = ddp_sa.to_product_form()
    ddp3 = ddp_sa2.to_product_form()
    ddp4 = ddp.to_product_form()

    # make sure conversion worked
    for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
        assert_allclose(ddp_s.R, sparse_R)
        # assert_allclose doesn't work on sparse matrices, so densify
        # first; Q may be sparse or dense depending on the `sparse` flag.
        if sparse.issparse(ddp_s.Q):
            Q_dense = ddp_s.Q.toarray()
        else:
            Q_dense = np.asarray(ddp_s.Q)
        # The original comparison was computed but never asserted.
        assert_allclose(Q_dense, expected_Q, rtol=0, atol=1e-15)
        assert_allclose(ddp_s.beta, beta)

    # these two will have probability 0 in state 2, action 0 b/c
    # of the infeasibility in R
    funky_Q = np.empty((n, m, n))
    funky_Q[:] = 1 / n
    funky_Q[0, 0, 0] = 0
    funky_Q[0, 0, 1] = 2 / n
    funky_Q[2, 0, :] = 0
    for ddp_f in [ddp2, ddp3]:
        assert_allclose(ddp_f.R, ddp.R)
        assert_allclose(ddp_f.Q, funky_Q)
        assert_allclose(ddp_f.beta, ddp.beta)

    # this one is just the original one.
    assert_allclose(ddp4.R, ddp.R)
    assert_allclose(ddp4.Q, ddp.Q)
    assert_allclose(ddp4.beta, ddp.beta)

    # Every formulation must produce the same value function, policy,
    # and iteration count for each solution method.
    for method in ["pi", "vi", "mpi"]:
        sol1 = ddp.solve(method=method)
        for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
            sol2 = ddp_other.solve(method=method)

            for k in ["v", "sigma", "num_iter"]:
                assert_allclose(sol1[k], sol2[k])