def test_weighted_rewards():
    """Weighted reward outcomes can be attached to a state/action pair."""
    with dsl.new() as spec:
        s = dsl.state()
        a = dsl.action()
        # A single weighted reward, then two alternative weighted rewards.
        s & a > dsl.reward(1) * 1
        s & a > dsl.reward(1) * 1 | dsl.reward(2) * 3
def test_mapping_alternative_mismatch_fail():
    """Mixing a state and an action inside one alternative is a DSL syntax error."""
    with pytest.raises(dsl.SyntaxError):
        with dsl.new():
            some_state = dsl.state()
            some_action = dsl.action()
            some_state > some_state | some_action
def test_missing_terminal_state_fail():
    """Validation rejects a spec that declares no terminal state."""
    with pytest.raises(ValueError):
        with dsl.new() as spec:
            dsl.state()
            dsl.action()
            spec.validate()
def test_multi_states_fail():
    """Combining two states with `&` is rejected by the DSL."""
    with pytest.raises(dsl.SyntaxError):
        with dsl.new():
            first = dsl.state()
            second = dsl.state()
            first & second
def test_multi_actions_fail():
    """Combining two actions with `&` is rejected by the DSL."""
    with pytest.raises(dsl.SyntaxError):
        with dsl.new():
            first = dsl.action()
            second = dsl.action()
            first & second
def test_coverage():
    """End-to-end smoke test: build a spec, check counts, convert and step the env."""
    with dsl.new() as spec:  # type: mdp.MDPSpec
        begin = dsl.state()
        goal = dsl.terminal_state()
        act = dsl.action()
        begin & act > goal | dsl.reward(1)

        assert spec.num_states == 2
        assert spec.num_actions == 1

        spec.to_graph()
        env: mdp.MDPEnv = spec.to_env()

        obs = env.reset()
        assert obs == 0
        obs, reward, is_done, info = env.step(0)
        assert obs == 1
        assert reward == 1
        assert is_done

        # Exercise both render backends.
        env.render(mode='rgb_array')
        env.render(mode='png')
def test_terminal_state():
    """A lone terminal state has a zero Q-value under LP solving."""
    with dsl.new() as spec:
        dsl.terminal_state()
        dsl.action()
        solver = lp.LinearProgramming(spec)
        assert np.isclose(solver.compute_q_table(), [0])
def test_weighted_next_states():
    """Next states may carry probability weights; the spec still validates."""
    with dsl.new() as spec:
        s = dsl.state()
        a = dsl.action()
        s & a > s * 0.5
        s & a > s * 2 | s * 5
        spec.validate()
def test_alternatives3():
    """Paired alternatives map each action to its own next state."""
    with dsl.new():
        left = dsl.state()
        right = dsl.state()
        go_left = dsl.action()
        go_right = dsl.action()
        (left | right) & ((go_left > left) | (go_right > right))
        dsl.to_env()
def test_alternatives():
    """State and action alternatives broadcast against alternative next states."""
    with dsl.new():
        left = dsl.state()
        right = dsl.state()
        act_a = dsl.action()
        act_b = dsl.action()
        (left | right) & (act_a | act_b) > (left | right)
        dsl.to_env()
def _multi_round_nmdp():
    """Build and validate a discounted multi-round non-deterministic MDP fixture."""
    # Local renamed from `mdp` to `spec` so it no longer shadows the module name.
    with dsl.new() as spec:
        start = dsl.state()
        end = dsl.terminal_state()
        start & dsl.action() > dsl.reward(5) | start | end * 2
        start & dsl.action() > dsl.reward(3) | start * 2 | end
        dsl.discount(0.5)
        return spec.validate()
def test_divergence_raises():
    """An undiscounted rewarding self-loop diverges; the solver must raise."""
    with dsl.new() as spec:
        start = dsl.state()
        act = dsl.action()
        start & act > start | dsl.reward(1)
        solver = lp.LinearProgramming(spec)
        with pytest.raises(ValueError):
            solver.compute_v_vector(max_iterations=10)
def _one_round_dmdp():
    """Build and validate a one-round deterministic MDP fixture.

    Both actions lead to the terminal state; only ``action_1`` also pays a reward.
    """
    with dsl.new() as spec:
        start = dsl.state()
        end = dsl.terminal_state()
        action_0 = dsl.action()
        action_1 = dsl.action()
        start & (action_0 | action_1) > end
        start & action_1 > dsl.reward(1.)
        return spec.validate()
def test_coverage_nmrp():
    """Smoke test: a two-state, two-action spec converts to an env."""
    with dsl.new():
        s_a = dsl.state()
        s_b = dsl.state()
        a_a = dsl.action()
        a_b = dsl.action()
        s_a & a_a > s_a
        s_a & a_b > s_b
        s_b & (a_a | a_b) > s_b
        dsl.to_env()
def test_geometric_series():
    """A discounted self-loop is the geometric series: 1 / (1 - 0.5) = 2."""
    with dsl.new() as spec:
        start = dsl.state()
        act = dsl.action()
        start & act > dsl.reward(1) | start
        dsl.discount(0.5)
        solver = lp.LinearProgramming(spec)
        assert np.allclose(solver.compute_v_vector(), [2.0])
        assert np.allclose(solver.compute_q_table(), [[2.0]])
def test_coverage_spec_conversions():
    """Smoke test: a reward-free spec converts to an env and a graph, and validates.

    Renamed from ``test_coverage``: this module already defines a function of
    that name, and the duplicate definition shadowed the earlier one, so pytest
    only ever collected one of the two tests.
    """
    with dsl.new() as new_mdp:
        stateA = dsl.state()
        stateB = dsl.state()
        actionA = dsl.action()
        actionB = dsl.action()
        stateA & actionA > stateA
        stateA & actionB > stateB
        stateB & (actionA | actionB) > stateB
        new_mdp.to_env()
        new_mdp.to_graph()
        return new_mdp.validate()
def _two_round_dmdp():
    """Build and validate a two-round deterministic MDP fixture.

    The first choice routes to ``better`` or ``worse``; second-round actions
    pay different rewards before every path reaches the terminal state.
    """
    with dsl.new() as spec:
        start = dsl.state()
        better = dsl.state()
        worse = dsl.state()
        end = dsl.terminal_state()
        action_0 = dsl.action()
        action_1 = dsl.action()

        start & action_0 > better
        better & action_1 > dsl.reward(3)
        start & action_1 > worse
        worse & action_0 > dsl.reward(1)
        worse & action_1 > dsl.reward(2)
        (better | worse) & (action_0 | action_1) > end
        return spec.validate()
def _two_round_nmdp():
    """Build and validate a two-round MDP fixture with alternative rewards.

    The first action picks a branch; second-round rewards are given as
    (weighted) alternatives, after which every path reaches the terminal state.
    """
    with dsl.new() as spec:
        start = dsl.state()
        branch_a = dsl.state()
        branch_b = dsl.state()
        end = dsl.terminal_state()
        action_0 = dsl.action()
        action_1 = dsl.action()

        start & action_0 > branch_a
        branch_a & action_0 > dsl.reward(-1) | dsl.reward(1)
        branch_a & action_1 > dsl.reward(0) * 2 | dsl.reward(9)
        start & action_1 > branch_b
        branch_b & action_0 > dsl.reward(0) | dsl.reward(2)
        branch_b & action_1 > dsl.reward(2) | dsl.reward(3)
        (branch_a | branch_b) & (action_0 | action_1) > end
        return spec.validate()
def test_multiple_actions():
    """LP solution matches the hand-computed V and Q for a three-state MDP."""
    with dsl.new() as spec:
        start = dsl.state()
        state_a = dsl.state()
        state_b = dsl.state()
        action_a = dsl.action()
        action_b = dsl.action()
        either_action = action_a | action_b

        start & action_a > state_a
        start & action_b > state_b
        state_a & either_action > state_a | dsl.reward(1)
        state_b & either_action > state_b | dsl.reward(2)
        dsl.discount(1 / 3)

        solver = lp.LinearProgramming(spec)
        assert np.allclose(solver.compute_v_vector(), [1, 1.5, 3])
        assert np.allclose(solver.compute_q_table(), [[0.5, 1], [1.5, 1.5], [3, 3]])