def test_terminal_state():
    """A single terminal state with one action must yield a zero Q-table."""
    with dsl.new() as new_mdp:
        dsl.terminal_state()
        dsl.action()

    # Terminal states accrue no future reward, so the optimal Q-value is 0.
    solver = lp.LinearProgramming(new_mdp)
    assert np.isclose(solver.compute_q_table(), [0])
def test_coverage():
    """Smoke-test the DSL end to end: build, inspect, step, and render an MDP."""
    with dsl.new() as new_mdp:  # type: mdp.MDPSpec
        start = dsl.state()
        finish = dsl.terminal_state()
        action = dsl.action()
        # Single transition: taking `action` in `start` reaches `finish` with reward 1.
        start & action > finish | dsl.reward(1)

    assert new_mdp.num_states == 2
    assert new_mdp.num_actions == 1

    # Graph conversion should not raise.
    new_mdp.to_graph()

    env: mdp.MDPEnv = new_mdp.to_env()
    state = env.reset()
    assert state == 0

    state, reward, is_done, info = env.step(0)
    assert state == 1
    assert reward == 1
    assert is_done

    # Both render modes should be accepted without error.
    env.render(mode='rgb_array')
    env.render(mode='png')
def _multi_round_nmdp():
    """Build a discounted, non-deterministic multi-round MDP fixture.

    Returns:
        The validated MDP spec produced by ``dsl.new()``.
    """
    # NOTE: the local was renamed from `mdp` to `spec` so it no longer
    # shadows the imported `mdp` module used elsewhere in this file.
    with dsl.new() as spec:
        start = dsl.state()
        end = dsl.terminal_state()
        # Two actions from `start`, each with stochastic outcomes (weighted
        # alternatives via `|` and `* k` multiplicity).
        start & dsl.action() > dsl.reward(5) | start | end * 2
        start & dsl.action() > dsl.reward(3) | start * 2 | end
        dsl.discount(0.5)
    return spec.validate()
def _one_round_dmdp():
    """Build a deterministic one-round MDP fixture (one decision, then done).

    Returns:
        The validated MDP spec produced by ``dsl.new()``.
    """
    # NOTE: the local was renamed from `mdp` to `spec` so it no longer
    # shadows the imported `mdp` module used elsewhere in this file.
    with dsl.new() as spec:
        start = dsl.state()
        end = dsl.terminal_state()
        action_0 = dsl.action()
        action_1 = dsl.action()
        # Both actions lead to the terminal state; only action_1 pays out.
        start & (action_0 | action_1) > end
        start & action_1 > dsl.reward(1.)
    return spec.validate()
def _two_round_dmdp():
    """Build a deterministic two-round MDP fixture with a better/worse branch.

    Returns:
        The validated MDP spec produced by ``dsl.new()``.
    """
    # NOTE: the local was renamed from `mdp` to `spec` so it no longer
    # shadows the imported `mdp` module used elsewhere in this file.
    with dsl.new() as spec:
        start = dsl.state()
        better = dsl.state()
        worse = dsl.state()
        end = dsl.terminal_state()
        action_0 = dsl.action()
        action_1 = dsl.action()
        # Round 1: action choice decides which intermediate state is reached.
        start & action_0 > better
        better & action_1 > dsl.reward(3)
        start & action_1 > worse
        # Round 2: rewards differ by branch and action.
        worse & action_0 > dsl.reward(1)
        worse & action_1 > dsl.reward(2)
        # Either action from either intermediate state terminates the episode.
        (better | worse) & (action_0 | action_1) > end
    return spec.validate()
def _two_round_nmdp():
    """Build a non-deterministic two-round MDP fixture with stochastic rewards.

    Returns:
        The validated MDP spec produced by ``dsl.new()``.
    """
    # NOTE: the local was renamed from `mdp` to `spec` so it no longer
    # shadows the imported `mdp` module used elsewhere in this file.
    with dsl.new() as spec:
        start = dsl.state()
        a = dsl.state()
        b = dsl.state()
        end = dsl.terminal_state()
        action_0 = dsl.action()
        action_1 = dsl.action()
        # Branch A: rewards are drawn from weighted alternatives
        # (`|` separates outcomes, `* k` gives an outcome weight k).
        start & action_0 > a
        a & action_0 > dsl.reward(-1) | dsl.reward(1)
        a & action_1 > dsl.reward(0) * 2 | dsl.reward(9)
        # Branch B.
        start & action_1 > b
        b & action_0 > dsl.reward(0) | dsl.reward(2)
        b & action_1 > dsl.reward(2) | dsl.reward(3)
        # Any second-round action terminates the episode.
        (a | b) & (action_0 | action_1) > end
    return spec.validate()