def test_transition2():
    """Check the transition table entry for one state under joint action (S, N).

    Starting from the encoded state (0, 3, 0, 1, False), every transition
    listed in ``env.P`` for the joint action (S, N) must lead to the encoded
    state (1, 3, 0, 1, False) with zero reward and the done flag unset.
    """
    start_state = SoccerEnv.encode_state(0, 3, 0, 1, False)
    joint_action = SoccerEnv.encode_action(SoccerEnv.Action.S, SoccerEnv.Action.N)
    env = SoccerEnv()
    transitions = env.P[start_state][joint_action]
    expected_next_state = SoccerEnv.encode_state(1, 3, 0, 1, False)

    # Guard against a vacuous pass: with no transitions the loop below would
    # execute zero assertions and the test would succeed without checking
    # anything.
    assert len(transitions) > 0

    # The probability is not checked here, only the outcome of each entry.
    for _prob, next_state, reward, done in transitions:
        assert next_state == expected_next_state
        assert reward == 0
        assert done == 0
def test_render():
    """Smoke test for rendering: passes as long as no call raises.

    Renders a freshly constructed environment, takes a single step with the
    joint action (Stick, Stick), then renders again.
    """
    env = SoccerEnv()
    env.render()

    joint_stick = env.encode_action(SoccerEnv.Action.Stick, SoccerEnv.Action.Stick)
    env.step(joint_stick)
    env.render()
def test_reward():
    """Check ``SoccerEnv.reward`` against a table of (state tuple, expected reward)."""
    # Cases are evaluated in order; the (0, 0, 0, 1, 0) entry appears twice,
    # exactly as in the original list of assertions.
    cases = [
        ((0, 2, 0, 1, 1), 0),
        ((0, 3, 0, 1, 1), -100),
        ((0, 0, 0, 1, 1), 100),
        ((0, 0, 0, 1, 0), 0),
        ((0, 0, 0, 1, 0), 0),
        ((0, 0, 0, 0, 0), 100),
        ((0, 0, 0, 3, 0), -100),
    ]
    for state, expected_reward in cases:
        assert SoccerEnv.reward(state) == expected_reward
def test_done():
    """Check ``SoccerEnv.done`` against a table of (state tuple, expected outcome).

    Non-terminal cases must return the literal ``False`` (identity check);
    terminal cases only need to be truthy.
    """
    cases = [
        ((0, 2, 0, 1, 1), False),
        ((0, 3, 0, 1, 1), True),
        ((0, 0, 0, 1, 1), True),
        ((0, 0, 0, 1, 0), False),
        ((0, 0, 0, 1, 0), False),
        ((0, 0, 0, 0, 0), True),
        ((0, 0, 0, 3, 0), True),
    ]
    for state, should_be_done in cases:
        outcome = SoccerEnv.done(state)
        if should_be_done:
            assert outcome
        else:
            assert outcome is False
def test_transitions():
    """Check the two-way outcome of (W, Stick) from state (0, 2, 0, 1, 0).

    Verified twice: once through the ``SoccerEnv.transitions`` helper, which
    yields (next_state, reward, done) triples, and once through the ``env.P``
    table, whose entries carry a leading probability that must be 0.5
    (within 1e-3).
    """
    move_a, move_b = SoccerEnv.Action.W, SoccerEnv.Action.Stick

    # Part 1: the static helper.
    outcomes = SoccerEnv.transitions(0, 2, 0, 1, 0, move_a, move_b)
    expected_states = {
        SoccerEnv.encode_state(0, 2, 0, 1, 0),
        SoccerEnv.encode_state(0, 2, 0, 1, 1),
    }
    assert len(outcomes) == 2
    for successor, payoff, finished in outcomes:
        assert successor in expected_states
        assert payoff == 0
        assert finished == 0

    # Part 2: the same state/action pair looked up in the transition table.
    encoded_state = SoccerEnv.encode_state(0, 2, 0, 1, 0)
    encoded_action = SoccerEnv.encode_action(move_a, move_b)
    env = SoccerEnv()
    table_entries = env.P[encoded_state][encoded_action]
    assert len(table_entries) == 2
    for likelihood, successor, payoff, finished in table_entries:
        assert abs(likelihood - 0.5) < 0.001
        assert successor in expected_states
        assert payoff == 0
        assert finished == 0
def test_action_encode():
    """Round-trip the action pair (1, 2) through encode_action / decode_action."""
    env = SoccerEnv()
    action_pair = (1, 2)
    encoded = env.encode_action(*action_pair)
    assert action_pair == env.decode_action(encoded)
#%% md ## Step 0: Soccer Env Setup #%% import sys sys.path.insert( 0, '/Users/cesleemontgomery/masters/cs7642/projects/7642Fall2019cmontgomery38/project3' ) from soccer import SoccerEnv #%% env = SoccerEnv() # n_episodes_MAX = 8*10**5 #10*10**5 # steps_MAX = 100 # verbose = False # alpha = 0.01 # # num_states = env.observation_space.n # num_actions = env.action_space.n # num_individual_actions = int( # math.sqrt(env.action_space.n)) # individual player action space is 5, joint action space is 25 # num_states = env.observation_space.n num_actions = env.action_space.n num_individual_actions = int( math.sqrt(env.action_space.n)