Beispiel #1
0
def test_sarsa_agent():
    """Train SARSA for a single episode and check the resulting Q-table size.

    The agent must start with an empty Q-table; after one training episode
    on the configured deck the table is expected to hold 61 entries.
    """
    deck = RoRoDeck()
    deck.rows = deck.lanes = 12
    deck.reset()

    # Overwrite two entries in row 4 of vehicle_data with the same value.
    # NOTE(review): presumably row 4 holds the unit lengths, so this makes
    # all units equally long -- confirm against RoRoDeck.
    for column in (1, 3):
        deck.vehicle_data[4][column] = 2

    learner = SARSA(deck, None)
    # Nothing has been learned yet, so no state may be registered.
    assert len(learner.q_table.keys()) == 0

    learner.number_of_episodes = 1
    learner.train()
    assert len(learner.q_table.keys()) == 61
Beispiel #2
0
def test_max_action_method_sarsa():
    """Check which action ``SARSA.max_action`` picks for a hand-filled state.

    The Q-values of the initial state are set to ``[0, 0, 1, 2]``. With the
    environment's own ``possible_actions`` the expected result is action 3;
    once the candidates are limited to ``[0, 1, 2]`` it is action 2.
    """
    deck = RoRoDeck()
    deck.rows = deck.lanes = 12
    initial_state = deck.reset()

    learner = SARSA(deck, None)

    # The Q-table is keyed by the raw bytes of the state array.
    state_key = initial_state.tobytes()
    learner.q_table[state_key] = np.zeros(4)
    assert np.count_nonzero(learner.q_table[state_key]) == 0

    # Give actions 2 and 3 distinct positive values, action 3 the larger one.
    learner.q_table[state_key][2] = 1
    learner.q_table[state_key][3] = 2
    assert learner.max_action(initial_state, deck.possible_actions) == 3

    # With action 3 removed from the candidates, action 2 is expected.
    deck.possible_actions = np.array([0, 1, 2])
    assert learner.max_action(initial_state, deck.possible_actions) == 2