# Example no. 1
def _run_agent_one_ep(env: BaseEnv,
                      agent: DQNAgent,
                      config: Config,
                      eps: float,
                      behavior_name: str,
                      train: Optional[bool] = True):
    """Run the agent for one full episode in the given environment.

    Steps the environment until the tracked agent reaches a terminal
    state, accumulating the per-step rewards. When ``train`` is truthy,
    each (S, A, R, S', done) transition is fed to ``agent.step`` so the
    agent can learn from it.

    Args:
        env: ML-Agents environment to run in.
        agent: DQN agent providing ``act`` and ``step``.
        config: Run configuration (currently unused here; kept for a
            uniform caller interface).
        eps: Epsilon for epsilon-greedy action selection.
        behavior_name: ML-Agents behavior whose steps are queried.
        train: If truthy, push experiences to the agent for learning.

    Returns:
        Tuple ``(episode_reward, did_win)`` — total reward collected and
        whether the terminal reward indicated a win (close to 1.0).

    Raises:
        RuntimeError: If the agent appears in neither the decision nor
            the terminal steps after an environment step.
    """
    # Get a starting state.
    env.reset()

    decision_steps, terminal_steps = env.get_steps(behavior_name)
    state = decision_steps.obs[0]
    # Track a single agent; assumes it keeps this id for the episode.
    agent_id = decision_steps.agent_id[0]

    done = False
    did_win = False
    episode_reward = 0.0

    while not done:
        reward = 0.0
        # Choose and perform an epsilon-greedy action.
        action = agent.act(decision_steps.obs[0], eps)
        # ML-Agents expects a (num_agents, action_size) array.
        env.set_actions(behavior_name,
                        np.expand_dims(action, 0).reshape(-1, 1))
        env.step()

        decision_steps, terminal_steps = env.get_steps(behavior_name)
        # Determine S', R, done. The agent may appear in decision steps,
        # terminal steps, or (briefly) both on the terminal step.
        next_state = None
        if agent_id in decision_steps:
            reward += decision_steps.reward[0]
            next_state = decision_steps.obs[0]
        if agent_id in terminal_steps:
            terminal_reward = terminal_steps.reward[0]
            # A terminal reward of (approximately) 1.0 marks a win.
            did_win = math.isclose(terminal_reward, 1.0)
            reward += terminal_reward
            next_state = terminal_steps.obs[0]
            done = True

        if next_state is None:
            # Explicit raise instead of assert: asserts are stripped
            # under `python -O`, and this is a real runtime condition.
            raise RuntimeError(
                f"next_state cannot be None. Agent {agent_id} did not "
                f"appear in decision or terminal steps")

        if train:
            # Learn from (S, A, R, S', done).
            experience = Experience(state, action, reward, next_state, done)
            agent.step(experience)

        # Advance to the new state and accumulate the episode return.
        state = next_state
        episode_reward += reward

    return (episode_reward, did_win)