Example no. 1
from itertools import count
from time import time

import numpy as np
import tqdm
import yaml

# DQNAgent, Env, Experience, EpisodeMetrics and PROJECT_DIRECTORY are
# assumed to come from the surrounding project.


def train(agent: DQNAgent, env: Env, episodes: int = 10_000):
    display = False

    progression = tqdm.trange(episodes,
                              desc=f"Training {agent.name}",
                              unit="episode")
    fps = 0

    for episode in progression:
        state = env.reset()

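        # Per-episode statistics: running mean reward, undiscounted
        # return and the furthest x position reached.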
        mean_reward = 0
        return_ = 0
        x_pos = 0

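        # Roll out one episode, stepping until termination.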
        for step in count(1):
            t = time()
            action = agent.act(np.asarray(state), explore=True)
            next_state, reward, done, info = env.step(action)
            agent.memorize(
                Experience((state, next_state, action, done, reward)))
            state = next_state
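            # One learning update per environment step.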
            agent.learn()

            mean_reward += (reward - mean_reward) / step
            return_ += reward
            x_pos = max(x_pos, info["x_pos"])
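            # Exponentially smoothed frames-per-second estimate.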
            fps = fps * 0.9 + 0.1 / (time() - t)

            # Every 100 steps, re-read display.yml so rendering can be
            # toggled per agent while training is running.
            if step % 100 == 0:
                try:
                    config = yaml.safe_load(
                        (PROJECT_DIRECTORY / "display.yml").read_text())
                    display = config.get(agent.name, {}).get(
                        "display", False)
                except Exception:
                    pass
            if display:
                env.render()

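            # Stop when the episode terminates or the level flag is
            # reached (info["flag_get"] from the Super Mario Bros env).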
            if done or info["flag_get"]:
                break

        progression.set_description(
            f"Training {agent.name}; "
            f"Frames: {agent.step} ({fps:.0f} FPS); "
            f"last progression: {x_pos} ({x_pos/3260:.1%}); "
            f"eps: {agent.eps:.2f}")

        agent.register_episode(
            EpisodeMetrics(episode=episode,
                           x_pos=x_pos,
                           return_=return_,
                           steps=step))

    agent.save_model()
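
The loop relies on several project-specific names that are not shown here
(DQNAgent, Env, Experience, EpisodeMetrics, PROJECT_DIRECTORY). As a rough,
assumed sketch, the two data containers could look like the following,
inferred only from how they are called above:

from typing import NamedTuple, Tuple


class Experience(NamedTuple):
    # Wraps a single transition tuple, matching the call
    # Experience((state, next_state, action, done, reward)) above.
    transition: Tuple


class EpisodeMetrics(NamedTuple):
    # Per-episode statistics, matching the keyword arguments passed to
    # agent.register_episode() above.
    episode: int
    x_pos: int
    return_: float
    steps: int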
Example no. 2
        # Flag terminal transitions and store the transition's effect in
        # the replay memory.
        t = 1 if d else 0
        agent.memory.store_effect(last_stored_frame_idx, a, r, t)

        inner_success.append(picked_up)

        if d:
            done = True

        # 42 steps per episode it seems
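        # Every 5 environment steps, run a burst of 20 learning updates.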
        if env_steps % 5 == 0:
            for _ in range(20):
                agent.learn()
            n_param_steps += 1

        # if n_param_steps % 100 == 0:
        #     agent.update_target_net()

        s = s_

    # Mark the episode successful if the object was picked up at least once.
    successful_eps.append(1 if np.sum(inner_success) > 0 else 0)

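    # Sync the target network with the online network every 10 episodes.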
    if i % 10 == 0:
        agent.update_target_net()
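
Example no. 2 is a fragment from the body of a larger training loop: i is
the episode index, env_steps counts environment steps, s/s_ are the current
and next state, a and r the action and reward, d the done flag, and
last_stored_frame_idx presumably comes from an earlier
agent.memory.store_frame(...) call, as in common replay-buffer
implementations. A minimal, self-contained sketch of the update schedule the
fragment implements (the Agent stub and the episode/step counts are
illustrative assumptions):

# Learn in bursts of 20 every 5 environment steps; sync the target
# network every 10 episodes.
class Agent:
    def learn(self):
        pass  # stub: one gradient step on the online network

    def update_target_net(self):
        pass  # stub: copy online-network weights to the target network


agent = Agent()
env_steps = 0
n_param_steps = 0

for i in range(100):            # episodes
    for _ in range(42):         # ~42 steps per episode, as noted above
        env_steps += 1
        if env_steps % 5 == 0:
            for _ in range(20):
                agent.learn()
            n_param_steps += 1
    if i % 10 == 0:
        agent.update_target_net()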