Example #1
0
def loop(
    env: gym.Env,
    agent: agents.Agent,
    num_episodes: int,
    epsilon: float,
    should_learn: bool,
):
    """Run *num_episodes* episodes of epsilon-greedy interaction with *env*.

    Each step is rendered and per-episode statistics are printed. A win is
    counted when an episode terminates with a truthy final reward.

    Args:
        env: environment providing reset/step/render/close and
            ``action_space.sample()``.
        agent: policy providing ``act()`` and ``learn()``.
        num_episodes: number of episodes to play before closing the env.
        epsilon: probability of taking a uniformly random action.
        should_learn: when True, ``agent.learn(...)`` is called every step.
    """
    win_count = 0
    episode_idx = 1
    state = env.reset()
    env.render()

    while episode_idx <= num_episodes:
        # Epsilon-greedy: explore with probability `epsilon`, else exploit.
        explore = random() < epsilon
        action = env.action_space.sample() if explore else agent.act(state)

        next_state, reward, done, info = env.step(action)

        if should_learn:
            agent.learn(state, action, next_state, reward)

        env.render()
        print("\tEpisodes:", episode_idx, "\tWins:", win_count)
        print("\tWin Ratio:", win_count / episode_idx)

        if not done:
            state = next_state
            continue

        # Episode over: reset, advance the episode counter, and credit a win
        # when the terminal reward is truthy (stats update after the printout,
        # exactly as before).
        state = env.reset()
        env.render()
        episode_idx += 1
        if reward:
            win_count += 1

    env.close()
Example #2
0
    # Fragment of a training-loop driver (the enclosing function's header and
    # the body of the final `if` are outside this view).
    # Builds the agent, restores a checkpoint if one exists, then runs episodes
    # while streaming rendered frames to an OpenCV window.
    _agent = Agent((8,), 4)  # 8-dim observation, 4 discrete actions — presumably LunarLander; confirm
    if os.path.exists(_f_checkpoint):
        # Resume from previously saved network weights.
        _agent.net.load_checkpoint(_f_checkpoint)

    _writer = SummaryWriter(_d_log)  # presumably a TensorBoard SummaryWriter logging to _d_log
    _is_quit = False  # set when the user closes the run via keyboard
    while _episode < _n_games:
        _observation = _env.reset()
        _done = False
        _score = 0.0  # cumulative reward for this episode
        while not _done:
            _action = _agent.get_action(_observation)
            _next_observation, _reward, _done, _info = _env.step(_action)
            _score += _reward
            # NOTE(review): _action is not passed to learn() — verify that
            # Agent.learn's signature really is (obs, reward, next_obs, done).
            _agent.learn(_observation, _reward, _next_observation, _done)
            _observation = _next_observation

            # Render to an OpenCV window; Esc (27) or 'q' aborts the whole run.
            _rgb = _env.render("rgb_array")
            _bgr = cv2.cvtColor(_rgb, cv2.COLOR_RGB2BGR)
            cv2.imshow("frame", _bgr)
            _key_code = cv2.waitKey(1)
            if _key_code in [27, ord('q')]:
                _is_quit = True
                break
        if _is_quit:
            break
        _scores.append(_score)
        _episode += 1
        # Running average over the most recent (up to) 100 episode scores.
        _avg_score = float(np.mean(_scores[-100:]))
        if _episode % 500 == 0: