Example #1
def autoplay(method, environment, resume, render):
    game = Game(name=environments_to_names[environment], render=render)

    init_state, state_shape = game.get_state(True)
    n_actions = game.env.action_space.n
    agent_cls = agent_factory[method]
    agent = agent_cls(state_shape, n_actions, environment, 1, 1, eps_start=0.0)  # eps_start=0.0 presumably disables epsilon-greedy exploration during evaluation
    agent.load(resume)

    log.info(f'Evaluating agent, loaded from {resume}, starting ...')

    game.reset()
    for t in count():
        state = game.get_state()
        action = agent.select_action(state)
        transition, done = game.step(int(action.cpu().numpy()))
        # agent.eval(
        #     transition, 1, 0.0)
        time.sleep(0.1)
        if done:
            log.info(f'agent survived {t} steps')
            game.reset()
            break

    game.env.close()
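
Examples #1, #2 and #3 rely on a small `Game` wrapper around a gym environment that is not shown on this page. The sketch below is a reconstruction inferred from the calls made above (classic gym API, `step()` returning a `(transition, done)` pair); treat the details as assumptions rather than the project's actual implementation.

import gym

class Game:
    def __init__(self, name, memory=None, render=False):
        self.env = gym.make(name)
        self.memory = memory            # optional ReplayMemory (see Example #3)
        self.render_on = render
        self.state = self.env.reset()   # classic gym API: reset() returns the observation
        self.rewards = 0.0

    def reset(self):
        self.state = self.env.reset()
        self.rewards = 0.0
        return self.state

    def get_state(self, with_shape=False):
        # get_state(True) is used above to obtain both the state and its shape
        if with_shape:
            return self.state, self.state.shape
        return self.state

    def step(self, action):
        if self.render_on:
            self.env.render()
        next_state, reward, done, _ = self.env.step(action)
        transition = (self.state, action, reward, next_state, done)
        if self.memory is not None:     # Example #3 passes a ReplayMemory into Game
            self.memory.push(transition)
        self.state = next_state
        self.rewards += reward
        return transition, done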
Example #2
def play(environment):
    game = Game(name=environments_to_names[environment], render=True)
    done = False
    try:
        while not done:
            action = click.prompt('Please enter an action (0, 1, 2, 3, ...)')
            # Game.step returns a (transition, done) pair (see Example #1); keep only the done flag
            _, done = game.step(int(action))
        log.info('[INFO] done ...')
    except KeyboardInterrupt:
        log.info("[INFO] quiting ...")
        exit()
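
`play` reads actions interactively with `click.prompt`, which suggests these functions are exposed as click commands elsewhere in the project. A minimal wiring sketch, assuming a single `--environment` option (the option name and default key are illustrative, not taken from the repository):

import click

@click.command()
@click.option('--environment', default='cartpole',
              help='key into the environments_to_names mapping')
def play_cmd(environment):
    play(environment)

if __name__ == '__main__':
    play_cmd()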
Example #3
def train(method, environment, resume, episodes, lr, lr_episodes, min_lr,
          eval_only, replay_width, batch_size, gamma, update_rate,
          save_interval):

    history = History(method + '_' + environment,
                      ['steps', 'avg_reward', 'loss'], resume is not None)
    history.flush()
    memory = ReplayMemory(replay_width)
    game = Game(name=environments_to_names[environment],
                memory=memory,
                render=False)
    init_state, state_shape = game.get_state(True)
    n_actions = game.env.action_space.n
    agent_cls = agent_factory[method]
    agent = agent_cls(state_shape,
                      n_actions,
                      environment,
                      episodes,
                      update_rate,
                      step_size=lr_episodes,
                      lr=lr,
                      save_interval=save_interval)

    # resume from a ckpt
    if resume is not None:
        agent.load(resume)

    avg_reward = MovingAverage(100)
    avg_loss = MovingAverage(100)

    log.info(f'Training for {episodes} episodes, starting ...')

    # main training loop
    for i in range(episodes):
        state = game.reset()
        done = False
        loss = None
        while not done:
            state = game.state
            action = agent.select_action(state)

            transition, done = game.step(int(action.to('cpu').numpy()))

            if len(memory) > batch_size:
                batched = memory.sample(batch_size)
                loss = agent.train(batched, batch_size, gamma, i)
                avg_loss.add(loss)
        reward = game.rewards
        # agent.save_best(reward)
        agent.save()
        agent.scheduler.step()
        avg_reward.add(reward)

        # moving averages
        text = [
            f'steps: {agent.step_cnt}',
            f'game epochs: {i}/{episodes}',
            f'train loss: {float(avg_loss):.5}',
            f'avg reward: {float(avg_reward):.5}',
            # f'best reward: {float(agent.best_reward):.5}',
            f'reward: {float(reward):.5}',
            f'epsilon: {agent.epsilon:.3}',
        ]
        log.info(', '.join(text), update=True)
        if agent.step_cnt % save_interval == 0:
            history.record({
                'steps': agent.step_cnt,
                'avg_reward': float(avg_reward),
                'loss': float(avg_loss),
            })

    game.env.close()
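
`ReplayMemory` and `MovingAverage` are small utilities from the same project. Below is a plausible minimal implementation of each, matching how they are used in the training loop above (a capacity-bounded buffer with `sample()`, and a windowed average read back via `float()`); the actual project code may differ:

import random
from collections import deque

class ReplayMemory:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, transition):
        self.buffer.append(transition)

    def sample(self, batch_size):
        # uniform random minibatch, as consumed by agent.train(batched, ...)
        return random.sample(list(self.buffer), batch_size)

    def __len__(self):
        return len(self.buffer)

class MovingAverage:
    def __init__(self, window):
        self.values = deque(maxlen=window)

    def add(self, value):
        self.values.append(value)

    def __float__(self):
        # float(avg) in the logging code reads the current windowed mean
        return sum(self.values) / len(self.values) if self.values else 0.0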
Example #4
        state, reward, done = env.render()
    # apparently the Conv2D wants a 4D shape like (1,64,64,1)
    state = shapeState(state)
    for time in range(500):
        action = agent.act(state)

        #!# In the following lines it is unclear what object would normally be
        #   assigned to next_state: in the original code the variable is overwritten
        #   by `next_state = shapeState(pix)`, i.e. with the image representation of
        #   the state. The intent seems to be to obtain the reward and done values
        #   from the `env.step(action)` call, while the subsequent `pix = env.render()`
        #   call actually captures the frame.

        # next_state, reward, done, _ = env.step(action)
        # pix  = env.render()
        env.step(action)
        pix, reward, done = env.render()

        frames.append(pix)
        next_state = shapeState(pix)
        reward = reward if not done else -500
        print(time, ')', reward)
        agent.write_state(state, action, reward, next_state, done)
        state = next_state
        if done:
            print("episode: {}/{}, score: {}, e: {:.2}".format(
                e, EPISODES, time, agent.epsilon))
            if time > max_score:
                max_score = time
                best = frames
            break
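
`shapeState` is referenced above but not shown. Given the comment that the Conv2D input wants a 4D shape like (1, 64, 64, 1), a reasonable guess is a grayscale-resize-reshape helper along these lines (the 64x64 grayscale target and the use of OpenCV are assumptions):

import numpy as np
import cv2

def shapeState(frame):
    # RGB frame -> grayscale -> 64x64 -> (batch, height, width, channels)
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    resized = cv2.resize(gray, (64, 64))
    return resized.reshape(1, 64, 64, 1).astype(np.float32) / 255.0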