Example no. 1
def autoplay(method, environment, resume, render):
    game = Game(name=environments_to_names[environment], render=render)

    init_state, state_shape = game.get_state(True)
    n_actions = game.env.action_space.n
    agent_cls = agent_factory[method]
    agent = agent_cls(state_shape, n_actions, environment, 1, 1, eps_start=0.0)
    agent.load(resume)

    log.info(f'Evaluating agent, loaded from {resume}, starting ...')

    game.reset()
    for t in count():
        state = game.get_state()
        action = agent.select_action(state)
        transition, done = game.step(int(action.cpu().numpy()))
        # agent.eval(transition, 1, 0.0)
        time.sleep(0.1)
        if done:
            log.info(f'agent survived {t} steps')
            game.reset()
            break

    game.env.close()
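
Note on the call above: eps_start=0.0 presumably makes select_action fully greedy, which is what you want for evaluation. A minimal sketch of how autoplay might be invoked directly, with hypothetical argument values (the original entry point is not shown):

# hypothetical invocation; 'dqn' and 'cartpole' are placeholder keys for
# agent_factory and environments_to_names, and the checkpoint path is made up
autoplay(method='dqn', environment='cartpole', resume='checkpoints/best.pt', render=True)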
Example no. 2
def main():
    env = Game(fps=fps,
               screensize=screen,
               state=state,
               playersize=player_size,
               playercolor=player_color,
               enemysize=enemy_size,
               enemycolor=enemy_color,
               squaresize=square_size,
               squarecolor=square_color)
    get_ann = {
        "clever": build_ann,
        "forged": forge_ann,
        "keras": keras_ann
    }.get(agent_type, lambda *args: None)

    # the name 'agent' is already taken by the agent module, so the instance is called 'actor' :(
    actor = get_agent(env=env, get_network=get_ann)
    env.reset(actor)
    env.mainloop()
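
The network builder above is picked with a dictionary lookup instead of an if/elif chain, with a no-op lambda as the fallback for unknown agent types. The same dispatch pattern in isolation, using illustrative names that are not from the original module:

def build_a(*args):
    return "network A"

def build_b(*args):
    return "network B"

builders = {"a": build_a, "b": build_b}
build = builders.get("unknown", lambda *args: None)  # unknown key -> no-op builder
print(build())  # prints None, since "unknown" is not a registered agent type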
Example no. 3
def train(method, environment, resume, episodes, lr, lr_episodes, min_lr,
          eval_only, replay_width, batch_size, gamma, update_rate,
          save_interval):

    history = History(method + '_' + environment,
                      ['steps', 'avg_reward', 'loss'], resume is not None)
    history.flush()
    memory = ReplayMemory(replay_width)
    game = Game(name=environments_to_names[environment],
                memory=memory,
                render=False)
    init_state, state_shape = game.get_state(True)
    n_actions = game.env.action_space.n
    agent_cls = agent_factory[method]
    agent = agent_cls(state_shape,
                      n_actions,
                      environment,
                      episodes,
                      update_rate,
                      step_size=lr_episodes,
                      lr=lr,
                      save_interval=save_interval)

    # resume from a ckpt
    if resume is not None:
        agent.load(resume)

    avg_reward = MovingAverage(100)
    avg_loss = MovingAverage(100)

    log.info(f'Training for {episodes} episodes, starting ...')

    # main training loop
    for i in range(episodes):
        state = game.reset()
        done = False
        loss = None
        while not done:
            state = game.state
            action = agent.select_action(state)

            transition, done = game.step(int(action.to('cpu').numpy()))

            if len(memory) > batch_size:
                batched = memory.sample(batch_size)
                loss = agent.train(batched, batch_size, gamma, i)
                avg_loss.add(loss)
        reward = game.rewards
        # agent.save_best(reward)
        agent.save()
        agent.scheduler.step()
        avg_reward.add(reward)

        # moving averages
        text = [
            f'steps: {agent.step_cnt}',
            f'game epochs: {i}/{episodes}',
            f'train loss: {float(avg_loss):.5}',
            f'avg reward: {float(avg_reward):.5}',
            # f'best reward: {float(agent.best_reward):.5}',
            f'reward: {float(reward):.5}',
            f'epsilon: {agent.epsilon:.3}',
        ]
        log.info(', '.join(text), update=True)
        if agent.step_cnt % save_interval == 0:
            history.record({
                'steps': agent.step_cnt,
                'avg_reward': float(avg_reward),
                'loss': float(avg_loss),
            })

    game.env.close()
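
MovingAverage is used above as a fixed-window running mean that supports add() and conversion via float(); its definition is not shown. A minimal sketch of such a helper, assuming a deque-backed window (only the usage is taken from the code above):

from collections import deque

class MovingAverage:
    # fixed-size running mean; add() pushes a value, float() reads the mean
    def __init__(self, size):
        self.values = deque(maxlen=size)

    def add(self, value):
        self.values.append(value)

    def __float__(self):
        return sum(self.values) / len(self.values) if self.values else 0.0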
Example no. 4
batch_size = 32

max_score = 0
frames = []
best = []

print('get the game window in focus')
for i in range(4, 0, -1):
    print(i)
    time.sleep(1)

for e in range(EPISODES):
    frames = []
    reward = -1
    while reward != 0:
        state = env.reset()
        state, reward, done = env.render()
    # apparently the Conv2D wants a 4D shape like (1,64,64,1)
    state = shapeState(state)
    # use 't' so the loop variable does not shadow the time module used above
    for t in range(500):
        action = agent.act(state)

        #!# For the commented-out lines below it is unclear what object would
        #   normally be assigned to next_state: in the original code the
        #   variable is immediately overwritten by `next_state = shapeState(pix)`,
        #   i.e. by the image representation of the state. The intent seems to
        #   be to read the reward and done values from the `env.step()` call,
        #   while the following `pix = env.render()` actually grabs the screen
        #   capture.

        # next_state, reward, done, _ = env.step(action)
        # pix  = env.render()
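
Putting the comments above together, a hedged sketch of what shapeState and the intended step/render sequence presumably look like; only the (1, 64, 64, 1) shape and the commented-out calls come from the snippet, the rest is assumption:

import numpy as np

def shapeState(pix):
    # Conv2D expects a batch and channel axis: (1, 64, 64, 1)
    return np.asarray(pix, dtype=np.float32).reshape(1, 64, 64, 1)

# presumed intent of the commented-out lines: act, read reward/done from the
# step, then grab a fresh screen capture and shape it as the next state
# _, reward, done, _ = env.step(action)
# pix = env.render()
# next_state = shapeState(pix)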
Example no. 5
statistics = {
    'reward': [],
    'val_loss': [],
    'policy_loss': [],
}

best_reward = 0
for i in range(0, N_EPISODES):
    memory = Memory()
    num_steps = 0
    num_ep = 0
    reward_batch = 0

    while num_steps < BATCH_SIZE:
        S = env.reset()
        S = running_state(S)
        t = 0
        reward_sum = 0

        while True:
            t += 1

            A = ppo_agent.select_best_action(S)
            S_prime, R, is_done = env.take_one_step(A.item())

            reward_sum += R
            mask = 1 - int(is_done)

            memory.push(S, np.array([A.item()]), mask, R)
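
The mask pushed with each transition is 1 for non-terminal steps and 0 when the episode ends; in a typical return computation it cuts the discounted bootstrap at episode boundaries. A generic illustration of that use, not taken from this snippet:

def compute_returns(rewards, masks, gamma=0.99):
    # walk the batch backwards; a mask of 0 resets the running return
    returns, running = [], 0.0
    for r, m in zip(reversed(rewards), reversed(masks)):
        running = r + gamma * m * running
        returns.append(running)
    return list(reversed(returns))

# example: two one-step episodes with rewards 1 and 2
# compute_returns([1, 2], [0, 0]) -> [1.0, 2.0]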