Esempio n. 1
0

def choose_action(state):
    """Select an action for ``state`` by randomly exploring or exploiting.

    A single draw from ``np.random.rand()`` is compared with the value
    returned by ``compute_epsilon()``.  When the draw is strictly smaller,
    the result of ``explore(state)`` is returned; otherwise the result of
    ``exploit(state)`` is returned.
    """
    # Draw first, then query epsilon, so the call order matches the
    # sequence of random-number consumption callers may rely on.
    draw = np.random.rand()
    should_explore = draw < compute_epsilon()
    policy = explore if should_explore else exploit
    return policy(state)


# Interaction loop: 10000 iterations of "read state -> choose action -> act ->
# compute successor -> update weights".  `steps`, `env`, `agent` and
# `update_w` are defined outside this excerpt.
for i in range(10000):
    steps += 1
    # Start a new game before reading the state if the current one has ended.
    if env.game_over():
        env.reset_game()
    # State is the (x, y) pair built from the "snake_head_x"/"snake_head_y"
    # entries of the game state, each truncated to int.
    s = (int(agent.getGameState()["snake_head_x"]),
         int(agent.getGameState()["snake_head_y"]))
    print("----------")
    print("s = ", s)
    action = choose_action(s)
    # The value returned by env.act() is discarded here.
    env.act(action)
    print(action)
    # Successor state is computed arithmetically from `s` (one coordinate
    # shifted by 1), not read back from the environment.  It stays (0, 0)
    # when none of the branches below matches.
    sprim = (0, 0)
    # 119, 97 and 100 equal ord('w'), ord('a') and ord('d') -- presumably the
    # key codes from the environment's action set; TODO confirm, and confirm
    # the direction each offset is meant to represent.
    if action == 119:
        sprim = (s[0], s[1] + 1)
        update_w(s, sprim, action)
    if action == 97:
        sprim = (s[0] + 1, s[1])
        update_w(s, sprim, action)
    # NOTE(review): no update_w call is visible for this action; the excerpt
    # appears to be cut off right after the assignment -- confirm.
    if action == 100:
        sprim = (s[0] - 1, s[1])
Esempio n. 2
0
# Script entry point: runs a DQNAgent, loaded from disk, on Snake for EPISODE
# episodes.
if __name__ == "__main__":

    # Create the Snake game (width and height 256) and wrap it in a PLE
    # environment that shows the screen, runs at fps=10 and uses
    # `process_state` as its state preprocessor.
    game = Snake(width=256, height=256)
    env = PLE(game,
              display_screen=True,
              fps=10,
              state_preprocessor=process_state)
    agent = DQNAgent(env)
    # Presumably restores previously saved model weights -- TODO confirm what
    # DQNAgent.load reads from this file.
    agent.load('./save/snake.h5')
    env.init()

    for e in range(EPISODE):
        env.reset_game()
        score = 0
        state = game.getGameState()

        # Flatten the state dict into a list of its values ordered by key.
        state = sorted(state.items(), key=operator.itemgetter(0))
        for i in range(len(state)):
            state[i] = state[i][1]
        # The third value (in key order) is replaced by its length, so it must
        # be a sized object -- presumably the snake body list; TODO confirm
        # against the game-state schema.
        state[2] = len(state[2])
        # Wrap in an outer list so the array has a leading axis of size 1.
        state = np.array([state])
        # At most 5000 agent steps per episode.
        for time_t in range(5000):
            action = agent.act(state)

            # env.act() returns the reward for the action; accumulate it.
            reward = env.act(action)
            score += reward

            # Re-read the game state and begin the same key-sorted flattening
            # for the successor state.
            next_state = game.getGameState()
            next_state = sorted(next_state.items(), key=operator.itemgetter(0))
            for i in range(len(next_state)):
Esempio n. 3
0
# NOTE(review): the object named `agent` is passed to PLE in the game position
# and is later queried with getGameState(); presumably it is the game
# instance -- confirm.
env = PLE(agent, fps=15, force_fps=False, display_screen=True)

env.init()

actions = env.getActionSet()

# Q-values keyed by (state values..., action code); missing entries read as 0.
q_table = {}
# Presumably the learning rate and discount factor of the Q-learning update;
# neither is used in the visible lines -- TODO confirm.
alpha = 0.1
gamma = 0.9

# Runs until the process is interrupted; no exit condition is visible here.
while True:

    # Dumps the whole table on every iteration.
    print(q_table)

    # NOTE(review): the state is read before the game-over check below, so
    # after a reset it still describes the finished game -- confirm intent.
    old_game_state = round_state(agent.getGameState())

    if env.game_over():
        env.reset_game()

    # Look up the current Q-value of each action in this state.  119, 97, 100
    # and 115 equal ord('w'), ord('a'), ord('d') and ord('s'); the direction
    # names are the source's own labels -- TODO confirm they match the
    # environment's action set.
    up = q_table.get(tuple(old_game_state.values()) + (119, ), 0)
    right = q_table.get(tuple(old_game_state.values()) + (97, ), 0)
    left = q_table.get(tuple(old_game_state.values()) + (100, ), 0)
    down = q_table.get(tuple(old_game_state.values()) + (115, ), 0)

    # NOTE(review): the name `list` shadows the builtin for the rest of the
    # module.
    list = [up, right, left, down]
    # Highest Q-value among the four actions.
    max_act = max(list)

    counter = 0

    if max_act == up: