def choose_action(state):
    """Epsilon-greedy action selection over a Q-table.

    With probability compute_epsilon() take a random/exploratory action,
    otherwise the greedy one. `explore`, `exploit`, and `compute_epsilon`
    are defined elsewhere in this file — not visible in this chunk.
    """
    # print("choose s= ", state)
    prob = np.random.rand()
    if prob < compute_epsilon():
        return explore(state)
    else:
        return exploit(state)


# Training driver loop. `env`, `agent`, `steps`, and `update_w` are defined
# elsewhere in the file (not visible here). NOTE(review): this chunk is
# truncated — the `action == 100` branch below is cut off mid-way, and the
# source text had its indentation collapsed, so the nesting shown here is a
# best-effort reconstruction; verify against the original file.
for i in range(10000):
    steps += 1
    if env.game_over():
        env.reset_game()
    # State = integer (x, y) of the snake head taken from the game state dict.
    s = (int(agent.getGameState()["snake_head_x"]), int(agent.getGameState()["snake_head_y"]))
    print("----------")
    print("s = ", s)
    action = choose_action(s)
    env.act(action)
    print(action)
    sprim = (0, 0)
    # Action codes are pygame key constants: 119='w', 97='a', 100='d'.
    # NOTE(review): 97 ('a') maps to x+1 and 100 ('d') to x-1, which looks
    # mirrored relative to the usual left/right convention — confirm this is
    # intentional for this game's coordinate system.
    if action == 119:
        sprim = (s[0], s[1] + 1)
        update_w(s, sprim, action)
    if action == 97:
        sprim = (s[0] + 1, s[1])
        update_w(s, sprim, action)
    if action == 100:
        sprim = (s[0] - 1, s[1])
        # ...chunk truncated here; the matching update_w call presumably follows.
# DQN training entry point for the PLE Snake game. `Snake`, `PLE`, `DQNAgent`,
# `process_state`, `EPISODE`, `operator`, and `np` come from imports/definitions
# outside this chunk. NOTE(review): this chunk is truncated mid-statement (the
# final `for` has no visible body) and its indentation was collapsed, so the
# nesting below is a best-effort reconstruction — verify against the original.
if __name__ == "__main__":
    game = Snake(width=256, height=256)
    env = PLE(game, display_screen=True, fps=10, state_preprocessor=process_state)
    agent = DQNAgent(env)
    agent.load('./save/snake.h5')  # resume from a previously saved model
    env.init()
    for e in range(EPISODE):
        env.reset_game()
        score = 0
        # Flatten the game-state dict into a value vector with a deterministic
        # (key-sorted) order so the network always sees features in the same slots.
        state = game.getGameState()
        state = sorted(state.items(), key=operator.itemgetter(0))
        for i in range(len(state)):
            state[i] = state[i][1]
        # Presumably state[2] is a list-valued feature (e.g. snake body segments)
        # replaced by its length — TODO confirm which key lands at index 2.
        state[2] = len(state[2])
        state = np.array([state])  # add batch dimension for the network
        for time_t in range(5000):
            action = agent.act(state)
            reward = env.act(action)
            score += reward
            # Same flattening as above, applied to the successor state.
            next_state = game.getGameState()
            next_state = sorted(next_state.items(), key=operator.itemgetter(0))
            for i in range(len(next_state)):
                # ...chunk truncated here; loop body continues beyond this view.
# Tabular Q-learning loop over a PLE game. `PLE`, `agent`, and `round_state`
# are defined elsewhere in the file (not visible here). NOTE(review): this
# chunk is truncated mid-statement (`if max_act == up:` has no visible body)
# and its indentation was collapsed; the nesting below is a best-effort
# reconstruction — verify against the original file.
env = PLE(agent, fps=15, force_fps=False, display_screen=True)
env.init()
actions = env.getActionSet()
q_table = {}   # maps (rounded-state values..., action_code) -> Q value
alpha = 0.1    # learning rate
gamma = 0.9    # discount factor
while True:
    print(q_table)
    # round_state presumably discretizes the continuous game state so it can
    # serve as a dict key — TODO confirm its granularity.
    old_game_state = round_state(agent.getGameState())
    if env.game_over():
        env.reset_game()
    # Q-values for each action in the current state; missing entries default
    # to 0. Action codes are pygame key constants: 119='w', 97='a', 100='d',
    # 115='s'. NOTE(review): 97 is labeled `right` and 100 `left`, the reverse
    # of the usual a/d convention — confirm intentional.
    up = q_table.get(tuple(old_game_state.values()) + (119, ), 0)
    right = q_table.get(tuple(old_game_state.values()) + (97, ), 0)
    left = q_table.get(tuple(old_game_state.values()) + (100, ), 0)
    down = q_table.get(tuple(old_game_state.values()) + (115, ), 0)
    # NOTE(review): `list` shadows the builtin of the same name — rename once
    # the full loop body is in view.
    list = [up, right, left, down]
    max_act = max(list)
    counter = 0
    if max_act == up:
        # ...chunk truncated here; branch body continues beyond this view.