import numpy as np
from ple import PLE
from ple.games.snake import Snake
import random


def getReward(env, state, action):
    # Perform `action` on the live environment and return PLE's reward.
    # NOTE(review): the `state` parameter is accepted but never used here.
    return(env.act(action))


# --- Q-learning setup for PLE's Snake game ---
agent = Snake(width=360, height=360)
Q = {}           # Q-table; presumably keyed by (state, action) — populated beyond this chunk
gama = 0.9       # discount factor (sic: misspelling of "gamma"; kept — the rest of the file may use it)
alpha = 0.1      # learning rate
explore = 0.75   # epsilon for epsilon-greedy exploration
env = PLE(agent, fps=15, force_fps=False, display_screen=True)
env.init()
for i in range(100000):
    # Decay exploration by 0.25 every 30000 steps until it reaches 0.25.
    # (0.75, 0.5 and 0.25 are exactly representable floats, so `!=` terminates.)
    # NOTE(review): at i == 0 the modulo test fires immediately, so the first
    # decay happens on the very first iteration (0.75 -> 0.5) — confirm intended.
    if explore != 0.25:
        if i % 30000 == 0:
            explore -= 0.25
    if env.game_over():
        env.reset_game()
    state = env.getGameState()
    # Drop the variable-length body-segment list, presumably so the remaining
    # state is small/hashable enough to key the Q-table — the training-loop
    # body appears to continue beyond this chunk; verify against the rest.
    del(state["snake_body_pos"])
def get_grille(game, snake_location, food_location):
    """Build an occupancy grid for the game board in 10-pixel cells.

    Args:
        game: object exposing `width` and `height` in pixels.
        snake_location: (x, y) cell coordinates of the snake head.
        food_location: (x, y) cell coordinates of the food.

    Returns:
        A (height/10) x (width/10) list-of-lists of ints where the snake cell
        and the food cell have been incremented by 1.
    """
    s_x, s_y = snake_location
    # BUG FIX: original read `f_x, f_x = food_location`, which dropped the
    # x-coordinate (both unpack targets were f_x) and then marked the bogus
    # cell grille[f_x][f_x] instead of the actual food cell.
    f_x, f_y = food_location
    # NOTE(review): rows are indexed by the x coordinate first; the board is
    # square here (width == height) so this works, but confirm the intended
    # row/column orientation if the board ever becomes rectangular.
    grille = [[0] * int(game.width / 10) for _ in range(int(game.height / 10))]
    grille[s_x][s_y] += 1
    grille[f_x][f_y] += 1
    return grille


# DO NOT CHANGE THIS VARIABLE (original comment: "NE PAS CHANGER CETTE VARIABLE")
case_size = 20
size = 10

# Game initialisation.  NOTE(review): `Trainer`, `Snake` and `PLE` are defined
# /imported elsewhere in this file.  The grid above uses 10-pixel cells while
# case_size is 20 — confirm the mismatch is intentional.
game = Snake(height=case_size * size, width=case_size * size)
p = PLE(game, fps=30, display_screen=True)
agent = Trainer(allowed_actions=p.getActionSet(), height=game.height, width=game.width)
p.init()

reward = 0.0
nb_frames = 10000000000000000  # effectively "run forever"
bestScore = 0
for i in range(nb_frames):
    # Track the best score seen so far.  (The loop body appears to continue
    # beyond this chunk.)
    if (p.score() > bestScore):
        bestScore = int(p.score())
import numpy as np
from ple import PLE
from ple.games.snake import Snake


def round_state(state):
    """Discretize a PLE game-state dict into 60-pixel buckets.

    Drops the variable-length snake-body entries so the result is a small,
    hashable-friendly dict usable as a Q-table key.

    Args:
        state: game-state dict from PLE (pixel-valued entries plus
               'snake_body' / 'snake_body_pos' lists).

    Returns:
        dict with the same scalar keys, each value floor-divided into a
        60-pixel bucket index.
    """
    newState = {}
    for i in state:
        if i != 'snake_body' and i != 'snake_body_pos':
            # BUG FIX: the original `int(state[i]) / 60` is true division in
            # Python 3 and returns an un-rounded float, so every distinct
            # pixel value produced a distinct Q-table key (no discretization
            # at all).  Floor-divide to actually bucket the coordinate.
            newState[i] = int(state[i]) // 60
    return newState


agent = Snake(width=256, height=256)
env = PLE(agent, fps=15, force_fps=False, display_screen=True)
env.init()
actions = env.getActionSet()

q_table = {}   # Q-table keyed by rounded state — populated beyond this chunk
alpha = 0.1    # learning rate
gamma = 0.9    # discount factor

while True:
    print(q_table)
    old_game_state = round_state(agent.getGameState())
def test_snake(self):
    """Smoke test: a full game of Snake runs to completion without errors."""
    from ple.games.snake import Snake
    self.run_a_game(Snake())
# --- continuation of the NEAT fitness function (its `def` lies outside this chunk) ---
    fy = state['food_y']
    # Ask the evolved network for a score per direction and act on the argmax.
    turns = individual.activate([sx,sy,fx,fy])
    best_turn = max(turns)
    if turns[0] == best_turn:
        fitness += snake_game.act(UP)
    elif turns[1] == best_turn:
        fitness += snake_game.act(LEFT)
    elif turns[2] == best_turn:
        fitness += snake_game.act(RIGHT)
    else:
        fitness += snake_game.act(DOWN)
    return fitness


if __name__ == '__main__':
    # set up flappybird game
    # NOTE(review): comment copied from a FlappyBird example — this is Snake.
    game = Snake(width=400, height=400)
    # NOTE- if training: set force_fps = true, if testing: set force_fps to false
    snake_game = PLE(game, fps=30, display_screen=True, force_fps=False)
    snake_game.init()

    # uncomment this block to train a solution
    #model = neat.NEAT(config_file="snake.config")
    #best_genome = model.run(fitness_function=test_snake)
    #pickle.dump( best_genome, open( "snek", "wb" ) )

    # uncomment this block to test solution
    # NOTE(review): these key codes look permuted relative to their names
    # (119 is 'w', 97 is 'a', 100 is 'd', 115 is 's') — confirm the labels
    # against the game's actual action set before relying on them.
    LEFT = 119
    DOWN = 97
    UP = 100
    RIGHT = 115
    # NOTE(review): the file handle from open() is never closed; a `with`
    # block would be safer (left unchanged here — documentation-only pass).
    individual = pickle.load(open("snek", "rb"))
def discounted_rewards(rewards, gamma=0.99):
    """Compute the discounted return-to-go for each timestep.

    res[i] = rewards[i] + gamma * res[i+1], with res[-1] = rewards[-1].

    Args:
        rewards: sequence of per-step rewards for one episode.
        gamma: discount factor in [0, 1].

    Returns:
        A list the same length as `rewards` of discounted cumulative rewards.
    """
    # Accumulate right-to-left with append + reverse.  The original used
    # list.insert(0, ...) inside the loop, which shifts the whole list each
    # iteration and made this O(n^2); the recurrence is unchanged.
    res = []
    cum_reward = 0
    for r in reversed(rewards):
        cum_reward = gamma * cum_reward + r
        res.append(cum_reward)
    res.reverse()
    return res


def train(env, agent):
    """REINFORCE-style loop: play episodes forever and ascend the policy gradient.

    Args:
        env: a PLE environment (caller must have run env.init()).
        agent: a torch.nn.Module policy with a `.device` attribute;
               `play_episode` (defined elsewhere in this file) must return
               (log-probabilities, rewards) for one episode.
    """
    optimizer = torch.optim.Adam(agent.parameters())
    while True:
        agent.zero_grad()
        p, r = play_episode(env, agent)
        r = torch.tensor(discounted_rewards(r), device=agent.device)
        # Policy-gradient loss: maximize E[log-prob * return].
        loss = -r * p
        loss = loss.mean()
        loss.backward()
        optimizer.step()


if __name__ == '__main__':
    env = PLE(Snake(), fps=30, display_screen=True)
    env.init()
    agent = Agent(env.getScreenDims(), 16, env.getActionSet())
    train(env, agent)