def initial_population(self):
    """Play random games and build labelled training samples.

    Each sample is ``[observation + action, label]`` where the label is
    1 (the move raised the score or reduced the distance to the food),
    0 (the move did not help), or -1 (the move killed the snake).

    Returns:
        list: the collected training samples.
    """
    training_data = []
    for _ in range(self.initial_games):
        game = SnakeGame()
        _, prev_score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        prev_food_distance = self.get_food_distance(snake, food)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, score, snake, food = game.step(game_action)
            if done:
                # Fatal move: label -1 and move on to the next game.
                training_data.append([
                    self.add_action_to_observation(prev_observation, action),
                    -1
                ])
                break
            food_distance = self.get_food_distance(snake, food)
            if score > prev_score or food_distance < prev_food_distance:
                label = 1  # the move helped: ate food or closed the gap
            else:
                label = 0  # the move was neutral or harmful
            training_data.append([
                self.add_action_to_observation(prev_observation, action),
                label
            ])
            prev_observation = self.generate_observation(snake, food)
            prev_food_distance = food_distance
            # BUG FIX: keep prev_score in sync with the game. The original
            # never updated it, so once the score rose above its starting
            # value every subsequent move was mislabelled 1.
            prev_score = score
    return training_data
def initial_population(self):
    """Play random games (board with obstacles) and build labelled samples.

    Each sample is ``[observation + action, label]``; the 1 / 0 / -1
    labels are later used to score each candidate decision at test time.

    Returns:
        list: the collected training samples.
    """
    training_data = []
    for _ in range(self.initial_games):
        game = SnakeGame()
        # game.start() returns the initial state, including obstacles.
        _, prev_score, snake, food, obstacles = game.start()
        prev_observation = self.generate_observation(snake, food, obstacles)
        prev_food_distance = self.get_food_distance(snake, food)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, score, snake, food, obstacles = game.step(game_action)
            if done:
                # Fatal move: label -1 and move on to the next game.
                training_data.append([
                    self.add_action_to_observation(prev_observation, action),
                    -1
                ])
                break
            food_distance = self.get_food_distance(snake, food)
            # Did the move get closer to the objective?
            if score > prev_score or food_distance < prev_food_distance:
                label = 1  # good decision
            else:
                label = 0  # bad decision
            training_data.append([
                self.add_action_to_observation(prev_observation, action),
                label
            ])
            prev_observation = self.generate_observation(snake, food, obstacles)
            prev_food_distance = food_distance
            # BUG FIX: the original never refreshed prev_score, so after
            # the first food pickup every later move was labelled 1.
            prev_score = score
    return training_data
def test_model(self, model):
    """Play test games with the trained model, recording per-game
    survival steps and final scores in local arrays."""
    steps_arr = []
    scores_arr = []
    for _ in range(self.test_games):
        steps = 0
        game_memory = []
        game = SnakeGame()
        _, score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        for _ in range(self.goal_steps):
            # Score every candidate turn (-1 left, 0 straight, 1 right).
            predictions = [
                model.predict(
                    self.add_action_to_observation(
                        prev_observation, candidate).reshape(-1, 5, 1))
                for candidate in range(-1, 2)
            ]
            action = np.argmax(np.array(predictions))
            game_action = self.get_game_action(snake, action - 1)
            done, score, snake, food = game.step(game_action)
            game_memory.append([prev_observation, action])
            if done:
                break
            prev_observation = self.generate_observation(snake, food)
            steps += 1
        steps_arr.append(steps)
        scores_arr.append(score)
def test_model(self, model):
    """Evaluate the model on survival-only games and print step stats."""
    steps_arr = []
    for _ in range(self.test_games):
        steps = 0
        game_memory = []
        game = SnakeGame()
        _, _, snake, _ = game.start()
        prev_observation = self.generate_observation(snake)
        for _ in range(self.goal_steps):
            # Rank the three candidate turns by predicted value.
            predictions = [
                model.predict(
                    self.add_action_to_observation(
                        prev_observation, candidate).reshape(-1, 4, 1))
                for candidate in range(-1, 2)
            ]
            action = np.argmax(np.array(predictions))
            game_action = self.get_game_action(snake, action - 1)
            done, _, snake, _ = game.step(game_action)
            game_memory.append([prev_observation, action])
            if done:
                break
            prev_observation = self.generate_observation(snake)
            steps += 1
        steps_arr.append(steps)
    print('Average steps:', mean(steps_arr))
    print(Counter(steps_arr))
def test_model(self, model):
    """Evaluate the model on snake or maze games and report statistics.

    Prints average/lowest/highest scores, average steps, the number of
    solved mazes (maze mode only), and pickles the raw step and score
    arrays to 'steps_arr' and 'scores_arr'.

    Args:
        model: trained predictor exposing ``predict``.
    """
    steps_arr = []
    scores_arr = []
    solved = 0  # maze mode: games that ended with a score of 1
    print("Testing in progress")
    for i in range(self.test_games):
        steps = 0
        game_memory = []
        game = SnakeGame()
        if self.game_type == 'maze':
            game = MazeGame()
        _, score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        for _ in range(self.goal_steps):
            # Score each candidate turn (-1 left, 0 straight, 1 right).
            predictions = [
                model.predict(
                    self.add_action_to_observation(
                        prev_observation, candidate).reshape(-1, 5, 1))
                for candidate in range(-1, 2)
            ]
            action = np.argmax(np.array(predictions))
            game_action = self.get_game_action(snake, action - 1)
            done, score, snake, food = game.step(game_action)
            game_memory.append([prev_observation, action])
            if done:
                # NOTE: progress is only reported when the game ends
                # before goal_steps runs out (preserves original quirk).
                self.progress(i + 1, self.test_games)
                if self.game_type == 'maze' and score == 1:
                    solved += 1
                break
            prev_observation = self.generate_observation(snake, food)
            steps += 1
        steps_arr.append(steps)
        scores_arr.append(score)
    print("\n\n")
    print('Average steps:', mean(steps_arr))
    print('Average score:', mean(scores_arr))
    scores_arr.sort()
    print('Lowest score:', scores_arr[0])
    print('Highest score:', scores_arr[-1])
    if self.game_type == 'maze':
        print('Total solved mazes:', solved)
    # Persist raw per-game stats for later analysis.
    with open('steps_arr', 'wb') as file:
        pickle.dump(steps_arr, file)
    with open('scores_arr', 'wb') as file:
        pickle.dump(scores_arr, file)
def visualise_game(self, model):
    """Play a single game with the GUI on, choosing moves via the model."""
    game = SnakeGame(gui=True)
    _, _, snake, _ = game.start()
    prev_observation = self.generate_observation(snake)
    for _ in range(self.goal_steps):
        scores = []
        for candidate in range(-1, 2):
            # Evaluate each possible turn from the current observation.
            features = self.add_action_to_observation(
                prev_observation, candidate).reshape(-1, 4, 1)
            scores.append(model.predict(features))
        action = np.argmax(np.array(scores))
        game_action = self.get_game_action(snake, action - 1)
        done, _, snake, _ = game.step(game_action)
        if done:
            break
        prev_observation = self.generate_observation(snake)
def initial_population(self):
    """Generate survival-labelled training data from random games.

    A move is labelled 1 when the snake survived it and 0 when it died.

    Returns:
        list: ``[observation + action, label]`` samples.
    """
    training_data = []
    for _ in range(self.initial_games):
        game = SnakeGame()
        _, _, snake, _ = game.start()
        prev_observation = self.generate_observation(snake)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, _, snake, _ = game.step(game_action)
            sample = self.add_action_to_observation(prev_observation, action)
            if done:
                training_data.append([sample, 0])
                break
            training_data.append([sample, 1])
            prev_observation = self.generate_observation(snake)
    print(len(training_data))
    return training_data
def test_model(self, model):
    """Evaluate the model on games with obstacles; print step and score
    statistics, plus a debug dump at the end of every game."""
    steps_arr = []
    scores_arr = []
    for _ in range(self.test_games):
        steps = 0
        game_memory = []
        game = SnakeGame()
        _, score, snake, food, obstacles = game.start()
        prev_observation = self.generate_observation(snake, food, obstacles)
        for _ in range(self.goal_steps):
            # Iterate through each possible decision and rank them.
            predictions = [
                model.predict(
                    self.add_action_to_observation(
                        prev_observation, candidate).reshape(-1, 5, 1))
                for candidate in range(-1, 2)
            ]
            # Choose the decision with the highest predicted value.
            action = np.argmax(np.array(predictions))
            # Perform the chosen action in the game.
            game_action = self.get_game_action(snake, action - 1)
            done, score, snake, food, obstacles = game.step(game_action)
            game_memory.append([prev_observation, action])
            if done:
                print('-----')
                print(steps)
                print(snake)
                print(food)
                print(prev_observation)
                print(predictions)
                break
            prev_observation = self.generate_observation(
                snake, food, obstacles)
            steps += 1
        steps_arr.append(steps)
        scores_arr.append(score)
    print('Average steps:', mean(steps_arr))
    print(Counter(steps_arr))
    print('Average score:', mean(scores_arr))
    print(Counter(scores_arr))
def initial_population(self):
    """Play random snake or maze games and build labelled training data.

    Labels: 1 (efficient move), 0 (inefficient move), -1 (fatal move).
    The resulting samples are also pickled to 'init_pop_gen'.

    Returns:
        list: the collected training samples.
    """
    training_data = []
    print("Generating initial games")
    for i in range(self.initial_games):
        self.progress(i + 1, self.initial_games)
        game = SnakeGame()
        if self.game_type == 'maze':
            game = MazeGame()
        _, prev_score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        prev_food_distance = self.get_food_distance(snake, food)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, score, snake, food = game.step(game_action)
            if done:
                # Snake is dead
                training_data.append([
                    self.add_action_to_observation(prev_observation, action),
                    -1
                ])
                break
            food_distance = self.get_food_distance(snake, food)
            if score > prev_score or food_distance < prev_food_distance:
                label = 1  # the last move was efficient
            else:
                label = 0  # the last move was not efficient
            training_data.append([
                self.add_action_to_observation(prev_observation, action),
                label
            ])
            prev_observation = self.generate_observation(snake, food)
            prev_food_distance = food_distance
            # BUG FIX: refresh prev_score; the original kept the starting
            # score, so every move after the first pickup was labelled 1.
            prev_score = score
    # Persist the generated population so it can be reused.
    with open('init_pop_gen', 'wb') as file:
        pickle.dump(training_data, file)
    return training_data
#############
## Imports ##
#############

import sys

from snake import SnakeGame

###########
## Play! ##
###########


def main():
    """Launch a human-playable snake game.

    The game speed may be overridden by the first command-line
    argument (interpreted as a float; default 15).
    """
    speed = 15
    if len(sys.argv) > 1:
        speed = float(sys.argv[1])
    snake_game = SnakeGame(speed=speed)
    snake_game.start()


# Guard the entry point so importing this module has no side effects.
if __name__ == '__main__':
    main()