class SnakeEnv(Env):
    """Gym-style environment driving a single snake ``Game``.

    Actions are relative to the snake's current heading:
    0 = turn left, 1 = do nothing (keep heading), 2 = turn right.

    The observation is a list of four 0/1 flags derived from the offset
    ``(dx, dy) = food position - snake head``:
    ``[dx < 0, dy < 0, dx > 0, dy > 0]``.
    """

    # The game addresses absolute headings as 0..3; relative turns wrap
    # around within that range.
    _NUM_DIRECTIONS = 4

    def __init__(self):
        self.action_space = Discrete(3)  # 0 = turn left, 1 = do nothing, 2 = turn right
        self.state = [0, 0, 1, 0]
        self.game = Game()
        self.reward = 0
        self.done = False

    def step(self, action):
        """Advance the game by one tick using a relative ``action``.

        Args:
            action: 0 (turn left), 1 (keep heading) or 2 (turn right).

        Returns:
            A ``(state, reward, done, info)`` tuple. ``state`` is a fresh
            copy of the four food-direction flags, ``reward`` and ``done``
            come straight from ``Game.run``, and ``info`` is always ``{}``.
        """
        # Map the relative action to an absolute heading: -1 / 0 / +1 from
        # the current direction, wrapping around the ends of 0..3.
        offset = action - 1
        translated_action = (
            (self.game.snake.direction + offset) % self._NUM_DIRECTIONS
        )

        self.reward, self.done = self.game.run(1, translated_action)

        head = self.game.snake.snake[0]
        food = self.game.food.position
        dx = food[0] - head[0]
        dy = food[1] - head[1]

        self.state[0] = int(dx < 0)
        self.state[1] = int(dy < 0)
        # Fixed: this flag used ``> 1``, so food exactly one cell away in
        # the +x direction was reported as "no x offset".
        self.state[2] = int(dx > 0)
        self.state[3] = int(dy > 0)

        # Return a copy so observations stored by the caller (for example
        # in a replay buffer) are not overwritten by the next step.
        return list(self.state), self.reward, self.done, {}

    def render(self):
        """Delegate drawing to the underlying game."""
        self.game.render()

    def reset(self):
        """Reset the game and the environment's bookkeeping.

        Returns:
            A copy of the initial observation.
        """
        self.game.reset()
        # Mirror the values set in __init__ so a new episode never starts
        # with the previous episode's state, reward, or done flag.
        # NOTE(review): assumes Game.reset() restores the same start layout
        # as a fresh Game() (food in +x of the head) -- confirm.
        self.state = [0, 0, 1, 0]
        self.reward = 0
        self.done = False
        return list(self.state)
class SnakeWrapper:
    """Gym-like wrapper around ``Game`` for a single controlled snake.

    Each observation is the cropped square_size-by-square_size view of the
    board after rotation, converted to one-hot and block notation (all of
    this is done by ``preprocess_snake_state``).
    """

    # num_classes is the number of different element types that can be
    # found on the board. There are actually 9 types
    # (4 snakes + 1 obstacle + 3 fruits + 1 empty = 9), but 10 is nicer.
    num_classes = 10

    # The action space: 0 = left, 1 = forward, 2 = right.
    action_space = gym.spaces.Discrete(3)

    # The observation space: 9x9 one-hot vectors, 9x9x10 in total.
    # The snake always looks up (the observation is a rotated crop of the
    # board).
    # ``np.int`` was only an alias for the builtin ``int`` and was removed
    # in NumPy 1.24; using ``int`` keeps the same dtype and lets this class
    # body run on current NumPy.
    observation_space = gym.spaces.Box(
        low=0,
        high=num_classes,
        shape=(9, 9, num_classes),
        dtype=int,
    )

    def __init__(self):
        self.game = Game()
        self.square_size = 9  # the observation size
        self.timestep = 0

    def step(self, action):
        """Move the game one step forward.

        Args:
            action: integer action index, translated through
                ``int_to_action`` before being passed to the game.

        Returns:
            A ``(state, reward)`` tuple. No ``done`` flag or ``info`` dict
            is returned; per the original author's note the snake game
            never ends.
        """
        action = int_to_action[action]
        reward = self.game.step(action)

        # The observation is built around player 1: the last element of
        # its chain is used as the head position.
        player = self.game.players[1]
        head_pos = player.chain[-1]
        direction = player.direction
        board = self.game.board

        state = preprocess_snake_state(
            board,
            head_pos,
            direction,
            self.square_size,
            SnakeWrapper.num_classes,
        )

        self.timestep += 1
        return state, reward

    def seed(self, seed=None):
        """Forward ``seed`` to the underlying game and return its result."""
        return self.game.seed(seed)

    def reset(self):
        """Reset the game and return the first board observation.

        The first observation is obtained by taking one real step with
        action index 0, so ``timestep`` is 1 when this method returns.
        """
        self.game.reset()
        self.timestep = 0
        first_state, _ = self.step(0)
        return first_state

    def render(self, mode='human'):
        """Print the board to the console via the game's own renderer."""
        self.game.render(self.timestep)
# average fitness of a population lstr(sum(n.fitness for n in networks) / len(networks)), ]) if generation >= max_generation: break # select for the next generation networks = select_networks(networks) # crossover for the next generation networks = crossover_networks(networks) # save data for the performance graph with open("snake_data.csv", 'w') as file: writer = csv.writer(file, delimiter=';') # delimit with semicolons writer.writerows(snake_data) # restore the best network best_net = Network(net_args) best_net.fitness = best_net_data[0] best_net.import_data(best_net_data[1]) # run it print(f"\nRunning best network: {best_net.fitness} fitness") # attach the controller game.external = partial(controller, best_net) game.fps = 6 while not game.window.has_exit: # reset the game game.reset() # run the game game.run() print(f"Score: {game.score}") sleep(1)