def getTrainingData(self):
    """Play ``self.train_games`` games and collect supervised training samples.

    Every game is a fresh ``Game(x=self.x, y=self.y)`` (also stored on
    ``self.game``) that is stepped at most ``self.max_steps`` times with the
    action returned by ``self.getAction()``.

    A step is recorded only when ``game.step`` reports ``closer`` as truthy
    (presumably "the snake moved closer to the food" - confirm against
    ``Game.step``). The recorded sample pairs the state observed before the
    move with a one-hot list of length 3 whose hot index is ``action + 1``.
    A step on which the snake grew is recorded three times instead of once.
    The samples of a game are kept only if the snake's final length is
    greater than 2.

    Returns:
        list: ``[state, one_hot_action]`` pairs gathered over all games.
    """
    print('Getting Training Data . . .')
    data = []
    # Progress is printed about every 5% of the games. The interval is clamped
    # to at least 1 so that running fewer than 20 games no longer raises
    # ZeroDivisionError in the modulo below.
    report_every = max(1, self.train_games // 20)
    for x in range(self.train_games):
        game = Game(x=self.x, y=self.y)
        self.game = game
        game_samples = []
        snake = game.start()
        current_state = self.getState(snake)
        for _ in range(self.max_steps):
            action = self.getAction()
            length = snake.length
            done, snake, closer = game.step(action)
            if done:
                break
            if not closer:
                # NOTE(review): current_state is deliberately left untouched
                # here to keep the existing behaviour - confirm whether the
                # state should be refreshed after a non-"closer" step.
                continue
            correct_output = [0, 0, 0]
            correct_output[action + 1] = 1
            # Growing steps are weighted three times as heavily.
            repeats = 3 if snake.length > length else 1
            game_samples.extend(
                [current_state, correct_output] for _ in range(repeats)
            )
            current_state = self.getState(snake)
        # Discard games in which the snake did not get past length 2.
        if snake.length > 2:
            data.extend(game_samples)
        if x % report_every == 0:
            print(f'{int(x/self.train_games*100)}%')
    return data
def eval_genome(genome, config):
    """Rate a genome by the worst result of several Snake games.

    A feed-forward network is built from ``genome`` and ``config`` and plays
    ``runs_per_net`` fresh ``Game(20, 20)`` instances. On every step the
    network receives ``game.get_normalized_state()`` and the index of its
    largest output is handed to ``game.step``. A run stops as soon as
    ``step`` returns a falsy value (per the original comments: the snake left
    the board or hit itself). There is no step limit in this function.

    The score of a run is ``game.fitness`` as read after the last step that
    returned a truthy value, or ``0.0`` if the very first step failed.

    Returns:
        The minimum run score, i.e. the genome's worst performance.
    """
    network = neat.nn.FeedForwardNetwork.create(genome, config)

    def play_one_game():
        """Play a single game to its end and return the score of that run."""
        game = Game(20, 20)
        run_score = 0.0
        while True:
            outputs = network.activate(game.get_normalized_state())
            # Apply the strongest output as the action; a falsy result ends the run.
            if not game.step(np.argmax(outputs)):
                return run_score
            run_score = game.fitness

    # The genome's fitness is its worst performance across all runs.
    return min([play_one_game() for _ in range(runs_per_net)])
def eval_genome(genome, config):
    """Evaluate a genome over ``runs_per_net`` Snake simulations.

    The genome is turned into a feed-forward network that drives a fresh
    ``Game(20, 20)`` per run: the normalized state goes in, and the argmax of
    the outputs is the action passed to ``sim.step``. The run continues while
    ``step`` keeps returning a truthy value; this function imposes no step
    limit of its own.

    A run is scored with ``sim.score`` as read after the last successful step
    (per the original comments: the number of pills eaten), or ``0.0`` when
    the first step already fails.

    Returns:
        The lowest score over all runs (the genome's worst performance).
    """
    brain = neat.nn.FeedForwardNetwork.create(genome, config)
    scores_per_run = []

    for _ in range(runs_per_net):
        sim = Game(20, 20)
        last_score = 0.0

        while True:
            # Let the network pick the move for the current state.
            move = np.argmax(brain.activate(sim.get_normalized_state()))
            still_valid = sim.step(move)
            if not still_valid:
                # The score is not re-read after the failing step.
                break
            last_score = sim.score

        scores_per_run.append(last_score)

    return min(scores_per_run)
class SnakeWrapper:
    """
    Gym-style wrapper around ``Game`` for the snake stored at ``game.players[1]``.

    Observations are the cropped square_size-by-square_size view of the board
    after rotation, conversion to one-hot and block notation, as produced by
    ``preprocess_snake_state``.
    """

    # num_classes is the number of different element types that can be found on the board.
    # Strictly there are 9 types (4 snakes + 1 obstacle + 3 fruits + 1 empty = 9); 10 is used as a rounder number.
    num_classes = 10

    # The action space: 0 - left, 1 - forward, 2 - right.
    action_space = gym.spaces.Discrete(3)

    # The observation space: 9x9 one-hot vectors, 9x9x10 in total.
    # The snake always looks up (the observation is a rotated crop of the board).
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy, so the builtin is used directly; the resulting dtype is the same.
    observation_space = gym.spaces.Box(
        low=0,
        high=num_classes,
        shape=(9, 9, 10),
        dtype=int
    )

    def __init__(self):
        """Create the underlying game and reset the step counter."""
        self.game = Game()
        self.square_size = 9  # the observation size
        self.timestep = 0

    def step(self, action):
        """Advance the game by one step.

        Args:
            action: integer action; it is translated through ``int_to_action``
                before being passed to ``Game.step``.

        Returns:
            tuple: ``(state, reward)``. Unlike the usual gym contract, no
            ``done`` or ``info`` values are returned (per the original
            comment, the Snake game never ends).
        """
        action = int_to_action[action]
        reward = self.game.step(action)

        # Build the observation from the point of view of player 1.
        player = self.game.players[1]
        head_pos = player.chain[-1]
        direction = player.direction
        board = self.game.board
        state = preprocess_snake_state(board, head_pos, direction, self.square_size, SnakeWrapper.num_classes)

        self.timestep += 1

        return state, reward

    def seed(self, seed=None):
        """Forward ``seed`` to the underlying game and return its result."""
        return self.game.seed(seed)

    def reset(self):
        """Reset the game and return the first board observation.

        The observation is obtained by taking one step with action 0, so
        ``self.timestep`` is 1 (not 0) when this method returns.
        """
        self.game.reset()
        self.timestep = 0
        first_state, _ = self.step(0)
        return first_state

    def render(self, mode='human'):
        """Print the board to the console. ``mode`` is accepted but not used."""
        self.game.render(self.timestep)
def showGame(self, model):
    """Replay games in a GUI window forever, letting ``model`` pick every move.

    A single ``Game(x=self.x, y=self.y, gui=True)`` is created (and stored on
    ``self.game``) and restarted each time a game ends. This method contains
    no exit condition for the outer loop, so it does not return.

    Within a game the model's prediction for the current state is turned into
    an action by taking the index of the largest output and subtracting 1.
    A game ends when ``game.step`` reports ``done`` or when the step budget,
    which is refilled to ``self.max_steps`` whenever the snake grows, reaches 0.

    Args:
        model: object with a ``predict`` method that accepts a batch of states.
    """
    self.game = game = Game(x=self.x, y=self.y, gui=True)

    while True:  # one iteration per displayed game
        snake = game.start()
        steps_left = self.max_steps
        current_state = self.getState(snake)

        while True:
            prediction = model.predict(np.array([current_state]))
            scores = list(prediction[0])
            # Output index 0/1/2 maps to action -1/0/1.
            action = scores.index(max(scores)) - 1

            previous_length = snake.length
            done, snake, _ = game.step(action)
            if done:
                break

            if snake.length > previous_length:
                # The snake grew: refill the step budget.
                # NOTE(review): the state is not refreshed on this branch, so
                # the next prediction reuses the previous state - confirm
                # whether that is intended.
                steps_left = self.max_steps
            else:
                current_state = self.getState(snake)

            time.sleep(.05)  # frame delay so the game is watchable
            steps_left -= 1
            if steps_left == 0:
                break
def test(self, model):
    """Play ``self.test_games`` games with ``model`` and print the average length.

    One ``Game(x=self.x, y=self.y)`` is created (and stored on ``self.game``)
    and restarted for every test game. In each game the model's prediction
    for the current state is turned into an action by taking the index of the
    largest output and subtracting 1. A game ends when ``game.step`` reports
    ``done`` or when the step budget, which is refilled to ``self.max_steps``
    whenever the snake grows, reaches 0. The snake's final length is recorded.

    Progress is printed about every 5% of the games, followed by the average
    final length. Nothing is returned.

    Args:
        model: object with a ``predict`` method that accepts a batch of states.
    """
    print('Testing . . .')
    # Clamp the reporting interval to at least 1 so that running fewer than
    # 20 test games no longer raises ZeroDivisionError in the modulo below.
    report_every = max(1, self.test_games // 20)
    lengths = []
    game = Game(x=self.x, y=self.y)
    self.game = game
    for x in range(self.test_games):
        snake = game.start()
        steps = self.max_steps
        current_state = self.getState(snake)
        while True:
            prediction = model.predict(np.array([current_state]))
            scores = list(prediction[0])
            # Output index 0/1/2 maps to action -1/0/1.
            action = scores.index(max(scores)) - 1
            length = snake.length
            done, snake, _ = game.step(action)
            if done:
                break
            if snake.length > length:
                # The snake grew: refill the step budget.
                # NOTE(review): the state is not refreshed on this branch, so
                # the next prediction reuses the previous state - confirm
                # whether that is intended.
                steps = self.max_steps
            else:
                current_state = self.getState(snake)
            steps -= 1
            if steps == 0:
                break
        lengths.append(snake.length)
        if x % report_every == 0:
            print(f'{int((x/self.test_games)*100)}%')
    # With zero test games there is nothing to average; avoid dividing by zero.
    if lengths:
        print(f'Average: {sum(lengths)/len(lengths)}')
    else:
        print('Average: n/a (no test games were played)')
# Replay one greedy episode from the saved Q-table and save every rendered
# frame as ``{FILE}/image_<step>.png``.
game = Game(food_ammount=1, render=True)
valid = True
# Take the first observation from the game that is actually stepped and drawn.
# Previously it came from a second, throwaway ``Game()`` instance, so the
# first action was chosen for a board that is not the one being played.
observation = game.reset()
score = 0
# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects;
# only load Q-table files from a trusted source.
q_table = np.load(f"{FILE}.npy", allow_pickle=True)
os.makedirs(f"{FILE}", exist_ok=True)
step = 0
while valid:
    # Save the frame as it looks before the action is applied.
    game.draw()
    surface = pygame.display.get_surface()
    pygame.image.save(surface, f"{FILE}/image_{step}.png")

    # Greedy policy: pick the action with the highest Q-value for this state.
    old_observation = observation
    current_q_values = get_discrete_vals(q_table, old_observation)
    action = np.argmax(current_q_values)
    old_q = current_q_values[action]

    valid, reward, observation = game.step(action=action)
    step += 1
    # time.sleep(0.03)

# Save the final frame, i.e. the state after the step that ended the episode.
game.draw()
surface = pygame.display.get_surface()
pygame.image.save(surface, f"{FILE}/image_{step}.png")