Example No. 1
def train(num_episodes, episode_length, gamma=0.7):

    for episode_no in range(num_episodes):

        print(f"\rEpisode {episode_no} out of {num_episodes}", end="\r")

        games = [SnakeGame(**opts) for i in range(batch_size)]
        memories = [Memory() for i in range(batch_size)]

        for _ in range(episode_length):
            observations = numpy.array([game.get_board() for game in games])
            actions = choose_action(snake_model, observations, single=False)
            for game, action in zip(games, actions):
                game.tick(game_actions[action])

            for memory, observation, action, game in zip(
                memories, observations, actions, games
            ):
                memory.add_to_memory(observation, action, reward(game))

            for i in range(batch_size):
                if games[i].game_over:
                    games[i] = SnakeGame(**opts)

        batch_memory = aggregate_memories(memories)

        train_step(
            snake_model,
            optimizer,
            observations=numpy.stack(batch_memory.observations, 0),
            actions=numpy.array(batch_memory.actions),
            discounted_rewards=discount_rewards(
                batch_memory.rewards, GAME_OVER_REWARD, gamma
            ),
        )
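The discount_rewards helper used above is not shown in this example. Below is a minimal sketch consistent with how it is called, under the assumption that GAME_OVER_REWARD marks episode boundaries inside the aggregated reward list; the real implementation may differ.

import numpy

def discount_rewards(rewards, game_over_reward, gamma=0.7):
    # Walk the rewards backwards, resetting the running return whenever a
    # game-over reward is seen so returns do not leak across episodes.
    discounted = numpy.zeros(len(rewards), dtype=numpy.float32)
    running = 0.0
    for t in reversed(range(len(rewards))):
        if rewards[t] == game_over_reward:
            running = 0.0
        running = rewards[t] + gamma * running
        discounted[t] = running
    # Normalizing returns is a common policy-gradient stabilizer (an assumption here).
    return (discounted - discounted.mean()) / (discounted.std() + 1e-8)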
Example No. 2
def main():
    g = SnakeGame()

    try:
        g.run()
    finally:
        g.quit_game()
Example No. 3
 def test_model(self, model):
     steps_arr = []
     scores_arr = []
     for _ in range(self.test_games):
         steps = 0
         game_memory = []
         game = SnakeGame()
         _, score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         for _ in range(self.goal_steps):
             predictions = []
             for action in range(-1, 2):
                 predictions.append(
                     model.predict(
                         self.add_action_to_observation(
                             prev_observation, action).reshape(-1, 5, 1)))
             action = np.argmax(np.array(predictions))
             game_action = self.get_game_action(snake, action - 1)
             done, score, snake, food = game.step(game_action)
             game_memory.append([prev_observation, action])
             if done:
                 # print('-----')
                 # print(steps)
                 # print(snake)
                 # print(food)
                 # print(prev_observation)
                 # print(predictions)
                 break
             else:
                 prev_observation = self.generate_observation(snake, food)
                 steps += 1
         steps_arr.append(steps)
         scores_arr.append(score)
Example No. 4
 def initial_population(self):
     training_data = []
     for _ in range(self.initial_games):
         game = SnakeGame()
         _, prev_score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         prev_food_distance = self.get_food_distance(snake, food)
         for _ in range(self.goal_steps):
             action, game_action = self.generate_action(snake)
             done, score, snake, food = game.step(game_action)
             if done:
                 training_data.append([
                     self.add_action_to_observation(prev_observation,
                                                    action), -1
                 ])
                 break
             else:
                 food_distance = self.get_food_distance(snake, food)
                 if score > prev_score or food_distance < prev_food_distance:
                     training_data.append([
                         self.add_action_to_observation(
                             prev_observation, action), 1
                     ])
                 else:
                     training_data.append([
                         self.add_action_to_observation(
                             prev_observation, action), 0
                     ])
                 prev_observation = self.generate_observation(snake, food)
                 prev_food_distance = food_distance
     return training_data
Example No. 5
    def __init__(self):
        super().__init__('cs!')

        self.game = SnakeGame(board_size)
        self.last_msg = None
        self.tie_detected = False
        self.has_ended = False
Example No. 6
 def initial_population(self):
     training_data = []
     for _ in range(self.initial_games):
         game = SnakeGame()
         _, prev_score, snake, food, obstacles = game.start() # returns generate_observation from snake game.
         prev_observation = self.generate_observation(snake, food, obstacles)
         prev_food_distance = self.get_food_distance(snake, food)
         for _ in range(self.goal_steps):
             action, game_action = self.generate_action(snake)
             done, score, snake, food, obstacles  = game.step(game_action)
             #print(training_data)
             #input("test")
             if done:
                 training_data.append([self.add_action_to_observation(prev_observation, action), -1])
                 break
             else:
                 food_distance = self.get_food_distance(snake, food)
                 if score > prev_score or food_distance < prev_food_distance: # did you get closer to the objective?
                     training_data.append([self.add_action_to_observation(prev_observation, action), 1]) # label as good decision.
                 else:
                     training_data.append([self.add_action_to_observation(prev_observation, action), 0]) # label as bad decision.
                 prev_observation = self.generate_observation(snake, food, obstacles)
                 prev_food_distance = food_distance
                 # Later we will use this 1 or 0 label to provide estimates
                 # for each possible decision.
     return training_data
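Several of these examples prepend the chosen action to the observation before calling model.predict (hence the reshape(-1, 5, 1)). The add_action_to_observation helper itself is not shown; a minimal sketch consistent with that usage, written here as a free function rather than a method, would be:

import numpy as np

def add_action_to_observation(observation, action):
    # Prepend the action so the model scores (action, observation) pairs.
    return np.append([action], observation)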
Example No. 7
 def test_model(self, model):
     steps_arr = []
     for _ in range(self.test_games):
         steps = 0
         game_memory = []
         game = SnakeGame()
         _, _, snake, _ = game.start()
         prev_observation = self.generate_observation(snake)
         for _ in range(self.goal_steps):
             predictions = []
             for action in range(-1, 2):
                 predictions.append(
                     model.predict(
                         self.add_action_to_observation(
                             prev_observation, action).reshape(-1, 4, 1)))
             action = np.argmax(np.array(predictions))
             game_action = self.get_game_action(snake, action - 1)
             done, _, snake, _ = game.step(game_action)
             game_memory.append([prev_observation, action])
             if done:
                 break
             else:
                 prev_observation = self.generate_observation(snake)
                 steps += 1
         steps_arr.append(steps)
     print('Average steps:', mean(steps_arr))
     print(Counter(steps_arr))
Example No. 8
def start_game(board_size, delay_time=250):
    game = SnakeGame(board_size)
    win, ren = init_sdl2(game, 'snek')

    game_thread = Thread(target=game_loop, args=(game, delay_time))
    game_thread.start()
    while game_thread.is_alive():
        show_game(game, ren)
Example No. 9
 def test_model(self, model):
     steps_arr = []
     scores_arr = []
     count = 0
     solved = 0
     print("Testing in progress")
     for i in range(self.test_games):
         steps = 0
         game_memory = []
         game = SnakeGame()
         if self.game_type == 'maze':
             game = MazeGame()
         _, score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         for _ in range(self.goal_steps):
             predictions = []
             for action in range(-1, 2):
                 predictions.append(
                     model.predict(
                         self.add_action_to_observation(
                             prev_observation, action).reshape(-1, 5, 1)))
             action = np.argmax(np.array(predictions))
             game_action = self.get_game_action(snake, action - 1)
             done, score, snake, food = game.step(game_action)
             game_memory.append([prev_observation, action])
             if done:
                 self.progress(i + 1, self.test_games)
                 if self.game_type == 'maze' and score == 1: solved += 1
                 count += 1
                 if False:
                     #if count % 100 == 0:
                     print('-----')
                     print('id: ' + str(count))
                     print(steps)
                     print(snake)
                     print(food)
                     print(prev_observation)
                     print(predictions)
                 break
             else:
                 prev_observation = self.generate_observation(snake, food)
                 steps += 1
         steps_arr.append(steps)
         scores_arr.append(score)
     print("\n\n")
     print('Average steps:', mean(steps_arr))
     #print(Counter(steps_arr))
     print('Average score:', mean(scores_arr))
     #print(Counter(scores_arr))
     scores_arr.sort()
     print('Lowest score:', scores_arr[0])
     print('Highest score:', scores_arr[-1])
     if self.game_type == 'maze': print('Total solved mazes:', solved)
     with open('steps_arr', 'wb') as file:
         pickle.dump(steps_arr, file)
     with open('scores_arr', 'wb') as file:
         pickle.dump(scores_arr, file)
Example No. 10
    def generate_training_data(self, initial_games, goal_steps):
        """Generate training data for the neural network 
        based on random action. 
        
        Parameters
        ----------
        initial_games : int
            number of games for the training
        goal_steps : int
            max number of steps in a game
            
        Returns
        -------
        list
            list containing the input data and the targets
        """
        training_data = []
        from tqdm import tqdm
        for i in tqdm(range(initial_games)):
            state = SnakeGame()
            prev_food_distance = self.get_distance(state.snake[0], state.food)
            prev_score = state.score
            prev_observation = self.get_observation(state)
            for j in range(goal_steps):
                # Get action
                action = self.generate_action(state)

                # Update state
                state = state(action)

                # We will now evaluate the performed moves, using
                # a target system where -1 means a bad move, 0 means a neutral
                # move and 1 means a good move.

                # A move is bad if the snake crashes.
                if state.done:
                    target = -1
                    training_data.append(
                        self.pack_data(prev_observation, action, target))
                    break
                else:
                    food_distance = self.get_distance(state.snake[0],
                                                      state.food)

                    # A move is considered as good if the snake
                    # gets closer to the food or eats the food.
                    if state.score > prev_score or food_distance < prev_food_distance:
                        target = 1
                    else:
                        target = 0
                    training_data.append(
                        self.pack_data(prev_observation, action, target))
                    prev_observation = self.get_observation(state)
                    prev_food_distance = food_distance
                    prev_score = state.score
        return training_data
Example No. 11
 def visualise_game(self, model):
     game = SnakeGame(gui = True)
     _, _, snake, _ = game.start()
     prev_observation = self.generate_observation(snake)
     for _ in range(self.goal_steps):
         predictions = []
         for action in range(-1, 2):
            predictions.append(model.predict(self.add_action_to_observation(prev_observation, action).reshape(-1, 4, 1)))
         action = np.argmax(np.array(predictions))
         game_action = self.get_game_action(snake, action - 1)
         done, _, snake, _  = game.step(game_action)
         if done:
             break
         else:
             prev_observation = self.generate_observation(snake)
Example No. 12
 def initial_population(self):
     training_data = []
     for _ in range(self.initial_games):
         game = SnakeGame()
         _, _, snake, _ = game.start()
         prev_observation = self.generate_observation(snake)
         for _ in range(self.goal_steps):
             action, game_action = self.generate_action(snake)
             done, _, snake, _  = game.step(game_action)
             if done:
                 training_data.append([self.add_action_to_observation(prev_observation, action), 0])
                 break
             else:
                 training_data.append([self.add_action_to_observation(prev_observation, action), 1])
                 prev_observation = self.generate_observation(snake)
     print(len(training_data))
     return training_data
Example No. 13
 def __init__(self):
     super(SnakeGameDemoWidget, self).__init__()
     self.resize(QtCore.QSize(300, 300))
     self.setWindowTitle("SnakeGame")
     self.snake_game = SnakeGame()
     self.snake_game.deterministic_food = True
     self.snake_game.food_positions = [
         (6, 6),
         (2, 15),
         (17, 3)
     ]
     self.snake_game.rect = self.rect()
     self.snake_game.width = self.width()
     self.snake_game.height = self.height()
     self.snake_game.setup()
     self.tick_timer = QTimer()
     self.tick_timer.setInterval(100)
     self.tick_timer.timeout.connect(self.tick)
Example No. 14
 def test_model(self, model):
     steps_arr = []
     scores_arr = []
     for _ in range(self.test_games):
         steps = 0
         game_memory = []
         game = SnakeGame()
         _, score, snake, food, obstacles = game.start()
         prev_observation = self.generate_observation(
             snake, food, obstacles)
         for _ in range(self.goal_steps):
             predictions = []
             for action in range(
                     -1, 2):  # iterate through each possible decision
                 predictions.append(
                     model.predict(
                         self.add_action_to_observation(
                             prev_observation, action).reshape(-1, 5, 1)))
             action = np.argmax(np.array(
                 predictions))  # choose decision with highest value (1)
             game_action = self.get_game_action(
                 snake, action - 1)  # perform action in the game.
             done, score, snake, food, obstacles = game.step(game_action)
             game_memory.append([prev_observation, action])
             if done:
                 print('-----')
                 print(steps)
                 print(snake)
                 print(food)
                 print(prev_observation)
                 print(predictions)
                 break
             else:
                 prev_observation = self.generate_observation(
                     snake, food, obstacles)
                 steps += 1
         steps_arr.append(steps)
         scores_arr.append(score)
     print('Average steps:', mean(steps_arr))
     print(Counter(steps_arr))
     print('Average score:', mean(scores_arr))
     print(Counter(scores_arr))
Example No. 15
def train_agent():
    plot_scores = []
    plot_mean_scores = []
    total_score = 0
    record = 0
    agent = Agent()
    game = SnakeGame()
    while True:
        # get old state
        state_old = agent.get_state(game)

        # get move
        final_move = agent.get_action(state_old)

        # perform move and get new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # train short memory
        agent.train_short_memory(state_old, final_move, reward, state_new,
                                 done)

        # remember
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            game.reset()
            agent.num_games += 1
            agent.train_long_memory()

            if score > record:
                record = score
                agent.model.save()

            print(f'Game: {agent.num_games}, Score: {score}, Record: {record}')

            plot_scores.append(score)
            total_score += score
            mean_score = total_score / agent.num_games
            plot_mean_scores.append(mean_score)
            plot(plot_scores, plot_mean_scores)  # plotting helper (sketched below)
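The final call assumes a plotting helper whose implementation is not shown here. A hypothetical matplotlib sketch that draws per-game scores and the running mean could look like this:

import matplotlib.pyplot as plt

def plot(scores, mean_scores):
    # Redraw the whole figure each game; assumes an interactive backend.
    plt.clf()
    plt.title('Training progress')
    plt.xlabel('Number of games')
    plt.ylabel('Score')
    plt.plot(scores, label='score')
    plt.plot(mean_scores, label='mean score')
    plt.legend(loc='upper left')
    plt.pause(0.1)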
Example No. 16
    async def advance(self):
        if self.game.has_ended:
            self.last_msg = None
            self.game = SnakeGame(board_size)
            self.game.has_ended = False
            return

        if self.last_msg:
            # update last_msg with the one in cache (https://github.com/Rapptz/discord.py/issues/861)
            self.last_msg = discord.utils.get(self.cached_messages,
                                              id=self.last_msg.id)

            direction = None
            tie_message_appended = False
            prev_last_message = None
            while direction is None:
                direction = self.get_winning_move()
                if not direction:
                    if not tie_message_appended:
                        prev_last_message = self.last_msg.content
                        await self.last_msg.edit(
                            content='**❗ Tie detected**\n' + prev_last_message)
                        tie_message_appended = True

                    def check(reaction: discord.Reaction, user: discord.User):
                        return user.id != self.user.id and reaction.message.id == self.last_msg.id

                    await self.wait_for('reaction_add', check=check)
            if tie_message_appended:
                await self.last_msg.edit(content=prev_last_message)

            self.game.advance(direction)

        img = self.create_image()
        await self.send_new_state(img)

        if self.game.has_ended:
            self.has_ended = True
Example No. 17
 def initial_population(self):
     training_data = []
     print("Generating initial games")
     for i in range(self.initial_games):
         self.progress(i + 1, self.initial_games)
         game = SnakeGame()
         if self.game_type == 'maze':
             game = MazeGame()
         _, prev_score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         prev_food_distance = self.get_food_distance(snake, food)
         for _ in range(self.goal_steps):
             action, game_action = self.generate_action(snake)
             done, score, snake, food = game.step(game_action)
             if done:
                 training_data.append([
                     self.add_action_to_observation(prev_observation,
                                                    action), -1
                 ])  # Snake is dead
                 break
             else:
                 food_distance = self.get_food_distance(snake, food)
                 if score > prev_score or food_distance < prev_food_distance:
                     training_data.append([
                         self.add_action_to_observation(
                             prev_observation, action), 1
                     ])  # The last move was efficient
                 else:
                     training_data.append([
                         self.add_action_to_observation(
                             prev_observation, action), 0
                     ])  # The last move was not efficient
                 prev_observation = self.generate_observation(snake, food)
                 prev_food_distance = food_distance
     with open('init_pop_gen', 'wb') as file:
         pickle.dump(training_data, file)
     return training_data
Example No. 18
def train(num_episodes):
    max_moves_without_fruit = 15

    for i in range(num_episodes):
        game = SnakeGame(width, height, num_fruit=3)
        num_moves_without_fruit = 0
        while not game.game_over:
            observation = numpy.copy(game.board)
            action = choose_action(snake_model, observation)
            game.tick(game_actions[action])
            ## next_observation = numpy.copy(game.board)

            num_moves_without_fruit += 1

            if game.game_over:
                reward = -10
            elif game.just_ate_fruit:
                reward = 1
            elif num_moves_without_fruit > max_moves_without_fruit:
                reward = -1
                num_moves_without_fruit = 0
            else:
                reward = 0

            memory.add_to_memory(observation, action, reward)

            if game.game_over:
                #### total_reward  = sum(memory.rewards)
                total_observation = numpy.stack(memory.observations, 0)
                total_action = numpy.array(memory.actions)
                total_rewards = discount_rewards(memory.rewards, gamma)
                train_step(snake_model, optimizer, total_observation,
                           total_action, total_rewards)

                memory.clear()
                break
Example No. 19
                num_games += 1
                lengths += game.score()
                game.begin()
            
        # Save state every few runs
        if i % 5 == 0:
            agent.save_nn(path)
        eps = max(eps * decay, eps_min)
        if num_games > 0:
            print(' - games:', num_games, ', avg score:', 
                  round(lengths/num_games, 2), ', games won:', games_won)
    agent.save_nn(path)


# Initialize game
game = SnakeGame(SIZE, SIZE)

# Initialize Neural Net
nn = NeuralNetwork(inp_dim=[VIEW * VIEW + 8], out_dim=4, 
                   l1_dim=256, l2_dim=128, lr=.0001)

# Initialize memory
memory = ReplayBuffer(inp_dim=[VIEW * VIEW + 8], mem_size=100000, 
                      batch_size=64, priority_scale=PRIO)

# Initialize Deep Q Agent
agent = Agent(nn=nn, inp_dim=[VIEW * VIEW + 8], out_dim=4, 
              memory=memory, gamma=.99)

# Run training loop
train(game, agent, PATH + FILENAME, loops=100, steps=1000, eps=0, 
Example No. 20
def fitnessFunction(individual, final):
    score = 0
    sn = SnakeGame(100, 50, final)
    sn.startGame()
    movements = [sn.moveForward, sn.turnRight, sn.turnLeft]
    nn = NeuralNetwork(initLearningRate, numberOfInputs, numberOfLayers,
                       numberOfNeuronsPerLayer)
    gene = 0
    #print("----------------------")
    #print(individual)
    for layer in range(len(numberOfNeuronsPerLayer)):
        for neuron in range(0, numberOfNeuronsPerLayer[layer]):

            nn.layers[layer].neurons[neuron].bias = individual[gene][1]
            nn.layers[layer].neurons[neuron].weights = individual[gene][0]
            gene = gene + 1
    for i in range(0, maxMovementsSnake):
        board = sn.board
        food = sn.foodPosition
        snakeHead = sn.snake.snakeBody[0]

        directionPosition = sn.snake.directions.index(sn.snake.headDirection)
        front = [
            snakeHead[0] + sn.snake.directions[directionPosition][0],
            snakeHead[1] + sn.snake.directions[directionPosition][1]
        ]
        right = [
            snakeHead[0] + sn.snake.directions[(directionPosition + 1) % 4][0],
            snakeHead[1] + sn.snake.directions[(directionPosition + 1) % 4][1]
        ]
        left = [
            snakeHead[0] + sn.snake.directions[directionPosition - 1][0],
            snakeHead[1] + sn.snake.directions[directionPosition - 1][1]
        ]

        #nnInput=[board.boardMatrix[int(front[0])][int(front[1])]-4.0/(1+distance(front,food)),board.boardMatrix[int(right[0])][int(right[1])]-4.0/(1+distance(right,food)),board.boardMatrix[int(left[0])][int(left[1])]-4.0/(1+distance(left,food))]

        nnInput = [
            board.boardMatrix[int(front[0])][int(front[1])] +
            distance(front, food),
            board.boardMatrix[int(right[0])][int(right[1])] +
            distance(right, food),
            board.boardMatrix[int(left[0])][int(left[1])] +
            distance(left, food)
        ]

        nnInputNormalized = []
        for value in nnInput:
            nnInputNormalized.append(normalize(min(nnInput), max(nnInput), value))

        result = nn.feed(nnInputNormalized)

        movement = movements[result.index(max(result))]

        sn.nextMove = movement
        sn.play()

        if not sn.isAlive():
            break

    score = sn.score + 1 / (1 + (distance(snakeHead, food)))

    return score
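The normalize helper called above is not shown. A plausible min-max implementation matching its (lowest, highest, value) call signature, offered only as an assumption, is:

def normalize(lowest, highest, value):
    # Map value from [lowest, highest] to [0, 1]; guard against a flat range.
    if highest == lowest:
        return 0.0
    return (value - lowest) / (highest - lowest)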
Example No. 21
        if key == Key.left:
            self.action_list.append(self.game.ACTIONS["LEFT"])

        if key == Key.right:
            self.action_list.append(self.game.ACTIONS["RIGHT"])

        if key == Key.esc:
            self.game.done = True

    def __call__(self, game, speed):
        time.sleep(speed)

        action = self.game.ACTIONS["FORWARD"]

        if len(self.action_list) > 0:
            action = self.action_list.pop(0)

        return action


if __name__ == "__main__":
    from snake import SnakeGame
    from gui import MatplotlibGui, TerminalGui, YeetTerminalGui, NoGui

    snake_game = SnakeGame(6, 7)
    # keyboard_listener = KeyboardListener(snake_game)

    player = Player(snake_game, best_engine, YeetTerminalGui(), speed=1)
    player.play_game()
Example No. 22
def play(model: Optional["tf.keras.Model"] = None, **override_game_opts):
    # with python 3.9, this could be SnakeGame(game_options | override_game_opts)
    opts = game_options.copy()
    opts.update(override_game_opts)
    game = SnakeGame(**opts)
    play_game(game, model)
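As the comment notes, on Python 3.9+ the copy/update pair can be collapsed with the dict union operator. A sketch, keeping the keyword unpacking and assuming the same game_options and play_game names:

def play(model: Optional["tf.keras.Model"] = None, **override_game_opts):
    game = SnakeGame(**(game_options | override_game_opts))  # dict union, Python 3.9+
    play_game(game, model)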
Example No. 23
###################################################


# Performs the steps needed to start the game and get it ready to play,
# and returns the values we will be manipulating, packed in a list.
def start_game(game):  # the game is passed by reference, so starting it inside this function affects the caller's object too.
    a, b, c, d, e = game.start()  # steps, prev_score, snake (a list), food (x, y), obstacles (list of x, y pairs).
    return [a, b, c, d, e]  # these vary by game; they are what we will later use to train the neural net.
    

while active:
    # Create a Game object and get it ready.
    print("Initializing the game.")
    if MODE == 1:
        game = SnakeGame(gui=True)
    elif MODE == 0:
        game = SnakeGame()
    print("Game initialized.")
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sock.bind((SERVER_IP, IN_PORT)) # bind the incoming port. 
    print("Waiting for Client.")
    
    while not connected:
        try:
            data, addr = sock.recvfrom(1024)
            received_json = json.loads(data)
            if received_json[0] == 0:  # the client's code 0 means "connecting"
                connected = True
                in_progress = True
                print("Client has connected: " + str(addr))
Example No. 24
# as_ = AStarGrid(test_grid, start, end)
# as_.compute_longest()
#
# test_grid = np.full([4, 4], fill_value=1)
# start = [3, 2]
# end = [3, 1]
# as_ = AStarGrid(test_grid, start, end)
# as_.compute_longest()

# test_grid = np.full([5, 5], fill_value=1)
# start = [3, 0]
# end = [1, 4]
# as_ = AStarGrid(test_grid, start, end)
# as_.compute_longest()

snake_game = SnakeGame(display_width=400, display_height=440, snake_speed=100, snake_block=20, theme='mark_track',
                       mode='A_star')
snake_game.play_game()
# grid, snake_ = snake_game.astar_game_loop(1)
print('End')


#
# test_grid = np.full([4, 4], fill_value=1)
# # Coordinates should be 2D/consistent with grid.
# snake_ = np.array([[2, 3], [2, 2], [1, 2], [0, 2]])
# test_grid[snake_[:, 0], snake_[:, 1]] = 0
# end = [3, 3]
# asg = AStarGrid(test_grid, snake_[0], end, snake=snake_)
# # track = asg.compute_shortest()
# track = asg.compute_longest()
# print('End.')
Example No. 25
        # Get data
        training_data = self.generate_training_data(initial_games, goal_steps)
        x = torch.tensor([i[0] for i in training_data]).reshape(-1, 5)
        t = torch.tensor([i[1] for i in training_data]).reshape(-1, 1)

        # Define loss and optimizer
        loss_func = nn.MSELoss()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)

        # Train network
        for epoch in range(max_iter):
            # Forward propagation
            y = self.model(x.float())
            loss = loss_func(y, t.float())
            print("epoch: ", epoch, " loss: ",
                  loss.item())  # Zero the gradients
            optimizer.zero_grad()

            # Backward propagation
            loss.backward()  # perform a backward pass (backpropagation)
            optimizer.step()  # update parameters


if __name__ == "__main__":
    from player import Player
    from gui import MatplotlibGui, TerminalGui

    engine = DNN_Engine(initial_games=500, lr=2e-2, max_iter=500)
    player = Player(SnakeGame(), engine, TerminalGui(), speed=1)
    player.play_game()
Example No. 26
            actions = choose_action(snake_model, observations, single=False)
            for game, action in zip(games, actions):
                game.tick(game_actions[action])

            for memory, observation, action, game in zip(
                memories, observations, actions, games
            ):
                memory.add_to_memory(observation, action, reward(game))

            for i in range(batch_size):
                if games[i].game_over:
                    games[i] = SnakeGame(**opts)

        batch_memory = aggregate_memories(memories)

        train_step(
            snake_model,
            optimizer,
            observations=numpy.stack(batch_memory.observations, 0),
            actions=numpy.array(batch_memory.actions),
            discounted_rewards=discount_rewards(
                batch_memory.rewards, GAME_OVER_REWARD, gamma
            ),
        )


train(100, 100, 0.8)

game = SnakeGame(**opts)
play_game(game, snake_model)
Example No. 27
 def __init__(self, shape=(10, 12, 4), size=(20, 20)):
     self.NeuralNetwork = NeuralNetwork(shape)
     self.row, self.col = size
     self.game = SnakeGame(self.row, self.col)
Example No. 28
from curses import KEY_RIGHT, KEY_LEFT, KEY_UP, KEY_DOWN
from snake import SnakeGame
import random
import numpy as np
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import median, mean
from collections import Counter

game = SnakeGame( True )

def randomGames():
	for _ in range(5):
		game.reset( True )
		for _ in range(200):
			win = game.getWindow()
			win.getch()
			action = game.sample()
			#print action
			state, reward, alive = game.step( action )
			if not alive:
				break
	game.close()

#randomGames()
game.close()
print(game.getState())

initial_games = 50000
goal_steps = 500
Example No. 29
            if game.game_over:
                reward = -10
            elif game.just_ate_fruit:
                reward = 1
            elif num_moves_without_fruit > max_moves_without_fruit:
                reward = -1
                num_moves_without_fruit = 0
            else:
                reward = 0

            memory.add_to_memory(observation, action, reward)

            if game.game_over:
                #### total_reward  = sum(memory.rewards)
                total_observation = numpy.stack(memory.observations, 0)
                total_action = numpy.array(memory.actions)
                total_rewards = discount_rewards(memory.rewards, gamma)
                train_step(snake_model, optimizer, total_observation,
                           total_action, total_rewards)

                memory.clear()
                break


for i in range(16):
    train(1250)
    print(i)

game = SnakeGame(width, height, num_fruit=3)
play_game(game, snake_model)
Example No. 30
def testSnakeAI(net, size=(20, 20), blockSize=20, screen=None): #TODO: kwargs?
    #TODO: dynamic code that can handle varying sizes
    game = SnakeGame(size, blockSize)
    frameCount = 0
    
    lastDir = 1
    foodBonus = 0
    shortestDistanceToFood = 25
    
    while frameCount < 1000:
        inputs = []
        for i in range(28):
            inputs.append(0)
        
        #7 0 1
        #6   2
        #5 4 3
        probeDirMap = [(0, -1), (1, -1), (1, 0), (1, 1), (0, 1), (-1, 1), (-1, 0), (-1, -1)]
        
        for i in range(8):
            #distance to self
            move = probeDirMap[i]
            dist = 0
            pos = game.data[0]
            while True:
                pos = (pos[0] + move[0], pos[1] + move[1])
                if pos in game.data:
                    break  # hit a segment of the snake's own body
                dist += 1
                if dist == 25:
                    break
            
            inputs[i] = dist * 4
        
        for i in range(8):
            #distance to walls
            move = probeDirMap[i]
            #move = probeDirMap[lastDir * 2] #FIXME: test this - only looks in current direction
            dist = 0
            pos = game.data[0]
            while True:
                pos = (pos[0] + move[0], pos[1] + move[1])
                if pos[0] < 0 or pos[0] >= game.size[0] or pos[1] < 0 or pos[1] >= game.size[1]:
                    break
                dist += 1
            inputs[i + 8] = dist * 2
        
        for i in range(8):
            #distance to food
            move = probeDirMap[i]
            dist = 0
            pos = game.data[0]
            while True:
                pos = (pos[0] + move[0], pos[1] + move[1])
                if pos[0] < 0 or pos[0] >= game.size[0] or pos[1] < 0 or pos[1] >= game.size[1]:
                    dist = 25
                    break
                dist += 1
                if pos == game.food:
                    break
            if dist < shortestDistanceToFood:
                shortestDistanceToFood = dist
            inputs[i + 16] = dist * 2
            #inputs[i + 16] = 100
        
        for i in range(4):
            #inputs are inverted
            if lastDir == i:
                inputs[i + 24] = 0
            else:
                inputs[i + 24] = 100
        
        newDir = lastDir
        outputs = net.process(inputs)
        for i in range(4):
            if outputs[i]:
                newDir = i
        
        if not (lastDir == 0 and newDir == 2) and not (lastDir == 2 and newDir == 0) and not (lastDir == 1 and newDir == 3) and not (lastDir == 3 and newDir == 1):
            game.go(newDir)
        else:
            newDir = lastDir
        
        if lastDir != newDir:
            lastDir = newDir
        
        lfood = game.food
        
        ok = game.processFrame()
        if not ok:
            break
        
        if game.food != lfood: #FIXME
            foodBonus += 25 #works now that we're not using len(game.data) in the score calculation
            shortestDistanceToFood = 25
        
        if screen is not None:
            screen.blit(game.render(), (0, 0))
            pygame.display.flip()
            time.sleep(0.01)
        
        frameCount += 1
    
    return ((25 - shortestDistanceToFood) * 2) + (foodBonus * 3) #reward extra for getting the food