예제 #1
0
    def testRunHitSnakeAndDie(self, mock_random):
        """Steering the snake into its own body must end the game as a loss.

        The scripted direction sequence grows the snake by eating the
        deterministically-placed food until the final X_NEGATIVE move
        collides with the snake's own body.  mock_random supplies every
        "random" coordinate used for placement.
        """
        mock_input_interface = Mock()
        fake_output = FakeOutput()

        # One direction per game tick, consumed in order.
        mock_input_interface.get_next_action.side_effect = [
            Direction.Y_NEGATIVE, Direction.X_NEGATIVE, Direction.Y_POSITIVE,
            Direction.Y_POSITIVE, Direction.X_POSITIVE, Direction.Y_NEGATIVE,
            Direction.X_NEGATIVE
        ]

        # Deterministic coordinates for snake/food placement on the 3x3 board.
        mock_random.side_effect = [2, 1, 2, 0, 1, 0, 1, 1, 1, 2, 2, 2, 0, 0]
        SnakeGame.run(mock_input_interface, fake_output, 3, 3)

        # False = the game ended in a loss (snake died).
        self.assertEqual(fake_output.game_results, [False])

        # Final drawn frame: a six-segment snake plus uneaten food at (0, 0).
        fake_output.verify_game_map(
            self,
            len(fake_output.drawn_maps) - 1, {
                (1, 1): Snake,
                (2, 1): Snake,
                (2, 0): Snake,
                (1, 0): Snake,
                (1, 2): Snake,
                (2, 2): Snake,
                (0, 0): Food
            })
예제 #2
0
def train(iters, warm_start=False, verbose=False, learning_rate=0.8, gamma=0.8, epsilon=0.2,
          dont_repeat=False, name="snake_ai.pkl"):
    """
    QLearn usage example training in the Snake environment

    Plays `iters` games, live-plots the running sum of the agent's Q-table
    values as a crude learning-progress signal, and checkpoints the agent
    to `name` every 100 games plus once at the end.

    Args:
        iters: number of training games to play.
        warm_start: if True, resume from the pickle at `name`.
        verbose: forwarded to the agent for debug output.
        learning_rate, gamma, epsilon: Q-learning hyperparameters.
        dont_repeat: forwarded to the agent as `no_repeat`.
        name: checkpoint filename.
    """
    if warm_start:
        ai = joblib.load(name)
    else:
        ai = QLearn([0, 1, 2, 3])
    ai.learning_rate = learning_rate
    ai.gamma = gamma
    ai.epsilon = epsilon
    ai.verbose = verbose
    ai.no_repeat = dont_repeat
    evals = []
    checkpoint_every = 100  # games between periodic checkpoints
    for i in range(1, iters + 1):
        game = SnakeGame()
        ai = game.demo(ai, light_mode=True)
        # Idiom fix: sum the Q-values directly instead of the redundant
        # np.array([v for v in dict.values()]) round-trip.
        evals.append(np.sum(list(ai.memory.values())))
        plt.plot(evals, c="b")
        plt.pause(0.05)
        if not i % checkpoint_every:
            joblib.dump(ai, name)
    joblib.dump(ai, name)  # final checkpoint (may repeat the last periodic one)
예제 #3
0
 def initial_population(self):
     """Play self.initial_games random games and build labelled training data.

     Each sample is [observation+action, label] with label -1 when the move
     killed the snake, 1 when it scored or moved closer to the food, and 0
     otherwise.
     """
     training_data = []
     for _ in range(self.initial_games):
         game = SnakeGame()
         _, prev_score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         prev_food_distance = self.get_food_distance(snake, food)
         for _ in range(self.goal_steps):
             action, game_action = self.generate_action(snake)
             done, score, snake, food = game.step(game_action)
             if done:
                 # Fatal move: label -1.
                 training_data.append([
                     self.add_action_to_observation(prev_observation,
                                                    action), -1
                 ])
                 break
             else:
                 food_distance = self.get_food_distance(snake, food)
                 # Label 1 for progress (ate food / got closer), 0 otherwise.
                 if score > prev_score or food_distance < prev_food_distance:
                     training_data.append([
                         self.add_action_to_observation(
                             prev_observation, action), 1
                     ])
                 else:
                     training_data.append([
                         self.add_action_to_observation(
                             prev_observation, action), 0
                     ])
                 prev_observation = self.generate_observation(snake, food)
                 prev_food_distance = food_distance
                 # BUG FIX: prev_score was never refreshed, so once the snake
                 # ate a single food every later move compared against the
                 # stale starting score and was labelled 1 regardless of
                 # direction (the sibling variant of this method does this).
                 prev_score = score
     return training_data
예제 #4
0
 def initial_population(self):
     """Play random games and label each (observation, action) pair.

     A step the snake survives is labelled 1; the step that kills it is
     labelled 0.  Returns the accumulated list of training pairs.
     """
     samples = []
     print('Creating initial population out of %s games...' %
           self.initial_games)
     for _ in range(self.initial_games):
         game = SnakeGame()
         _, _, snake, _ = game.start()
         observation = self.generate_observation(snake)
         for _ in range(self.goal_steps):
             action, game_action = self.generate_action(snake)
             done, _, snake, _ = game.step(game_action)
             label = 0 if done else 1
             samples.append(
                 [self.add_action_to_observation(observation, action), label])
             if done:
                 break
             observation = self.generate_observation(snake)
     print('Training data size: %s' % len(samples))
     return samples
예제 #5
0
 def test_model(self, model):
     """Run the trained model for self.test_games games and report step stats."""
     steps_arr = []
     for _ in range(self.test_games):
         steps = 0
         game_memory = []
         game = SnakeGame()
         _, _, snake, _ = game.start()
         prev_observation = self.generate_observation(snake)
         for _ in range(self.goal_steps):
             # One model prediction per candidate action (-1, 0, 1).
             predictions = [
                 model.predict(
                     self.add_action_to_observation(
                         prev_observation, candidate).reshape(-1, 4, 1))
                 for candidate in range(-1, 2)
             ]
             action = np.argmax(np.array(predictions))
             game_action = self.get_game_action(snake, action - 1)
             done, _, snake, _ = game.step(game_action)
             game_memory.append([prev_observation, action])
             if done:
                 break
             prev_observation = self.generate_observation(snake)
             steps += 1
         steps_arr.append(steps)
     print('Average steps:', mean(steps_arr))
     print(Counter(steps_arr))
예제 #6
0
 def __init__(self, step_limit=None):
     """step_limit optionally caps steps per episode; None disables the cap."""
     # Single discrete action in {0, 1, 2, 3}.
     self._action_spec = array_spec.BoundedArraySpec(
             shape=(), dtype=np.int32, minimum=0, maximum=3, name='action')
     # Flattened 10x10 board of non-negative integer cell codes.
     self._observation_spec = array_spec.BoundedArraySpec(
             shape=(100,), dtype=np.int32, minimum=0, name="observation")
     self._game = SnakeGame(size=10)
     self._episode_ended = False
     self._reward_count = 0  # cumulative reward this episode
     self._step_limit = step_limit
     self._step_count = 0
예제 #7
0
    def testRunOutOfSpace(self):
        """Filling the whole 2x1 board ends the game with result True."""
        player = Mock()
        screen = FakeOutput()
        # Always move right: one step fills the 2x1 board completely.
        player.get_next_action.return_value = Direction.X_POSITIVE

        SnakeGame.run(player, screen, 2, 1)

        self.assertEqual(screen.game_results, [True])
        self.assertEqual(2, len(screen.drawn_maps))

        # Second (final) frame: both cells occupied by the snake.
        screen.verify_game_map(self, 1, {(0, 0): Snake, (1, 0): Snake})
예제 #8
0
파일: search.py 프로젝트: m-tosch/Snake-AI
 def test(self):
     """Run self.test_games search-driven games and print step/score stats.

     Logs any death where the previous observation reported at least one
     obstacle-free direction (a potentially avoidable death), shows an
     in-place progress line per game, then prints aggregate statistics.
     """
     print('--- test ---')
     start = time.time()
     # Seeded with 0 so mean() is defined before the first game completes;
     # note this slightly skews the reported averages.
     steps_arr = [0]
     scores_arr = [0]
     for i in range(self.test_games):
         steps = 0
         game = SnakeGame()
         _, score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         for _ in range(self.goal_steps):
             game_action = self.get_game_action(game)
             done, score, snake, food = game.step(game_action)
             if done:
                 # Died although some direction was reported free (the first
                 # three observation slots are the obstacle flags): log the
                 # observation and the chosen move for debugging.
                 if prev_observation[0] != 1 or prev_observation[
                         1] != 1 or prev_observation[2] != 1:
                     action_str = 'UP'
                     if game_action == 1:
                         action_str = 'RIGHT'
                     elif game_action == 2:
                         action_str = 'DOWN'
                     elif game_action == 3:
                         action_str = 'LEFT'
                     print(
                         str(i) + '/' + str(self.test_games) + ' ' +
                         str(prev_observation) + ' ' + action_str + ' [' +
                         str(round(mean(steps_arr), 2)) + ', ' +
                         str(round(mean(scores_arr), 2)) + ']')
                 break
             else:
                 prev_observation = self.generate_observation(snake, food)
                 steps += 1
         # Progress line, overwritten in place via carriage return.
         print('game: ' + str(i + 1) + '/' + str(self.test_games) + ' [' +
               str(round(((i + 1) / self.test_games) * 100, 1)) + '%]' +
               ' goal_steps: ' + str(self.goal_steps) + ' time: ' +
               str(round(time.time() - start, 3)) + 's',
               end='\r')
         steps_arr.append(steps)
         scores_arr.append(score)
     end = time.time()
     avg_steps = mean(steps_arr)
     avg_score = mean(scores_arr)
     # Final (non-overwritten) progress line plus aggregate statistics.
     print('game: ' + str(i + 1) + '/' + str(self.test_games) + ' [' +
           str(round(((i + 1) / self.test_games) * 100, 1)) + '%]' +
           ' goal_steps: ' + str(self.goal_steps) + ' time: ' +
           str(round(end - start, 3)) + 's')
     print('steps: avg=' + str(round(avg_steps, 2)) + ' max=' +
           str(max(steps_arr)) + ' min=' + str(min(steps_arr)))
     print('score: avg=' + str(round(avg_score, 2)) + ' max=' +
           str(max(scores_arr)) + ' min=' + str(min(scores_arr)))
     print(
         time.strftime("Total time elapsed: %H:%M:%S",
                       time.gmtime(end - start)))
예제 #9
0
class SnakeEnv(py_environment.PyEnvironment):
    """TF-Agents PyEnvironment wrapper around a 10x10 SnakeGame.

    Observations are the flattened board (100 ints); actions are the four
    movement directions.  Episodes end on a terminal game state, after 1000
    accumulated reward, or when step_limit steps pass without any reward.
    """

    def __init__(self, step_limit=None):
        # Single discrete action in {0, 1, 2, 3}.
        self._action_spec = array_spec.BoundedArraySpec(
                shape=(), dtype=np.int32, minimum=0, maximum=3, name='action')
        # Flattened 10x10 board of non-negative integer cell codes.
        self._observation_spec = array_spec.BoundedArraySpec(
                shape=(100,), dtype=np.int32, minimum=0, name="observation")
        self._game = SnakeGame(size=10)
        self._episode_ended = False
        self._reward_count = 0  # reward accumulated this episode
        self._step_limit = step_limit  # max steps since last reward; None = off
        self._step_count = 0  # steps since the last nonzero reward

    def action_spec(self):
        return self._action_spec

    def observation_spec(self):
        return self._observation_spec

    def _reset(self):
        # Start a fresh episode and return the initial restart timestep.
        self._episode_ended = False
        self._step_count = 0
        self._reward_count = 0
        obs = self._game.reset()
        return ts.restart(obs.flatten())

    def _step(self, action):
        # TF-Agents contract: stepping a finished episode starts a new one.
        if self._episode_ended:
            return self.reset()

        self._step_count += 1
        obs, reward, terminal = self._game.step(action)
        obs = obs.flatten()

        if terminal:
            self._episode_ended = True

        self._reward_count += reward
        # Stop if we have gotten 1000 treats
        if self._reward_count >= 1000:
            return ts.termination(obs, reward)

        # Reset how long we have to live if we get a treat
        if reward != 0:
            self._step_count = 0

        if self._step_limit is not None and self._step_count > self._step_limit:
            self._episode_ended = True

        if self._episode_ended:
            return ts.termination(obs, reward)

        return ts.transition(obs, reward, discount=1.0)
예제 #10
0
    def testRunHitWallAndDie(self, mock_random):
        """Walking off the board edge must end the game as a loss."""
        mock_input_interface = Mock()
        # Always move +X: from the centre of a 3x3 board the second such move
        # leaves the board.
        mock_input_interface.get_next_action.return_value = Direction.X_POSITIVE
        fake_output = FakeOutput()

        # Deterministic "random" coordinates; food lands at (0, 0).
        mock_random.side_effect = [0, 0]

        SnakeGame.run(mock_input_interface, fake_output, 3, 3)
        # False = the game ended in a loss (snake died).
        self.assertEqual(fake_output.game_results, [False])
        self.assertEqual(2, len(fake_output.drawn_maps))

        # Frame 0: snake at centre; frame 1: one step right, then it dies.
        fake_output.verify_game_map(self, 0, {(1, 1): Snake, (0, 0): Food})
        fake_output.verify_game_map(self, 1, {(2, 1): Snake, (0, 0): Food})
예제 #11
0
파일: nn.py 프로젝트: m-tosch/Snake-AI
 def initial_population(self, num_games):
     """Play num_games random games and return labelled training samples.

     Labels: -15 for the fatal move, 1 for progress (scored or approached
     the food), 0 for a survived but non-progressing move.  Prints an
     in-place progress line per game.
     """
     print('--- initial_population ---')
     start = time.time()
     training_data = []
     for i in range(num_games):
         game = SnakeGame()
         _, prev_score, snake, food = game.start()
         prev_observation = self.generate_observation(
             snake, food
         )  # [1/0, 1/0, 1/0, angle] obstacle left, front, right + food angle
         prev_food_distance = self.get_food_distance(snake, food)
         for j in range(self.max_steps):
             action, game_action = self.generate_action(
                 snake)  # action -1/0/1  game_action 0/1/2/3
             done, score, snake, food = game.step(game_action)
             if done:
                 # left,forward,right | obst.left,front,right | angle | survived
                 # [ array([-1/0/1, 1/0, 1/0, 1/0 -1-to-1]), -1/0/1 ]
                 training_data.append([
                     self.add_action_to_observation(action,
                                                    prev_observation), -15
                 ])  # -1 snake didn't survive
                 break
             else:
                 food_distance = self.get_food_distance(snake,
                                                        food)  # always >=1
                 if score > prev_score or food_distance < prev_food_distance:
                     training_data.append([
                         self.add_action_to_observation(
                             action, prev_observation), 1
                     ])  # 1 snake survived and right direction
                 else:
                     training_data.append([
                         self.add_action_to_observation(
                             action, prev_observation), 0
                     ])  # 0 snake survived but wrong direction
                 prev_observation = self.generate_observation(snake, food)
                 prev_food_distance = food_distance
                 prev_score = score
         # Progress line, overwritten in place via carriage return.
         print(' game: ' + str(i + 1) + '/' + str(num_games) + ' [' +
               str(round(((i + 1) / num_games) * 100, 1)) + '%]' +
               ' max_steps: ' + str(self.max_steps) + ' time: ' +
               str(round(time.time() - start, 3)) + 's',
               end='\r')
     end = time.time()
     # Final (non-overwritten) progress line.
     print(' game: ' + str(i + 1) + '/' + str(num_games) + ' [' +
           str(round(((i + 1) / num_games) * 100, 1)) + '%]' +
           ' max_steps: ' + str(self.max_steps) + ' time: ' +
           str(round(time.time() - start, 3)) + 's')
     return training_data
예제 #12
0
 def visualise_game(self, model):
     """Play one GUI game, choosing each step greedily by model prediction."""
     game = SnakeGame(gui=True)
     _, _, snake, food = game.start()
     prev_observation = self.generate_observation(snake, food)
     for _ in range(self.goal_steps):
         # One model score per candidate action (-1, 0, 1).
         scores = [
             model.predict(
                 self.add_action_to_observation(prev_observation,
                                                candidate).reshape(-1, 5, 1))
             for candidate in range(-1, 2)
         ]
         action = np.argmax(np.array(scores))
         game_action = self.get_game_action(snake, action - 1)
         done, _, snake, food = game.step(game_action)
         if done:
             break
         prev_observation = self.generate_observation(snake, food)
예제 #13
0
    def test_model(self, model, print_stats, save_obs, print_avrg=True):
        """Evaluate the model over self.tests games.

        Optionally prints per-game stats, saves (observation, prediction)
        pairs to self.test_filename, and prints averaged results.
        """
        steps_arr = []
        scores_arr = []
        test_observations = []
        # Loop over test games
        for i in range(self.tests):
            steps = 0

            game = SnakeGame()
            _isdone, score, snake, food = game.start()
            prev_observation = self.generate_observation(snake, food)
            # Loop over steps within one game
            for j in range(self.max_steps):
                step_predictions = []
                # Collect a prediction for each candidate action
                for action in range(-1, 2):
                    step_predictions.append(
                        model.predict(self.merge_action_and_observation(prev_observation, action).reshape(-1, 5, 1)))
                # Choose the action with the highest predicted value
                action = np.argmax(np.array(step_predictions))
                game_action = self.get_game_action(snake, action - 1)
                # Apply the chosen action
                isdone, score, snake, food = game.step(game_action)
                # Optionally record the observation with its predicted value
                if save_obs:
                    test_observations.append([self.merge_action_and_observation(prev_observation, action), float(step_predictions[action])])
                # If the game ended, optionally print per-game results
                if isdone:
                    if print_stats:
                        print('#####################################')
                        print('steps:' + str(steps))
                        print('snake length:' + str(len(snake)))
                        # print('last step_predictions;' + str(step_predictions))
                    break
                else:
                    prev_observation = self.generate_observation(snake, food)
                    steps += 1
            steps_arr.append(steps)
            scores_arr.append(score)
        if save_obs:
            save_list(observations=test_observations, filename=self.test_filename)

        # Print averages over all games
        if print_avrg:
            print('Average steps:', mean(steps_arr))
            print(Counter(steps_arr))
            print('Average score:', mean(scores_arr))
            print(Counter(scores_arr))
예제 #14
0
def new_game():
    """Reset the current game's screen and rebind the module-global
    new_snake_game to a fresh SnakeGame instance."""
    global new_snake_game
    # accessing the old global snake game to reset screen
    new_snake_game.screen.resetscreen()
    # new game instance
    new_snake_game = SnakeGame()
예제 #15
0
 def visual_test_model(self, model):
     """Play a single game with the GUI on, acting greedily on the model.

     Stops on death or after self.max_steps steps; intended for visually
     inspecting the trained policy rather than collecting statistics.
     """
     game = SnakeGame(gui=True)
     isdone, score, snake, food = game.start()
     prev_observation = self.generate_observation(snake, food)
     for _ in range(self.max_steps):
         # One prediction per candidate action (-1, 0, 1).
         step_predictions = [
             model.predict(
                 self.merge_action_and_observation(
                     prev_observation, candidate).reshape(-1, 5, 1))
             for candidate in range(-1, 2)
         ]
         action = np.argmax(np.array(step_predictions))
         game_action = self.get_game_action(snake, action - 1)
         done, score, snake, food = game.step(game_action)
         if done:
             break
         prev_observation = self.generate_observation(snake, food)
예제 #16
0
    def __init__(self, size, max_without_eating=300, mode='standard'):
        """Create the Snake gym environment.

        Args:
            size: board dimensions, indexed as size[0] x size[1].
            max_without_eating: steps without an apple before the episode ends.
            mode: 'standard' (4 discrete actions) or 'pov' (3 discrete actions).

        Raises:
            ValueError: if mode is neither 'standard' nor 'pov'.
        """
        super(SnakeEnv, self).__init__()
        self.max_without_eating = max_without_eating
        self.steps_without_apple = 0

        if mode == 'standard':
            self.action_space = spaces.Discrete(4)
        elif mode == 'pov':
            self.action_space = spaces.Discrete(3)
        else:
            # Typo fix: the message previously read 'Uknown mode'.
            raise ValueError('Unknown mode: ' + str(mode))

        # 3-channel grid observation, one value triple per board cell.
        self.observation_space = spaces.Box(low=0,
                                            high=1,
                                            shape=(size[0], size[1], 3),
                                            dtype=np.uint8)
        self.game = SnakeGame(size, controls=mode)
예제 #17
0
파일: nn.py 프로젝트: m-tosch/Snake-AI
 def test_model(self, model, n):
     """Evaluate the model on test_games / n games; return (avg_steps, avg_score).

     n divides the configured number of test games to allow quick partial
     evaluations.  Prints an in-place progress line per game and aggregate
     statistics at the end.
     """
     print('--- test_model ---')
     start = time.time()
     # Seeded with 0 so mean() is defined before any game finishes; this
     # slightly skews the reported averages.
     steps_arr = [0]
     scores_arr = [0]
     for i in range(round(self.test_games / n)):
         steps = 0
         game = SnakeGame()
         _, score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         for _ in range(self.max_steps):
             # One model prediction per candidate action (-1, 0, 1).
             predictions = []
             for action in range(-1, 2):
                 predictions.append(
                     model.predict(
                         self.add_action_to_observation(
                             action, prev_observation).reshape(-1, 5, 1)))
             action = np.argmax(np.array(predictions))
             game_action = self.get_game_action(snake, action - 1)
             done, score, snake, food = game.step(game_action)
             if done:
                 break
             else:
                 prev_observation = self.generate_observation(snake, food)
                 steps += 1
         steps_arr.append(steps)
         scores_arr.append(score)
         # Progress line, overwritten in place via carriage return.
         print('game: ' + str(i + 1) + '/' +
               str(round(self.test_games / n)) + ' steps_avg=' +
               str(round(mean(steps_arr), 2)) + ' score_avg=' +
               str(round(mean(scores_arr), 2)),
               end='\r')
     end = time.time()
     avg_steps = mean(steps_arr)
     avg_score = mean(scores_arr)
     # Final (non-overwritten) summary lines.
     print('game: ' + str(i + 1) + '/' + str(round(self.test_games / n)) +
           ' steps_avg=' + str(round(mean(steps_arr), 2)) + ' score_avg=' +
           str(round(mean(scores_arr), 2)))
     print('steps: avg=' + str(round(avg_steps, 2)) + ' max=' +
           str(max(steps_arr)) + ' min=' + str(min(steps_arr)))
     print('score: avg=' + str(round(avg_score, 2)) + ' max=' +
           str(max(scores_arr)) + ' min=' + str(min(scores_arr)))
     print(time.strftime("Time elapsed: %H:%M:%S",
                         time.gmtime(end - start)))
     return avg_steps, avg_score
예제 #18
0
def main():
    """Build the configured input/output interfaces and start the game loop."""
    in_kind = settings.input_interface
    if in_kind == 'Keyboard':
        source = Keyboard(settings.time_step_seconds)
    elif in_kind == 'ML':
        source = MLInput()
    else:
        raise RuntimeError(
            f'Unknown input interface {settings.input_interface}')

    out_kind = settings.output_interface
    if out_kind == 'Text':
        sink = TextOutput()
    elif out_kind == 'Pygame':
        sink = PygameOutput()
    else:
        raise RuntimeError(
            f'Unknown output interface {settings.output_interface}')

    SnakeGame.run(source, sink, settings.dim_x, settings.dim_y)
예제 #19
0
 def initial_population(self):
     """Play self.initial_games random games and build labelled training data."""
     training_data = []
     # Play amount of games equal to value of initial games
     for _ in range(self.initial_games):
         # Create new game, for reference of game setup look at snake_game.py start() function.
         game = SnakeGame()
         _, prev_score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         prev_food_distance = self.get_food_distance(snake, food)
         # Run until amount of moves is equal to value of goal_steps or until failure state is reached.
         for _ in range(self.goal_steps):
             action, game_action = self.generate_action(snake)
             done, score, snake, food = game.step(game_action)
             # If game has ended.
             if done:
                 # The snake died: mark the fatal move as a failure (-1).
                 training_data.append([
                     self.add_action_to_observation(prev_observation,
                                                    action), -1
                 ])
                 break
             # If not done keep playing.
             else:
                 food_distance = self.get_food_distance(snake, food)
                 # Score increased or food got closer: optimal move (1).
                 if score > prev_score or food_distance < prev_food_distance:
                     training_data.append([
                         self.add_action_to_observation(
                             prev_observation, action), 1
                     ])
                 # Otherwise mark as unoptimal move (0) and add to training data.
                 else:
                     training_data.append([
                         self.add_action_to_observation(
                             prev_observation, action), 0
                     ])
                 # Mark previous observation and food distance as latest values.
                 prev_observation = self.generate_observation(snake, food)
                 prev_food_distance = food_distance
                 # BUG FIX: prev_score was never updated, so after the first
                 # food every later move was labelled optimal (1) regardless
                 # of direction.
                 prev_score = score
     return training_data
예제 #20
0
class SnakeEnv(gym.Env):
    """Open AI Snake Environment"""
    metadata = {'render.modes': ['human']}

    def __init__(self, size, max_without_eating=300, mode='standard'):
        """Create the environment.

        Args:
            size: board dimensions, indexed as size[0] x size[1].
            max_without_eating: steps without an apple before the episode ends.
            mode: 'standard' (4 discrete actions) or 'pov' (3 discrete actions).

        Raises:
            ValueError: if mode is neither 'standard' nor 'pov'.
        """
        super(SnakeEnv, self).__init__()
        self.max_without_eating = max_without_eating
        self.steps_without_apple = 0

        if mode == 'standard':
            self.action_space = spaces.Discrete(4)
        elif mode == 'pov':
            self.action_space = spaces.Discrete(3)
        else:
            # Typo fix: the message previously read 'Uknown mode'.
            raise ValueError('Unknown mode: ' + str(mode))

        self.observation_space = spaces.Box(low=0,
                                            high=1,
                                            shape=(size[0], size[1], 3),
                                            dtype=np.uint8)
        # BUG FIX: reset() calls np.zeros_like(self.explored), but the
        # attribute was never created, so the very first reset() raised
        # AttributeError.  Initialize it here as a per-cell grid (shape
        # assumed to match the board -- confirm against code that reads it).
        self.explored = np.zeros((size[0], size[1]), dtype=np.uint8)
        self.game = SnakeGame(size, controls=mode)

    def step(self, action):
        """Advance one tick; reward +1 for an apple, -1 for dying, else 0."""
        self.game.update(action)

        if self.game.ate_apple:
            reward = 1
            self.steps_without_apple = 0
        elif self.game.snake.dead:
            reward = -1
        else:
            reward = 0
            self.steps_without_apple += 1

        # Episode ends on death or after too many apple-less steps.
        done = self.steps_without_apple > self.max_without_eating or self.game.snake.dead

        return self.game.get_state(), reward, done, {}

    def reset(self):
        """Reset the game and per-episode counters; return the initial state."""
        self.game.reset()
        self.explored = np.zeros_like(self.explored)
        self.steps_without_apple = 0
        return self.game.get_state()

    def render(self, mode='human', close=False):
        """Render via the underlying game, slowed down for human viewing."""
        if mode == 'human':
            time.sleep(0.1)
            self.game.render(mode=mode)

    def seed(self, seed=None):
        """Seed NumPy's global RNG."""
        np.random.seed(seed)
예제 #21
0
 def visualise_game(self, model):
     """Replay the trained model in a GUI game, pausing between steps."""
     game = SnakeGame(gui=True)
     _, _, snake, _ = game.start()
     prev_observation = self.generate_observation(snake)
     for _ in range(self.goal_steps):
         # One model prediction per candidate action (-1, 0, 1).
         predictions = [
             model.predict(
                 self.add_action_to_observation(prev_observation,
                                                candidate).reshape(-1, 4, 1))
             for candidate in range(-1, 2)
         ]
         action = np.argmax(np.array(predictions))
         game_action = self.get_game_action(snake, action - 1)
         done, _, snake, _ = game.step(game_action)
         if done:
             break
         prev_observation = self.generate_observation(snake)
         # delay between steps for better perception
         time.sleep(0.2)
예제 #22
0
def main(n,
         generations_num,
         load_and_play,
         model_filename,
         elite_size,
         debug_mode=False):
    """Either train snake networks or replay a saved model.

    load_and_play == 0 trains for generations_num generations; any other
    value loads model_filename and plays one GUI game with it, stopping on
    a game-over signal or after 100 steps without a score increase.
    """
    if load_and_play == 0:
        run_generations(n, generations_num, model_filename, elite_size)
    else:
        parent = SnakeNetwork()
        parent.model = load_saved_model(model_filename)
        game = SnakeGame(gui=True)
        x = game.start()
        prev_score = score = 0
        prev_j = 0
        x_array = []
        x1_array = []
        end = 0
        for j in range(400):  # hard cap on total steps
            # step() returns [state, score, game-over flag].
            [x, score, end] = game.step(parent.predict_action(x))
            if score > prev_score:
                prev_score = score
                prev_j = j  # last step at which the score improved
            # Stop on game over, or 100 stagnant steps since the last score.
            if end > 0 or j - 100 > prev_j:
                break
            if debug_mode:
                # Record states and raw model outputs for later inspection.
                x_array.append(x)
                x1_array.append(parent.model.predict(x))
        if end == 0: game.end_game()
        if debug_mode:
            print(x_array)
            print(x1_array)
예제 #23
0
 def test_model(self, model):
     """Run test_games games with the model and print step/score statistics.

     Prints one '.' per finished game; the commented-out prints are
     debugging leftovers.
     """
     steps_arr = []
     scores_arr = []
     print('Testing model on %s test games' % self.test_games)
     for _ in range(self.test_games):
         steps = 0
         game_memory = []
         game = SnakeGame()
         _, score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         for _ in range(self.goal_steps):
             # One model prediction per candidate action (-1, 0, 1).
             predictions = []
             for action in range(-1, 2):
                 predictions.append(
                     model.predict(
                         self.add_action_to_observation(
                             prev_observation, action).reshape(-1, 5, 1)))
             action = np.argmax(np.array(predictions))
             game_action = self.get_game_action(snake, action - 1)
             done, score, snake, food = game.step(game_action)
             game_memory.append([prev_observation, action])
             if done:
                 # print('-----')
                 # print(steps)
                 # print(snake)
                 # print(food)
                 # print(prev_observation)
                 # print(predictions)
                 uprint('.', end='')
                 break
             else:
                 prev_observation = self.generate_observation(snake, food)
                 steps += 1
         steps_arr.append(steps)
         scores_arr.append(score)
     print('\nAverage steps:', mean(steps_arr))
     print(Counter(steps_arr))
     print('Average score:', mean(scores_arr))
     print(Counter(scores_arr))
예제 #24
0
    def generate_train_data(self, add_test=False):
        """Generate labelled training data by playing self.trains random games.

        Each entry is [merged(observation, action), reward] where reward is
        -1 (the move killed the snake), 1 (scored or moved closer to the
        food) or 0 (otherwise).  With add_test=True, samples saved by a
        previous test run are appended.  The full set is saved via save_list
        and returned.
        """
        training_data = []
        for i in range(self.trains):
            game = SnakeGame()
            done, prev_score, snake, food = game.start()
            prev_observation = self.generate_observation(snake, food)
            prev_food_distance = self.get_food_distance(snake, food)
            for j in range(self.max_steps):
                # Generate a (random) action and advance the game one step.
                action, game_action = self.generate_action(snake)
                done, score, snake, food = game.step(game_action)
                if done:
                    # Game over (snake died): label the final pair -1.
                    training_data.append([self.merge_action_and_observation(prev_observation, action), -1])
                    break
                else:
                    food_distance = self.get_food_distance(snake, food)
                    # Score increased or the food got closer: label 1.
                    if score > prev_score or food_distance < prev_food_distance:
                        training_data.append([self.merge_action_and_observation(prev_observation, action), 1])
                    else:
                        # Otherwise label the pair 0.
                        training_data.append([self.merge_action_and_observation(prev_observation, action), 0])
                    prev_observation = self.generate_observation(snake, food)
                    prev_food_distance = food_distance
                    # BUG FIX: prev_score was never refreshed, so after the
                    # first food every later move was labelled 1 regardless
                    # of direction.
                    prev_score = score
        if add_test:
            # Also load samples recorded during earlier test runs.
            X = np.load(file=self.test_filename + ' x.npy')
            Y = np.load(file=self.test_filename + ' y.npy')
            # BUG FIX: the original called X.reshape(-1, 5, 1) and
            # Y.reshape(-1, 1) and discarded the results (ndarray.reshape is
            # not in-place), so the calls were no-ops and have been removed.
            # BUG FIX: range(0, len(X) - 1) skipped the last saved sample.
            for i in range(len(X)):
                training_data.append([X[i], Y[i]])
        save_list(observations=training_data, filename='train_set')
        # Return the list of observations with their labels.
        return training_data
예제 #25
0
 def play_test_games(self, model):
     """Run test_games games with the model and print step/score statistics.

     On each death, prints the final state (steps, snake, food, last
     observation and predictions) for debugging.
     """
     steps_arr = []
     scores_arr = []
     for _ in range(self.test_games):
         steps = 0
         game_memory = []
         game = SnakeGame()
         _, score, snake, food = game.start()
         prev_observation = self.generate_observation(snake, food)
         for _ in range(self.goal_steps):
             # One model prediction per candidate action (-1, 0, 1).
             predictions = []
             for action in range(-1, 2):
                 predictions.append(
                     model.predict(
                         self.add_action_to_observation(
                             prev_observation, action).reshape(-1, 5, 1)))
             action = np.argmax(np.array(predictions))
             game_action = self.generate_game_action(snake, action - 1)
             done, score, snake, food = game.step(game_action)
             game_memory.append([prev_observation, action])
             if done:
                 # Dump the death context for debugging.
                 print('-----')
                 print("Steps: " + str(steps))
                 print("Snake: " + str(snake))
                 print("Food: " + str(food))
                 print("Previous Observation: " + str(prev_observation))
                 print("Predictions: " + str(predictions))
                 break
             else:
                 prev_observation = self.generate_observation(snake, food)
                 steps += 1
         steps_arr.append(steps)
         scores_arr.append(score)
     print('Average steps:', mean(steps_arr))
     print(Counter(steps_arr))
     print('Average score:', mean(scores_arr))
     print(Counter(scores_arr))
예제 #26
0
파일: search.py 프로젝트: m-tosch/Snake-AI
 def visualise(self):
     """Play one GUI game with the search policy and print the final state.

     NOTE(review): snake/food/score are only bound inside the loop body, so
     if self.goal_steps were 0 the prints below would raise NameError --
     confirm goal_steps is always >= 1.
     """
     game = SnakeGame(gui=True)
     game.start()
     for _ in range(self.goal_steps):
         game_action = self.get_game_action(game)
         done, score, snake, food = game.step(game_action)
         if done:
             break
     game.end_game()
     print('-----')
     print('snake: ' + str(snake))
     print('food: ' + str(food))
     print('score: ' + str(score))
예제 #27
0
파일: nn.py 프로젝트: m-tosch/Snake-AI
 def visualise_game(self, model):
     """Play one GUI game greedily with the model and print the final state."""
     game = SnakeGame(gui=True)
     _, score, snake, food = game.start()
     prev_observation = self.generate_observation(snake, food)
     for _ in range(self.max_steps):
         # One model score per candidate action (-1, 0, 1).
         scores = [
             model.predict(
                 self.add_action_to_observation(
                     candidate, prev_observation).reshape(-1, 5, 1))
             for candidate in range(-1, 2)
         ]
         action = np.argmax(np.array(scores))
         game_action = self.get_game_action(snake, action - 1)
         done, score, snake, food = game.step(game_action)
         if done:
             break
         prev_observation = self.generate_observation(snake, food)
     game.end_game()
     print('snake: ' + str(snake))
     print('food: ' + str(food))
     print('prev_obs: ' + str(prev_observation))
     print('score: ' + str(score))
예제 #28
0
def play_game(z, n, snakes_population):
    """Evaluate every snake in the population over one game each.

    Returns [fitness_array, best_score]. Parameter ``z`` is unused in the
    visible code but kept for call-site compatibility.
    """
    fitness_array = np.zeros(n)
    best_score = 0
    for idx, candidate in enumerate(snakes_population):
        game = SnakeGame()
        state = game.start()
        last_score = current_score = stall_steps = 0
        last_progress = 0
        # NOTE(review): current_score is 0 when range() is evaluated, so the
        # move budget is fixed at 100 moves — confirm this cap is intended.
        for move in range((current_score + 1) * 100):
            state, current_score, end = game.step(candidate.predict_action(state))
            if current_score > last_score:
                last_score = current_score
                last_progress = move
                stall_steps = 0
            stall_steps += 1
            # Stop on death (end > 0) or when too many moves pass without
            # the score improving.
            if end > 0 or move - (current_score + 1) * 50 > last_progress:
                if end == 0:
                    game.end_game()
                break
        best_score = max(best_score, current_score)
        fitness_array[idx] = calculate_fitness(current_score, stall_steps)
        candidate.set_fitness(fitness_array[idx])
    return [fitness_array, best_score]
예제 #29
0
# board coordinates- (0, 0) at the top left
import copy
import os
import random as rand
import neat

from snake_game import SnakeGame

# NOTE(review): this binds the function object rand.random itself, not a drawn
# value — confirm SnakeGame expects a callable seed; otherwise rand.random()
# was probably intended.
seed = rand.random
y = 6  # board height (rows)
x = 6  # board width (columns)
snake_game = SnakeGame(y, x, seed)  # template game, deep-copied per genome
generations = 1000  # NEAT generation budget


def eval_genomes(genomes, config):
    # NEAT fitness callback: plays one snake game per genome using a
    # feed-forward network built from that genome.
    # NOTE(review): this function is truncated in this excerpt — the
    # while-loop body continues beyond the visible lines.

    for genome_id, genome in genomes:
        game = copy.deepcopy(snake_game)  # fresh copy of the module-level template game
        net = neat.nn.FeedForwardNetwork.create(genome, config)

        total_moves = 0
        cont = True
        # moves_since_apple = 0
        # last_score = 0
        #  moves_since_apple > y * x * 2
        # Move budget grows with score: (score/100 + 1) * board area * 2.
        while cont and (game.score / 100 + 1) * x * y * 2 > total_moves:
            # last_score = game.score
            output = net.activate(game.get_board().flat)
            # Chosen move = index of the strongest network output.
            index_of_max = output.index(max(output))
            # print('move=' + str(index_of_max))
예제 #30
0
def train(num_trials=40):
    """Train the Q-learning snake against a minimax opponent (curses UI).

    Each trial: the minimax agent moves first, then the Q-learning player
    picks an epsilon-greedy action from the valid moves, and the observed
    transition is fed to incorporateFeedback. The global explorationProb is
    halved after every trial; curses is torn down when all trials finish.
    """

    # NOTE(review): these four counters are initialised but never updated in
    # the visible code — possibly leftovers from earlier bookkeeping.
    score1 = 0
    score2 = 0
    player1 = 0
    player2 = 0

    for trial in range(num_trials):

        game = SnakeGame(board_size=(20, 25))
        state = game.start_state()
        game.print_board(state)

        while True:
            # Opponent (minimax) moves first; True appears to flag player one.
            action = minimax_agent_first_index(game, state)
            state = game.successor(state, action, True)

            if game.is_end(state)[0] == True:

                # NOTE(review): on the very first loop iteration `succ` is
                # still unbound here, so an immediate game-over would raise
                # NameError — confirm this path is unreachable.
                reward = game.is_end(state)[2] - state[3][1]
                incorporateFeedback(game, state, action, reward, succ)

                break
            game.print_board(state)

            # Q-learning player: restrict to moves valid from the current
            # direction, then choose epsilon-greedily.
            current_dir = state[2][1]
            actions = get_valid(current_dir, game.actions())

            action = get_QL_Action(game, state, actions)

            succ = game.successor(state, action)

            snake = succ[1][1]
            food = state[4]

            # Reward = score delta between successor and current state.
            reward = succ[3][1] - state[3][1]
            #reward = 100*(succ[3][1]- state[3][1]) -((snake[0][0] - food[0])**2 + (snake[0][1] - food[1])**2)

            result = game.is_end(succ)

            # Debug overlay; state[0] appears to be the curses window.
            state[0].addstr(28, 10, ' Reward: ' + str(reward) + '     ')
            state[0].addstr(29, 10, ' ScoreNow: ' + str(succ[3][1]) + '     ')
            state[0].addstr(30, 10, ' ScorePrev: ' + str(state[3][1]) + '    ')

            incorporateFeedback(game, state, action, reward, succ)

            game.print_board(state)
            state = succ

            if game.is_end(state)[0] == True:
                break

        # Decay exploration: halve epsilon after every trial.
        global explorationProb
        explorationProb = explorationProb / 2

    curses.endwin()
    '''