Example #1
    def get_action(self, state: GameState) -> GameAction:

        # Very similar to the greedy agent's get_action, but instead of picking the action with the highest
        # value we pick the action with the highest average value, so the snake does well on average.
        best_actions = state.get_possible_actions(player_index=self.player_index)
        best_value = -np.inf
        for action in state.get_possible_actions(player_index=self.player_index):
            avg_value = 0
            actions_len = 0
            for opponents_actions in state.get_possible_actions_dicts_given_action(action,
                                                                                   player_index=self.player_index):
                opponents_actions[self.player_index] = action
                next_state = get_next_state(state, opponents_actions)
                h_value = _heuristic_for_tournament(next_state, self.player_index)
                avg_value += h_value
                actions_len += 1
                if len(state.opponents_alive) > 2:
                    # consider only one possible opponents-actions dict to reduce time & memory:
                    break
            avg_value /= actions_len
            # choose the action according to the average value obtained by performing it
            if avg_value > best_value:
                best_value = avg_value
                best_actions = [action]
            elif avg_value == best_value:
                best_actions.append(action)

        return np.random.choice(best_actions)
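The pattern above is a one-ply expectimax: score each of our actions by the average heuristic value over the opponents' replies, then break ties at random. A minimal, framework-free sketch of the same idea; legal_moves, opponent_replies, simulate, and evaluate are hypothetical stand-ins for the GameState API used above.

import random

def expectimax_step(state, legal_moves, opponent_replies, simulate, evaluate):
    best_value, best_moves = float("-inf"), []
    for move in legal_moves(state):
        replies = list(opponent_replies(state, move))
        if not replies:
            continue
        # average the heuristic over all opponent replies to this move
        avg = sum(evaluate(simulate(state, move, reply))
                  for reply in replies) / len(replies)
        if avg > best_value:
            best_value, best_moves = avg, [move]
        elif avg == best_value:
            best_moves.append(move)
    return random.choice(best_moves)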
Example #2
 def get_action(self, state: GameState) -> GameAction:
     # Init with all possible actions for the case where the agent is alone; it will (possibly) be overridden later.
     start = time.time()
     best_actions = state.get_possible_actions(
         player_index=self.player_index)
     best_value = -np.inf
     for action in state.get_possible_actions(
             player_index=self.player_index):
         for opponents_actions in state.get_possible_actions_dicts_given_action(
                 action, player_index=self.player_index):
             opponents_actions[self.player_index] = action
             next_state = get_next_state(state, opponents_actions)
             h_value = self._heuristic(next_state)
             if h_value > best_value:
                 best_value = h_value
                 best_actions = [action]
             elif h_value == best_value:
                 best_actions.append(action)
             if len(state.opponents_alive) > 2:
                  # consider only one possible opponents-actions dict to reduce time & memory:
                 break
     end = time.time()
     self.counter_steps += 1
     self.avg_time = ((end - start) + self.avg_time *
                      (self.counter_steps - 1)) / self.counter_steps
     return np.random.choice(best_actions)
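The timing bookkeeping above is an incremental running-average update: avg_n = (sample_n + avg_{n-1} * (n - 1)) / n maintains the mean move time without storing the samples. A self-contained sketch with made-up sample times:

def update_running_average(avg: float, n: int, sample: float) -> float:
    # avg_n = (sample_n + avg_{n-1} * (n - 1)) / n
    return (sample + avg * (n - 1)) / n

avg, n = 0.0, 0
for sample in [0.12, 0.08, 0.10]:
    n += 1
    avg = update_running_average(avg, n, sample)
print(round(avg, 4))  # 0.1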
Example #3
import numpy as np
from scipy.spatial.distance import cityblock  # Manhattan (city block) distance


def heuristic(state: GameState, player_index: int) -> float:
    """
    Computes the heuristic value for the agent with player_index at the given state
    :param state:
    :param player_index: integer. represents the identity of the player. this is the index of the agent's snake in the
    state.snakes array as well.
    :return:
    """
    if not state.snakes[player_index].alive:  # we never want our snake to die
        return -500
    # set the weights
    too_long = 8
    fruit_weight = 1.4
    weight_for_length = 500
    board_factor = np.sqrt(state.board_size.width**2 +
                           state.board_size.height**2)
    snake_length = state.snakes[player_index].length
    turns_left = (state.game_duration_in_turns - state.turn_number)
    possible_fruits = min(
        len(state.fruits_locations) +
        sum([s.length for s in state.snakes if s.alive]), turns_left)
    if possible_fruits > 0:
        bonus_for_length = weight_for_length * snake_length / possible_fruits
    else:
        bonus_for_length = weight_for_length
    # calculate the Manhattan distance and normalize by the board diagonal
    bonus_for_avoiding_tail = cityblock(
        state.snakes[player_index].head,
        state.snakes[player_index].tail_position) / board_factor
    avoiding_tail_weight = 1 - 1 / snake_length if snake_length > too_long else 0
    bonus_for_avoiding_tail *= avoiding_tail_weight

    # distinguish between two game modes: eating fruits and surviving
    if len(state.fruits_locations) > 0:
        nearest_fruit_dist = min([
            cityblock(state.snakes[player_index].head, fruit)
            for fruit in state.fruits_locations
        ])
        nearest_fruit_bonus = state.board_size.height + state.board_size.width - nearest_fruit_dist
        nearest_fruit_bonus /= (state.board_size.height +
                                state.board_size.width)  # normalize
        nearest_fruit_bonus *= fruit_weight
        return nearest_fruit_bonus + bonus_for_length + bonus_for_avoiding_tail
    else:
        weight = 1.8
        distance_from_enemy_bonus = min(
            cityblock(state.snakes[player_index].head,
                      state.snakes[enemy].head)
            for enemy in state.get_opponents_alive(player_index)) if len(
                state.get_opponents_alive(player_index)) > 0 else 0
        distance_from_enemy_bonus /= board_factor  #normalize
        return bonus_for_length * weight + bonus_for_avoiding_tail * weight + distance_from_enemy_bonus
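The normalization used above, in isolation: dividing a Manhattan distance by the board diagonal bounds each bonus term to a small fixed range, so the hand-tuned weights stay comparable across board sizes. The board size and points below are made up for the demo.

import numpy as np
from scipy.spatial.distance import cityblock

width, height = 40, 30                        # hypothetical board size
diagonal = np.sqrt(width ** 2 + height ** 2)  # 50.0
head, tail = (3, 4), (10, 25)
print(cityblock(head, tail) / diagonal)       # 28 / 50.0 = 0.56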
Example #4
 def get_action(self, state: GameState) -> GameAction:
     best_value = -np.inf
     best_actions = state.get_possible_actions(player_index=self.player_index)
     for action in state.get_possible_actions(player_index=self.player_index):
         next_state = self.TurnBasedGameState(state, action)
         max_value = self.RB_minimax(next_state, state.depth-1)
         if max_value > best_value:
             best_value = max_value
             best_actions = [action]
         elif best_value == max_value:
             best_actions.append(action)
     return np.random.choice(best_actions)
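RB_minimax itself is not part of this excerpt. A minimal, framework-free sketch of the depth-limited minimax recursion it plausibly implements; children, is_terminal, and evaluate are hypothetical stand-ins.

def rb_minimax(node, depth, children, is_terminal, evaluate, maximizing=True):
    # depth-limited minimax: alternate max/min levels until depth 0 or a terminal node
    if depth == 0 or is_terminal(node):
        return evaluate(node)
    values = [rb_minimax(child, depth - 1, children, is_terminal, evaluate,
                         not maximizing) for child in children(node)]
    return max(values) if maximizing else min(values)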
Example #5
 def get_action(self, state: GameState) -> GameAction:
      max_value = -np.inf  # np.NINF was removed in NumPy 2.0
     best_action = None
     for action in state.get_possible_actions(self.player_index):
         value = self.rb_minimax(self.TurnBasedGameState(state, action), 2)
         max_value = max(max_value, value)
         best_action = action if max_value == value else best_action
     return best_action
Example #6
    def get_action(self, state: GameState) -> GameAction:
        if self.is_trap(state):
            return self.trap_escape(state)
        # Init with all possible actions for the case where the agent is alone; it will (possibly) be overridden later.
        best_actions = state.get_possible_actions(
            player_index=self.player_index)
        best_value = -np.inf
        for action in state.get_possible_actions(
                player_index=self.player_index):
            for opponents_actions in state.get_possible_actions_dicts_given_action(
                    action, player_index=self.player_index):
                opponents_actions[self.player_index] = action
                next_state = get_next_state(state, opponents_actions)
                h_value = self.tournament_heuristic(next_state)
                if h_value > best_value:
                    best_value = h_value
                    best_actions = [action]
                elif h_value == best_value:
                    best_actions.append(action)

        return np.random.choice(best_actions)
Example #7
 def get_action(self, state: GameState) -> GameAction:
     game_state = self.TurnBasedGameState(state, None)
     actions = state.get_possible_actions(player_index=self.player_index)
      best_action = GameAction.STRAIGHT  # default action; will be replaced below
     best_value = -np.inf
     for action in actions:
         game_state.agent_action = action
         value = self._RB_alpha_beta(game_state, self.Turn.OPPONENTS_TURN,
                                     self.DEPTH, -np.inf, np.inf)
         if value > best_value:
             best_action = action
             best_value = value
     return best_action
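_RB_alpha_beta is likewise not shown. A minimal, framework-free alpha-beta sketch with the same (depth, alpha, beta) shape; all helper names are hypothetical.

def alpha_beta(node, depth, alpha, beta, children, is_terminal, evaluate,
               maximizing=True):
    if depth == 0 or is_terminal(node):
        return evaluate(node)
    if maximizing:
        value = float("-inf")
        for child in children(node):
            value = max(value, alpha_beta(child, depth - 1, alpha, beta,
                                          children, is_terminal, evaluate, False))
            alpha = max(alpha, value)
            if beta <= alpha:  # prune: the minimizer never allows this branch
                break
        return value
    value = float("inf")
    for child in children(node):
        value = min(value, alpha_beta(child, depth - 1, alpha, beta,
                                      children, is_terminal, evaluate, True))
        beta = min(beta, value)
        if beta <= alpha:  # prune
            break
    return value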
Example #8
 def get_action(self, state: GameState) -> GameAction:
     start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
     cur_max = -np.inf
     best_action = GameAction(1)
     for action in state.get_possible_actions(self.player_index):
         state_after_turn = MinimaxAgent.TurnBasedGameState(state, action)
         state_value = self.__RB_Minimax__(state_after_turn, 2)
         if state_value > cur_max:
             cur_max = state_value
             best_action = action
     end_time = time.perf_counter()
     self.time += end_time - start_time
     self.num_played += 1
     return best_action
Example #9
 def get_action(self, state: GameState) -> GameAction:
     if self.curr_turn is None:
         self.curr_turn = self.TurnBasedGameState(state, None)
     best_value = -np.inf
     best_actions = []
     self.curr_turn.curr_turn = MinimaxAgent.Turn.OPPONENTS_TURN
     for our_action in state.get_possible_actions(
             player_index=self.player_index):
         h_value = self.minimax(self.TurnBasedGameState(state, our_action),
                                2)
         if h_value > best_value:
             best_value = h_value
             best_actions = [our_action]
         elif h_value == best_value:
             best_actions.append(our_action)
     return np.random.choice(best_actions)
Example #10
 def get_action(self, state: GameState) -> GameAction:
     max_actions = []
     best_value = -np.inf
     for action in state.get_possible_actions(
             player_index=self.player_index):
         turn_next_state = MinimaxAgent.TurnBasedGameState(state, action)
         min_max_value = self.alpha_beta_value(turn_next_state,
                                               MinimaxAgent.Turn.AGENT_TURN,
                                               2, float('-inf'),
                                               float('inf'))
         if min_max_value > best_value:
             best_value = min_max_value
             max_actions = [action]
         elif min_max_value == best_value:
             max_actions.append(action)
     return np.random.choice(max_actions)
Example #11
    def get_action(self, state: GameState) -> GameAction:
        start = time.time()
        choose_max = -np.inf

        max_action = GameAction.LEFT
        for agent_action in state.get_possible_actions(
                player_index=self.player_index):
            head_tree = self.TurnBasedGameState(
                state, agent_action)  # wrap the agent's action; the opponents move next
            current_action_max = self.minimax(head_tree, 1)

            if choose_max < current_action_max:
                choose_max = current_action_max
                max_action = agent_action
        end = time.time()
        self.counter_steps += 1
        self.avg_time = ((end - start) + self.avg_time *
                         (self.counter_steps - 1)) / self.counter_steps
        return max_action
Example #12
 def get_action(self, state: GameState) -> GameAction:
      start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
     max_value = -np.inf
     maxi_action = GameAction(0)
     all_actions = state.get_possible_actions(self.player_index)
     for action in all_actions:
         curr_value = self.get_action_wrapper(
             MinimaxAgent.TurnBasedGameState(state, action), self.dep,
             -np.inf, np.inf)
         if curr_value > max_value:
             max_value = curr_value
             maxi_action = action
      stop_time = time.perf_counter()
      self.time += stop_time - start_time
      self.num_played += 1
      avg_turn = self.time / self.num_played
      if avg_turn > 60 / 500 and self.dep > 2:
          self.dep -= 1
      elif avg_turn < 45 / 500:
          # if we have extra time we can afford to search deeper
          self.dep += 1
     return maxi_action
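The depth-adaptation rule above, isolated: with a 60-second budget spread over roughly 500 turns, shrink the search depth when the average move time exceeds the per-turn budget and deepen when there is slack. A self-contained sketch using the example's own thresholds:

class DepthController:
    def __init__(self, depth=2, budget=60 / 500, slack=45 / 500):
        self.depth, self.budget, self.slack = depth, budget, slack

    def update(self, avg_turn_seconds: float) -> int:
        if avg_turn_seconds > self.budget and self.depth > 2:
            self.depth -= 1  # over budget: search shallower
        elif avg_turn_seconds < self.slack:
            self.depth += 1  # slack left: search deeper
        return self.depth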
Example #13
    def get_action(self, state: GameState) -> GameAction:
        D_arr = [2, 3, 4]

        best_value = -np.inf
        best_actions = []
        i = 0
        for our_action in state.get_possible_actions(
                player_index=self.player_index):
            t = time.time()
            h_value = self.abminimax(
                self.TurnBasedGameState(state, our_action), D_arr[i], -np.inf,
                np.inf)
            elapsed = time.time() - t
            if elapsed < 15 and i < len(D_arr) - 1:
                # fast enough: search one level deeper next time
                i += 1
            if elapsed > 20 and i > 0:
                # too slow: back off to a shallower depth
                i -= 1
            if h_value > best_value:
                best_value = h_value
                best_actions = [our_action]
            elif h_value == best_value:
                best_actions.append(our_action)
        return np.random.choice(best_actions)
Example #14
def _get_reachable_area(state: GameState, player_index: int) -> float:
    snake = state.snakes[player_index]
    # get the board with the snake positions of player_index marked
    snake_board = state.get_board(player_index)[0]
    # DFS finds the area reachable from the snake's head
    return _dfs(snake_board, snake.head, True)
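_dfs is not included in the excerpt. A minimal flood-fill sketch of what it plausibly computes: the number of free cells reachable from a start cell via 4-neighbour moves. The board encoding (truthy = blocked, start assumed free) is an assumption for the demo.

def reachable_area(board, start):
    # iterative flood fill: count free cells reachable from `start`
    rows, cols = len(board), len(board[0])
    seen, stack, area = set(), [start], 0
    while stack:
        r, c = stack.pop()
        if (r, c) in seen or not (0 <= r < rows and 0 <= c < cols) or board[r][c]:
            continue
        seen.add((r, c))
        area += 1
        stack.extend([(r + 1, c), (r - 1, c), (r, c + 1), (r, c - 1)])
    return area

demo = [[0, 0, 1],
        [1, 0, 1],
        [1, 0, 0]]
print(reachable_area(demo, (0, 0)))  # 5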
def heuristic(state: GameState, player_index: int) -> float:
    """
    Computes the heuristic value for the agent with player_index at the given state
    :param state:
    :param player_index: integer. represents the identity of the player. this is the index of the agent's snake in the
    state.snakes array as well.
    :return:
    """

    if not state.snakes[player_index].alive:
        return state.snakes[player_index].length
    discount_factor = 0.5

    max_possible_fruits = len(state.fruits_locations) + sum([
        s.length for s in state.snakes if s.index != player_index and s.alive
    ])
    turns_left = (state.game_duration_in_turns - state.turn_number)
    max_possible_fruits = min(max_possible_fruits, turns_left)
    optimistic_future_reward = discount_factor * (
        1 - discount_factor**max_possible_fruits) / (1 - discount_factor)
    original_greedy_value = state.snakes[
        player_index].length + optimistic_future_reward
    best_dist = state.board_size.width + state.board_size.height
    for fruit in state.fruits_locations:
        d_x = abs(state.snakes[player_index].head[0] - fruit[0])
        d_y = abs(state.snakes[player_index].head[1] - fruit[1])
        manhattan_dist = d_x + d_y
        if manhattan_dist < best_dist:
            best_dist = manhattan_dist
    # better bonus for lower best_dist
    if best_dist != 0:
        bonus_dist_fruit = optimistic_future_reward / best_dist
    else:
        bonus_dist_fruit = optimistic_future_reward
    # penalize states where the head is adjacent to the board border
    bonus_border = 0
    head_x = state.snakes[player_index].head[0]
    head_y = state.snakes[player_index].head[1]
    radius = 1
    neighbors = [(head_x + radius, head_y), (head_x - radius, head_y),
                 (head_x, head_y + radius), (head_x, head_y - radius)]
    if any(not state.is_within_grid_boundaries(cell) for cell in neighbors):
        bonus_border -= optimistic_future_reward / (best_dist + radius)
    # penalize states where an adjacent cell is occupied by a snake's body
    bonus_is_snake_in_cell = 0
    for s in state.snakes:
        if s.index != player_index:
            if any(s.is_in_cell(cell) for cell in neighbors):
                # + radius also guards against division by zero when best_dist == 0
                bonus_is_snake_in_cell -= optimistic_future_reward / (best_dist + radius)
                break
        elif any(state.snakes[player_index].is_in_cell(cell) for cell in neighbors):
            bonus_is_snake_in_cell -= optimistic_future_reward
            break

    ret_h_val = float(original_greedy_value + bonus_dist_fruit + bonus_border +
                      bonus_is_snake_in_cell)
    return ret_h_val
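optimistic_future_reward above is a truncated geometric series: sum of d^k for k = 1..n equals d * (1 - d^n) / (1 - d). A quick check with the example's discount_factor of 0.5:

def optimistic_future_reward(discount: float, max_fruits: int) -> float:
    # closed form of discount^1 + discount^2 + ... + discount^max_fruits
    return discount * (1 - discount ** max_fruits) / (1 - discount)

print(optimistic_future_reward(0.5, 3))  # 0.5 + 0.25 + 0.125 = 0.875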