예제 #1
0
def _check_if_flame_will_gone(obs, prev_two_obs, flame_pos):
    """Decide whether the flame at ``flame_pos`` is about to disappear.

    The flame is considered about to vanish only when every cell of the
    connected flame group it belongs to (in the current board) was already
    burning in *both* previous observations.  If any cell of the group is
    newer than that, an old flame may be overlapped by a fresh one, so the
    group cannot be assumed to go away on the next step.

    Parameters
    ----------
    obs : dict
        Current observation; only ``obs['board']`` is read.
    prev_two_obs : sequence
        The two preceding observations; both must be non-None and expose
        a ``'board'`` entry.
    flame_pos : tuple
        Board position of the flame cell to examine.

    Returns
    -------
    bool
        True if the whole flame group was burning in both previous
        observations, False otherwise.
    """
    assert (prev_two_obs[0] is not None)
    assert (prev_two_obs[1] is not None)
    first_prev_board = prev_two_obs[0]['board']
    second_prev_board = prev_two_obs[1]['board']

    def burning_in_both_prev(pos):
        return (utility.position_is_flames(first_prev_board, pos)
                and utility.position_is_flames(second_prev_board, pos))

    if not burning_in_both_prev(flame_pos):
        return False

    board = obs['board']
    queue = deque([flame_pos])
    # A set gives O(1) membership tests; it also bounds the queue, since
    # each cell is enqueued at most once.
    visited = {flame_pos}
    dirs = _all_directions(exclude_stop=True)
    while queue:
        pos = queue.popleft()
        if not burning_in_both_prev(pos):
            return False
        for d in dirs:
            next_pos = utility.get_next_position(pos, d)
            if next_pos in visited:
                continue
            # Grow the group through neighbouring *flame* cells.  The
            # previous code tested for agents here, so the flame group
            # described above was never actually traversed.
            if utility.position_on_board(board, next_pos) \
                    and utility.position_is_flames(board, next_pos):
                visited.add(next_pos)
                queue.append(next_pos)
    return True
예제 #2
0
def position_can_be_bomb_through(board, position):
    """Return True if ``position`` holds flames, a passage or a power-up.

    Judging by the name, these are presumably the cell types a bomb can
    travel through -- confirm against the callers.  The checks run in the
    order flames, passage, power-up and stop at the first match.
    """
    return bool(
        utility.position_is_flames(board, position)
        or utility.position_is_passage(board, position)
        or utility.position_is_powerup(board, position)
    )
예제 #3
0
def _filter_direction_toward_flames(board, my_position, directions, enemies):
    """Drop the directions that would step off the board or into flames.

    Returns a new list holding, in their original order, the entries of
    ``directions`` whose target cell (one step from ``my_position``) is on
    the board and is not a flame cell.  ``enemies`` is accepted but not
    used by this function.
    """

    def _lands_outside_flames(direction):
        target = utility.get_next_position(my_position, direction)
        if not utility.position_on_board(board, target):
            return False
        return not utility.position_is_flames(board, target)

    return [d for d in directions if _lands_outside_flames(d)]
예제 #4
0
def no_flames(obs):
    """Score each action, penalising moves that step into flames.

    Parameters
    ----------
    obs : dict
        Observation; ``obs['position']`` and ``obs['board']`` are read.

    Returns
    -------
    list
        Six entries, indexed by ``act.value``.  An entry is -1 when moving
        in that direction from the current position lands on an on-board
        flame cell, and 0 otherwise.  Only the actions listed in the
        module-level ``dirs`` are examined; the remaining slots stay 0.
    """
    # presumably one slot per action of the environment -- TODO confirm
    res = [0] * 6
    my_position = obs['position']
    board = obs['board']
    for act in dirs:
        next_pos = util.get_next_position(my_position, act)
        if util.position_on_board(board, next_pos) and util.position_is_flames(
                board, next_pos):
            res[act.value] = -1
    return res
예제 #5
0
    def act(self, obs, action_space):
        """
        Choose the action for the current step.

        The method extends the observation with bomb movement and flame
        life, simulates future boards, works out which actions are
        survivable and then returns the first action produced by the
        following cascade:

        1. no survivable action: delegate to ``super().act``
        2. exactly one survivable action: take it
        3. move toward a reachable good item (ExtraBomb, IncrRange, Kick)
        4. place a bomb to break wood, or when an enemy is reachable
        5. move toward a cell next to wood
        6. kick a kickable bomb
        7. move toward an enemy (or directly away when standing on a bomb)
        8. the action of ``super().act`` if survivable, otherwise a random
           survivable action

        Parameters
        ----------
        obs : dict
            Observation.  Keys read here: 'board', 'position', 'ammo',
            'blast_strength', 'enemies', 'bomb_blast_strength' and
            'bomb_life'.
        action_space :
            Forwarded unchanged to ``super().act``.

        Return
        ------
        The ``.value`` of the chosen ``constants.Action``, or whatever
        ``super().act`` returns when the decision is delegated to it.

        Side effects
        ------------
        Overwrites ``self._search_range``, ``self._prev_bomb_life``,
        ``self._prev_flame_life`` and ``self._prev_bomb_position_strength``,
        and prints diagnostics to stdout.
        """

        #
        # Definitions
        #

        # number of steps passed to the board simulations below
        self._search_range = 10

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have (not used below)
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies']]

        #
        # Prepare extended observations
        # - bomb moving direction
        # - flame remaining life
        #

        # Summarize information about bombs
        # curr_bombs : list of current bombs
        # moving_direction : array of moving direction of bombs
        curr_bombs, moving_direction, self._prev_bomb_life \
            = self._get_bombs(obs, self._prev_bomb_life)

        # Summarize information about flames
        # (uses the bomb positions/strengths recorded during the previous call)
        curr_flames, self._prev_flame_life \
            = self._get_flames(board, self._prev_flame_life, self._prev_bomb_position_strength)

        # Record position and strength of every cell with a positive blast
        # strength; the list is handed to _get_flames on the next call
        self._prev_bomb_position_strength = list()
        rows, cols = np.where(obs["bomb_blast_strength"] > 0)
        for position in zip(rows, cols):
            strength = int(obs["bomb_blast_strength"][position])
            self._prev_bomb_position_strength.append((position, strength))

        #
        # Understand current situation
        #

        # Simulation assuming enemies stay unmoved

        # List of simulated boards
        list_boards_no_move, _ \
            = self._board_sequence(board,
                                   curr_bombs,
                                   curr_flames,
                                   self._search_range,
                                   my_position,
                                   enemy_mobility=0)

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable_no_move, prev_no_move \
            = self._search_time_expanded_network(list_boards_no_move,
                                                 my_position)

        # Items that can be reached in a survivable manner
        reachable_items_no_move, reached_no_move, next_to_items_no_move \
            = self._find_reachable_items(list_boards_no_move,
                                         my_position,
                                         survivable_no_move)

        # Simulation assuming enemies move

        # Try enemy mobility 3, 2, 1, 0 and keep the first simulation in which
        # something is survivable at time 1; if none is, the variables hold
        # the result for mobility 0 after the loop
        for enemy_mobility in range(3, -1, -1):
            # List of boards simulated
            list_boards, _ = self._board_sequence(board,
                                                  curr_bombs,
                                                  curr_flames,
                                                  self._search_range,
                                                  my_position,
                                                  enemy_mobility=enemy_mobility)

            # List of the set of survivable time-positions at each time
            # and preceding positions
            survivable, prev = self._search_time_expanded_network(list_boards,
                                                                  my_position)

            if len(survivable[1]) > 0:
                # Gradually reduce the mobility of enemy, so we have at least one survivable action
                break

        # Items that can be reached in a survivable manner
        reachable_items, reached, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable)

        # Survivable actions
        is_survivable, survivable_with_bomb \
            = self._get_survivable_actions(survivable,
                                           obs,
                                           curr_bombs,
                                           curr_flames)

        survivable_actions = [a for a in is_survivable if is_survivable[a]]

        # NOTE(review): `verbose` is not defined in this method; presumably a
        # module-level flag -- confirm
        if verbose:
            print("survivable actions are", survivable_actions)

        # Positions where we kick a bomb if we move to
        kickable = self._kickable_positions(obs, moving_direction)

        # NOTE(review): this print() emits a blank line on every call, and
        # range(0) is empty, so the debug dump below never runs
        print()
        for t in range(0):
            print(list_boards[t])
            print(survivable[t])
            for key in prev[t]:
                print(key, prev[t][key])

        #
        # Choose an action
        #

        # The string literal below is disabled code kept for reference
        """
        # This is not effective in the current form
        if len(survivable_actions) > 1:
            # avoid the position if only one position at the following step
            # the number of positions that can be reached from the next position
            next = defaultdict(set)
            next_count = defaultdict(int)
            for position in survivable[1]:
                next[position] = set([p for p in prev[2] if position in prev[2][p]])
                next_count[position] = len(next[position])
            print("next count", next_count)
            if max(next_count.values()) > 1:
                for position in survivable[1]:
                    if next_count[position] == 1:
                        risky_action = self._get_direction(my_position, position)
                        is_survivable[risky_action] = False
                survivable_actions = [a for a in is_survivable if is_survivable[a]]                
        """

        # Do not stay on a bomb if I can
        # (Stop is dropped only when another survivable action remains)
        if all([obs["bomb_life"][my_position] > 0,
                len(survivable_actions) > 1,
                is_survivable[constants.Action.Stop]]):
            is_survivable[constants.Action.Stop] = False
            survivable_actions = [a for a in is_survivable if is_survivable[a]]

        if len(survivable_actions) == 0:

            # must die
            # TODO: might want to do something that can help team mate
            # TODO: kick if possible
            # NOTE: the message mentions Stop, but the action actually
            # returned is the one chosen by the parent class
            print("Must die", constants.Action.Stop)
            return super().act(obs, action_space)
            # return constants.Action.Stop.value

        elif len(survivable_actions) == 1:

            # move to the position if it is the only survivable position
            action = survivable_actions[0]
            print("The only survivable action", action)
            return action.value

        # Move towards good items
        good_items = [constants.Item.ExtraBomb, constants.Item.IncrRange]
        # TODO : kick may be a good item only if I cannot kick yet
        # TODO : might want to destroy
        good_items.append(constants.Item.Kick)
        # positions with good items
        good_time_positions = set()
        for item in good_items:
            good_time_positions = good_time_positions.union(reachable_items[item])
        if len(good_time_positions) > 0:
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions,
                                                   prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward good item", action)
                return action.value

        # TODO : should check the survivability of all agents in one method

        # Place a bomb if
        # - it does not significantly reduce my survivability
        # - it can break wood
        # - it can reduce the survivability of enemies
        if is_survivable[constants.Action.Bomb]:
            # if survivable now after bomb, consider bomb
            if all([len(s) > 0 for s in survivable_with_bomb]):
                # if survivable all the time after bomb, consider bomb
                # Conditions: wood is breakable on the last simulated board and
                # my cell is not in flames on any of the first 10 simulated
                # boards (the comprehension's `board` is local to it and does
                # not rebind the outer `board`)
                if all([self._can_break_wood(list_boards_no_move[-1],
                                             my_position,
                                             my_blast_strength)]
                       + [not utility.position_is_flames(board, my_position)
                          for board in list_boards_no_move[:10]]):
                    # place bomb if can break wood
                    print("Bomb to break wood", constants.Action.Bomb)
                    return constants.Action.Bomb.value

                for enemy in my_enemies:
                    # check if the enemy is reachable
                    if len(reachable_items_no_move[enemy]) == 0:
                        continue

                    # can reach the enemy at enemy_position in enemy_time step
                    enemy_time = reachable_items_no_move[enemy][0][0]
                    enemy_position = reachable_items_no_move[enemy][0][1:3]

                    # find direction towards enemy
                    # (walk the predecessor map back from the cells next to
                    # the enemy down to time 1)
                    positions = set([x[1:3] for x in next_to_items_no_move[enemy]])
                    for t in range(enemy_time, 1, -1):
                        _positions = set()
                        for position in positions:
                            _positions = _positions.union(prev_no_move[t][position])
                        positions = _positions.copy()

                    #if enemy_time <= my_blast_strength:
                    # NOTE(review): the condition is hard-wired to True, so the
                    # "bomb to kick" else-branch below is unreachable
                    if True:
                        # bomb only if, after bombing, I can survive on a cell
                        # other than my own and those leading toward the enemy
                        positions.add(my_position)
                        positions_after_bomb = set(survivable[1]).difference(positions)
                        if positions_after_bomb:
                            print("Bomb to kill an enemy", enemy, constants.Action.Bomb)
                            return constants.Action.Bomb.value
                    else:
                        # bomb to kick
                        # (positions mirrored through my own position)
                        x0, y0 = my_position
                        positions_against = [(2*x0-x, 2*y0-y) for (x, y) in positions]
                        positions_after_bomb = set(survivable[1]).intersection(positions_against)

                        if positions_after_bomb:
                            print("Bomb to kick", enemy, constants.Action.Bomb)
                            return constants.Action.Bomb.value

                    # The string literal below is disabled code kept for reference
                    """
                    # check if placing a bomb can reduce the survivability
                    # of the enemy
                    survivable_before, _ = self._search_time_expanded_network(list_boards_no_move,
                                                                              enemy_position)

                    board_with_bomb = deepcopy(obs["board"])
                    curr_bombs_with_bomb = deepcopy(curr_bombs)
                    # lay a bomb
                    board_with_bomb[my_position] = constants.Item.Bomb.value
                    bomb = characters.Bomb(characters.Bomber(),  # dummy owner of the bomb
                                           my_position,
                                           constants.DEFAULT_BOMB_LIFE,
                                           my_blast_strength,
                                           None)
                    curr_bombs_with_bomb.append(bomb)
                    list_boards_with_bomb, _ \
                        = self._board_sequence(board_with_bomb,
                                               curr_bombs_with_bomb,
                                               curr_flames,
                                               self._search_range,
                                               my_position,
                                               enemy_mobility=0)
                    survivable_after, _ \
                        = self._search_time_expanded_network(list_boards_with_bomb,
                                                             enemy_position)

                    good_before = np.array([len(s) for s in survivable_before])
                    good_after = np.array([len(s) for s in survivable_after])
                    # TODO : what are good criteria?
                    if any(good_after < good_before):
                        # place a bomb if it makes sense
                        print("Bomb to kill an enemy", constants.Action.Bomb)
                        return constants.Action.Bomb.value
                    """

        # Move towards a wood
        # (targets come from the no-move simulation, while the path uses
        # `prev` from the simulation with moving enemies)
        if len(next_to_items_no_move[constants.Item.Wood]) > 0:
            # positions next to wood
            good_time_positions = next_to_items_no_move[constants.Item.Wood]
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions,
                                                   prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward wood", action)
                return action.value

        # kick whatever I can kick
        # -- tentative, this is generally not a good strategy
        if len(kickable) > 0:

            while kickable:
                # then consider what happens if I kick a bomb
                next_position = kickable.pop()

                # do not kick a bomb if it will break a wall
                if all([moving_direction[next_position] is None,
                        self._can_break_wood(board, next_position, my_blast_strength)]):
                    # if it is a static bomb
                    # do not kick if it is breaking a wall
                    continue

                my_action = self._get_direction(my_position, next_position)
                # next_position is rebound here to the second value returned
                # by the simulation of the kick
                list_boards_with_kick, next_position \
                    = self._board_sequence(obs["board"],
                                           curr_bombs,
                                           curr_flames,
                                           self._search_range,
                                           my_position,
                                           my_action=my_action,
                                           can_kick=True,
                                           enemy_mobility=3)
                # search from the board after the kick (index 1 onward)
                survivable_with_kick, prev_kick \
                    = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                         next_position)
                if next_position in survivable_with_kick[0]:
                    print("Kicking", my_action)
                    return my_action.value

        # Move towards an enemy
        good_time_positions = set()
        for enemy in my_enemies:
            good_time_positions = good_time_positions.union(next_to_items[enemy])
        if len(good_time_positions) > 0:
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions,
                                                   prev,
                                                   is_survivable)

            if obs["bomb_life"][my_position] > 0:
                # if on a bomb, move away
                # (take the opposite direction when it is survivable,
                # otherwise give up on this rule)
                if action == constants.Action.Down and is_survivable[constants.Action.Up]:
                    action = constants.Action.Up
                elif action == constants.Action.Up and is_survivable[constants.Action.Down]:
                    action = constants.Action.Down
                elif action == constants.Action.Right and is_survivable[constants.Action.Left]:
                    action = constants.Action.Left
                elif action == constants.Action.Left and is_survivable[constants.Action.Right]:
                    action = constants.Action.Right
                else:
                    action = None

            if action is not None:
                print("Moving toward/against enemy", action)
                return action.value

        #
        # as in the agent from the previous competition
        #
        # Use the parent's choice only when it is survivable; otherwise pick a
        # random survivable action (at least two remain at this point)
        action = super().act(obs, action_space)
        if is_survivable[constants.Action(action)]:
            print("Action from prev. agent", constants.Action(action))
            return action
        else:
            action = random.choice(survivable_actions)
            print("Random action", action)
            return action.value
예제 #6
0
    def _get_flames(self, board, prev_flame_life, bomb_position_strength):

        """
        Summarize information about flames

        Parameters
        ----------
        board : array
            pommerman board
        prev_flame_life : array
            remaining life of flames in the previous step
        exploted_position_strength : list
           list of pairs of position and strength of bombs just exploded

        Return
        ------
        curr_flames : list
            list of Flames
        flame_life : array
            remaining life of flames
        """

        flame_life = prev_flame_life - (prev_flame_life > 0)  # decrement by 1

        for (x, y), strength in bomb_position_strength:
            if not utility.position_is_flames(board, (x, y)):
                # not exploded yet
                continue
            # To up and stop
            for dx in range(0, strength):
                position = (x + dx, y)
                if not self._on_board(position):
                    break
                elif utility.position_is_flames(board, position):
                    flame_life[position] = 3
            # To down
            for dx in range(1, strength):
                position = (x - dx, y)
                if not self._on_board(position):
                    break
                elif utility.position_is_flames(board, position):
                    flame_life[position] = 3
            # To right
            for dy in range(1, strength):
                position = (x, y + dy)
                if not self._on_board(position):
                    break
                elif utility.position_is_flames(board, position):
                    flame_life[position] = 3
            # To left
            for dy in range(1, strength):
                position = (x, y - dy)
                if not self._on_board(position):
                    break
                elif utility.position_is_flames(board, position):
                    flame_life[position] = 3

        curr_flames = list()
        rows, cols = np.where(flame_life > 0)
        for position in zip(rows, cols):
            flame = characters.Flame(position, flame_life[position] - 1)
            curr_flames.append(flame)

        return curr_flames, flame_life
예제 #7
0
    def act(self, obs, action_space, info):

        #
        # Definitions
        #

        #board = obs['board']
        board = info["recently_seen"]
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [
            constants.Item(e) for e in obs['enemies']
            if e != constants.Item.AgentDummy
        ]
        if obs["teammate"] != constants.Item.AgentDummy:
            my_teammate = obs["teammate"]
        else:
            my_teammate = None
        my_kick = obs["can_kick"]  # whether I can kick

        #
        # Understand current situation
        #

        # positions that might be blocked
        if info["teammate_position"] is None:
            agent_positions = info["enemy_positions"]
        else:
            agent_positions = info["enemy_positions"] + [
                info["teammate_position"]
            ]

        # survivable actions

        if len(info["enemy_positions"]) > 0:
            mobility = self._enemy_mobility
        else:
            mobility = 0

        n_survivable, is_survivable, list_boards \
            = self._get_survivable(obs, info, my_position, info["my_next_position"], agent_positions,
                                   info["all_kickable"], allow_kick_to_fog=False,
                                   enemy_mobility=mobility, enemy_bomb=self._enemy_bomb,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        for a in info["might_block_actions"]:
            n_survivable[a] = np.zeros(self._search_range)
            is_survivable[a] = False

        survivable_actions = list()
        for a in is_survivable:
            if not is_survivable[a]:
                continue
            if info["might_blocked"][a] and not is_survivable[
                    constants.Action.Stop]:
                continue
            if n_survivable[a][-1] <= 1:
                is_survivable[a] = False
                continue
            survivable_actions.append(a)

        #
        # Choose action
        #

        if len(survivable_actions) == 0:

            #
            # return None, if no survivable actions
            #

            return None

        elif len(survivable_actions) == 1:

            #
            # Choose the survivable action, if it is the only choice
            #

            action = survivable_actions[0]
            return action.value

        if all([
                info["prev_action"]
                not in [constants.Action.Stop, constants.Action.Bomb],
                info["prev_position"] == my_position
        ]):
            # if previously blocked, do not reapeat with some probability
            self._inv_tmp *= self._backoff
        else:
            self._inv_tmp = self._inv_tmp_init

        #
        # Bomb at a target
        #

        # fraction of blocked node in the survival trees of enemies
        _list_boards = info["list_boards_no_move"]
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes, blocked_time_positions \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

        if info["teammate_position"] is not None:
            total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
                = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

            if n_survivable_nodes_teammate[my_teammate] > 0:
                LB = self._teammate_survivability_threshold / n_survivable_nodes_teammate[
                    my_teammate]
                positions_teammate_safe = np.where(
                    total_frac_blocked_teammate < LB)
                total_frac_blocked_teammate[positions_teammate_safe] = 0

        p_survivable = defaultdict(float)
        for action in n_survivable:
            p_survivable[action] = sum(
                n_survivable[action]) / self._my_survivability_threshold
            if p_survivable[action] > 1:
                p_survivable[action] = 1

        block = defaultdict(float)
        for action in [
                constants.Action.Stop, constants.Action.Up,
                constants.Action.Down, constants.Action.Left,
                constants.Action.Right
        ]:
            next_position = info["my_next_position"][action]
            if next_position is None:
                continue
            if next_position in info["all_kickable"]:
                # kick will be considered later
                continue
            if all([
                    utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1,
                    is_survivable[constants.Action.Stop]
            ]):
                # if the next position is flames,
                # I want to stop to wait, which must be feasible
                block[action] = total_frac_blocked[
                    next_position] * p_survivable[constants.Action.Stop]
                if info["teammate_position"] is not None:
                    block[action] *= (
                        1 - total_frac_blocked_teammate[next_position])
                if block[action] > 0:
                    block[action] *= self._inv_tmp
                    block[action] -= np.log(-np.log(self.random.uniform()))
                continue
            elif not is_survivable[action]:
                continue
            if all([
                    info["might_blocked"][action],
                    not is_survivable[constants.Action.Stop]
            ]):
                continue

            block[action] = total_frac_blocked[next_position] * p_survivable[
                action]
            if info["teammate_position"] is not None:
                block[action] *= (1 -
                                  total_frac_blocked_teammate[next_position])
            if block[action] > 0:
                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

            if info["might_blocked"][action]:
                block[action] = (total_frac_blocked[my_position] *
                                 p_survivable[constants.Action.Stop] +
                                 total_frac_blocked[next_position] *
                                 p_survivable[action]) / 2
                if info["teammate_position"] is not None:
                    block[action] *= (
                        1 - total_frac_blocked_teammate[next_position])
                if block[action] > 0:
                    block[action] *= self._inv_tmp
                    block[action] -= np.log(-np.log(self.random.uniform()))

        if is_survivable[constants.Action.Bomb]:
            list_boards_with_bomb, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=constants.Action.Bomb,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])

            n_survivable_nodes_with_bomb = defaultdict(int)
            for enemy_position in info["enemy_positions"]:
                # get survivable tree of the enemy
                _survivable = search_time_expanded_network(
                    list_boards_with_bomb, enemy_position)
                n_survivable_nodes_with_bomb[enemy_position] = sum(
                    [len(positions) for positions in _survivable])

            n_with_bomb = sum([
                n_survivable_nodes_with_bomb[enemy_position]
                for enemy_position in info["enemy_positions"]
            ])
            n_with_none = sum(
                [n_survivable_nodes[enemy] for enemy in my_enemies])
            if n_with_none == 0:
                total_frac_blocked_with_bomb = 0

                # place more bombs, so the stacked enemy cannot kick
                x, y = my_position
                for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    next_position = (x + dx, y + dy)
                    following_position = (x + 2 * dx, y + 2 * dy)
                    if not self._on_board(following_position):
                        continue
                    if all([
                            obs["bomb_life"][next_position] > 0,
                            board[following_position] >
                            constants.Item.AgentDummy.value
                    ]):
                        total_frac_blocked_with_bomb = 1
            else:
                total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none

            action = constants.Action.Bomb
            block[action] = total_frac_blocked_with_bomb
            # block[action] += total_frac_blocked[my_position] * (eisenachAgents - total_frac_blocked_with_bomb)
            block[action] *= p_survivable[action]

            block_teammate_with_bomb = None
            if block[action] > 0:
                if info["teammate_position"] is not None:
                    block_teammate_with_bomb \
                        = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                           n_survivable_nodes_teammate,
                                                           board,
                                                           [my_teammate],
                                                           ignore_dying_agent=True)

                    block_teammate_with_bomb \
                        += total_frac_blocked_teammate[my_position] * (1 - block_teammate_with_bomb)
                    block[action] *= (1 - block_teammate_with_bomb)

                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

        for next_position in info["kickable"]:

            action = self._get_direction(my_position, next_position)
            if not is_survivable[action]:
                continue

            list_boards_with_kick, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=action,
                                       can_kick=True,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])

            block[action] \
                = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                   n_survivable_nodes,
                                                   board,
                                                   my_enemies,
                                                   ignore_dying_agent=True)
            block[action] += total_frac_blocked[next_position] * (
                1 - block[action])
            block[action] *= p_survivable[action]

            if block[action] > 0:
                if info["teammate_position"] is not None:
                    block_teammate_with_kick \
                        = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                           n_survivable_nodes_teammate,
                                                           board, [my_teammate],
                                                           ignore_dying_agent=True)
                    block_teammate_with_kick \
                        += total_frac_blocked_teammate[next_position] * (1 - block_teammate_with_kick)
                    block[action] *= (1 - block_teammate_with_kick)

                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

        max_block = 0  # do not choose zero blocking action as the best
        best_action = None
        for action in block:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp:
            if info["teammate_position"] is not None:
                if block_teammate_with_bomb is None:
                    block_teammate_with_bomb \
                        = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                           n_survivable_nodes_teammate,
                                                           board,
                                                           [my_teammate],
                                                           ignore_dying_agent=True)

                teammate_safety = block_teammate_with_bomb * n_survivable_nodes_teammate[
                    my_teammate]
                if any([
                        teammate_safety >
                        self._teammate_survivability_threshold,
                        block_teammate_with_bomb < self._interfere_threshold,
                        block_teammate_with_bomb <
                        total_frac_blocked_teammate[my_position]
                ]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if teammate_ok:
                if best_action == constants.Action.Bomb:
                    return constants.Action.Bomb.value

                if best_action == constants.Action.Stop:
                    return constants.Action.Bomb.value

        #
        # Move towards where to bomb
        #

        if best_action not in [None, constants.Action.Bomb]:
            next_position = info["my_next_position"][best_action]

            should_chase = (total_frac_blocked[next_position] >
                            self._chase_threshold)

            if info["teammate_position"] is not None:
                teammate_safety = total_frac_blocked_teammate[
                    next_position] * n_survivable_nodes_teammate[my_teammate]
                if any([
                        teammate_safety >
                        self._teammate_survivability_threshold,
                        total_frac_blocked_teammate[next_position] <
                        self._interfere_threshold,
                        total_frac_blocked_teammate[next_position] <
                        total_frac_blocked_teammate[my_position]
                ]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if should_chase and teammate_ok:
                if all([
                        utility.position_is_flames(board, next_position),
                        info["flame_life"][next_position] > 1,
                        is_survivable[constants.Action.Stop]
                ]):
                    action = constants.Action.Stop
                    return action.value
                else:
                    return best_action.value

        # Exclude the action representing stop to wait
        max_block = 0  # do not choose zero blocking action as the best
        best_action = None
        for action in survivable_actions:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        #
        # Do not take risky actions when not interacting with enemies
        #

        most_survivable_action = self._action_most_survivable(n_survivable)

        if total_frac_blocked[my_position] > 0:
            # ignore actions with low survivability
            _survivable_actions = list()
            for action in n_survivable:
                n = sum(n_survivable[action])
                if not is_survivable[action]:
                    continue
                elif n > self._my_survivability_threshold:
                    _survivable_actions.append(action)
                else:
                    is_survivable[action] = False

            if len(_survivable_actions) > 1:
                survivable_actions = _survivable_actions
            elif best_action is not None:
                return best_action.value
            else:
                # Take the most survivable action
                return most_survivable_action.value

        #
        # Do not interfere with teammate
        #

        if all([
                info["teammate_position"] is not None,
                len(info["enemy_positions"]) > 0 or len(info["curr_bombs"]) > 0
        ]):
            # ignore actions that interfere with teammate
            min_interfere = np.inf
            least_interfere_action = None
            _survivable_actions = list()
            for action in survivable_actions:
                if action == constants.Action.Bomb:
                    """
                    if block_teammate_with_bomb is None:
                        block_teammate_with_bomb \
                            = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                               n_survivable_nodes_teammate,
                                                               board,
                                                               [my_teammate],
                                                               ignore_dying_agent=True)                        
                    frac = block_teammate_with_bomb 
                    """
                    continue
                else:
                    next_position = info["my_next_position"][action]
                    frac = total_frac_blocked_teammate[next_position]
                if frac < min_interfere:
                    min_interfere = frac
                    least_interfere_action = action
                if frac < self._interfere_threshold:
                    _survivable_actions.append(action)
                else:
                    is_survivable[action] = False

            if len(_survivable_actions) > 1:
                survivable_actions = _survivable_actions
            elif best_action is not None:
                # Take the least interfering action
                return best_action.value
            else:
                return least_interfere_action.value

        consider_bomb = True
        if not is_survivable[constants.Action.Bomb]:
            consider_bomb = False

        #
        # Find reachable items
        #

        # List of boards simulated
        list_boards, _ = self._board_sequence(
            board,
            info["curr_bombs"],
            info["curr_flames"],
            self._search_range,
            my_position,
            enemy_mobility=mobility,
            enemy_bomb=self._enemy_bomb,
            enemy_positions=agent_positions,
            agent_blast_strength=info["agent_blast_strength"],
            step_to_collapse=info["step_to_collapse"],
            collapse_ring=info["collapse_ring"])

        # some bombs may explode with extra bombs, leading to under estimation
        for t in range(len(list_boards)):
            flame_positions = np.where(
                info["list_boards_no_move"][t] == constants.Item.Flames.value)
            list_boards[t][flame_positions] = constants.Item.Flames.value

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, _, _ = self._search_time_expanded_network(
            list_boards, my_position)
        if len(survivable[-1]) == 0:
            survivable = [set() for _ in range(len(survivable))]

        # Items and bomb target that can be reached in a survivable manner
        if "escape" in info:
            reachable_items, _, next_to_items \
                = self._find_reachable_items(list_boards,
                                             my_position,
                                             survivable,
                                             might_powerup=info["escape"])  # might_powerup is the escape from collapse
        else:
            _, _, next_to_items \
                = self._find_reachable_items(list_boards,
                                             my_position,
                                             survivable)

        #
        # If I have seen an enemy recently and cannot see him now, then move to the last seen position
        #

        action = self._action_to_enemy(my_position,
                                       next_to_items[constants.Item.Fog], prev,
                                       is_survivable, info)
        if action is not None:
            return action.value

        #
        # If I have seen a teammate recently, then move away from the last seen position
        #

        action = self._action_away_from_teammate(
            my_position, next_to_items[constants.Item.Fog], prev,
            is_survivable, info)
        if action is not None:
            return action.value

        #
        # Move to the places that will not be collapsed
        #

        if "escape" in info:
            # might_powerup is the escape from collapse
            action = self._action_to_might_powerup(my_position,
                                                   reachable_items, prev,
                                                   is_survivable)
            if action is not None:
                print("Escape from collapse", action)
                return action.value

        #
        # Move towards a fog where we have not seen longest
        #

        action = self._action_to_fog(my_position,
                                     next_to_items[constants.Item.Fog], prev,
                                     is_survivable, info)

        if action is not None:
            #if True:
            if self.random.uniform() < 0.8:
                return action.value

        #
        # Choose most survivable action
        #

        max_block = 0
        best_action = None
        for action in survivable_actions:
            if action == constants.Action.Bomb:
                continue
            score = block[action]
            if action != constants.Action.Bomb:
                score += np.random.uniform(0, 1e-3)
            if score > max_block:
                max_block = score
                best_action = action

        if best_action is None:
            max_p = 0
            best_action = None
            for action in p_survivable:
                score = p_survivable[action]
                if action != constants.Action.Bomb:
                    score += np.random.uniform(0, 1e-3)
                if score > max_p:
                    max_p = score
                    best_action = action

        if best_action is None:
            # this should not be the case
            return None
        else:
            return best_action.value
예제 #8
0
    def act(self, obs, action_space, info):
        """Choose the next action from a prioritized cascade of rules.

        The rules are tried in order and the first one that fires returns:

        1. take the only survivable action, if exactly one remains;
        2. place a bomb when staying put is the locally best blocking cell;
        3. step (or wait for flames) toward a neighboring cell that blocks
           enemies better;
        4. kick a bomb when that reduces an enemy's survivable nodes;
        5. place a bomb on, or walk toward, a reachable bomb target;
        6. walk toward the fog whose neighborhood was seen longest ago;
        7. fall back to the most survivable action.

        Args:
            obs: observation mapping; this method reads the keys 'board',
                'position', 'ammo', 'blast_strength', 'enemies', 'teammate',
                'can_kick', 'bomb_blast_strength' and 'bomb_life'.
            action_space: not used by this method.
            info: precomputed state mapping; this method reads the keys
                'list_boards_no_move', 'curr_bombs', 'curr_flames',
                'enemy_blast_strength', 'moving_direction' and
                'since_last_seen'.

        Returns:
            The ``.value`` of the chosen action, or ``None`` when no action
            is survivable.
        """

        #
        # Definitions
        #

        # Assumptions about the enemies that are passed to the board simulator
        # and the survivability helpers below.
        enemy_mobility = 4
        enemy_bomb = 1

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        # NOTE(review): my_teammate is assigned but never read in this method.
        my_teammate = obs["teammate"]
        my_kick = obs["can_kick"]  # whether I can kick

        # Debug trace; ends with a tab so the decision message printed later
        # continues on the same line.
        print("my position",
              my_position,
              "ammo",
              my_ammo,
              "blast",
              my_blast_strength,
              "kick",
              my_kick,
              end="\t")

        #
        # Understand current situation
        #

        # fraction of blocked node in the survival trees of enemies
        # Work on a deep copy so info["list_boards_no_move"] is not modified.
        # Wherever my cell still holds an agent, replace it with a bomb (when
        # a bomb is under me) or with a passage (otherwise).
        _list_boards = deepcopy(info["list_boards_no_move"])
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value
        total_frac_blocked, n_survivable_nodes \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])
        # TODO : PARAMETER TO OPTIMIZE
        # Cells whose blocked fraction exceeds 0.1 are treated as bomb targets.
        bomb_target_enemy = (total_frac_blocked > 0.1)

        #np.set_printoptions(precision=2)
        #print("frac")
        #print(total_frac_blocked)

        #print("target")
        #print(bomb_target_enemy)

        # List of boards simulated
        list_boards, _ = self._board_sequence(
            board,
            info["curr_bombs"],
            info["curr_flames"],
            self._search_range,
            my_position,
            enemy_mobility=enemy_mobility,
            enemy_bomb=enemy_bomb,
            enemy_blast_strength=info["enemy_blast_strength"])

        # some bombs may explode with extra bombs, leading to under estimation
        # Overlay every flame cell of the no-move boards onto the simulated
        # boards at the same time step.
        for t in range(len(list_boards)):
            flame_positions = np.where(
                info["list_boards_no_move"][t] == constants.Item.Flames.value)
            list_boards[t][flame_positions] = constants.Item.Flames.value

        #for t, b in enumerate(list_boards):
        #    print(t)
        #    print(b)

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, succ, _ \
            = self._search_time_expanded_network(list_boards,
                                                 my_position)

        # Survivable actions
        # is_survivable is indexed by action and mutated in place below.
        is_survivable, survivable_with_bomb \
            = self._get_survivable_actions(survivable,
                                           obs,
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           enemy_mobility=enemy_mobility,
                                           enemy_bomb=enemy_bomb,
                                           enemy_blast_strength=info["enemy_blast_strength"])

        # n_survivable maps an action to a list of survivable-position counts.
        n_survivable = dict()
        kick_actions = list()
        if my_kick:
            # Positions where we kick a bomb if we move to
            kickable, _ = self._kickable_positions(obs,
                                                   info["moving_direction"])
            for next_position in kickable:
                # consider what happens if I kick a bomb
                my_action = self._get_direction(my_position, next_position)

                # do not kick into fog
                # Walk from the bomb cell along the kick direction until
                # leaving the board; any fog on that ray vetoes the kick.
                dx = next_position[0] - my_position[0]
                dy = next_position[1] - my_position[1]
                position = next_position
                is_fog = False
                while self._on_board(position):
                    if utility.position_is_fog(board, position):
                        is_fog = True
                        break
                    position = (position[0] + dx, position[1] + dy)
                if is_fog:
                    continue

                # next_position is rebound here to the second value returned
                # by _board_sequence (presumably my position after the kick
                # move - confirm against the helper).
                list_boards_with_kick, next_position \
                    = self._board_sequence(obs["board"],
                                           info["curr_bombs"],
                                           info["curr_flames"],
                                           self._search_range,
                                           my_position,
                                           my_action=my_action,
                                           can_kick=True,
                                           enemy_mobility=enemy_mobility,
                                           enemy_bomb=enemy_bomb,
                                           enemy_blast_strength=info["enemy_blast_strength"])
                #print(list_boards_with_kick)
                # The search starts one step later (boards[1:]) from the
                # position reached by the kick move.
                # NOTE(review): prev_kick and succ_kick are never read.
                survivable_with_kick, prev_kick, succ_kick, _ \
                    = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                         next_position)
                if next_position in survivable_with_kick[0]:
                    is_survivable[my_action] = True
                    n_survivable[my_action] = [1] + [
                        len(s) for s in survivable_with_kick[1:]
                    ]
                    kick_actions.append(my_action)
        else:
            kickable = set()

        survivable_actions = [a for a in is_survivable if is_survivable[a]]

        # Nothing survivable: give up and return no action.
        if len(survivable_actions) == 0:
            return None

        #
        # bomb target that can be reached in a survivable manner
        #

        # NOTE(review): `reached` is never read in this method.
        reachable_items, reached, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable,
                                         bomb_target_enemy)

        #
        # Evaluate the survivability of each action
        #

        # NOTE(review): kick actions were made survivable above, so they are
        # visited by this loop too and their kick-specific n_survivable entry
        # is overwritten with the count taken from `succ` (the network built
        # without the kick). Confirm whether that overwrite is intended.
        x, y = my_position
        for action in survivable_actions:
            # for each survivable action, check the survivability
            if action == constants.Action.Bomb:
                n_survivable[action] = [
                    len(s) for s in survivable_with_bomb[1:]
                ]
                continue

            # Offsets are applied to the first board index for Up/Down and to
            # the second for Left/Right.
            if action == constants.Action.Up:
                dx = -1
                dy = 0
            elif action == constants.Action.Down:
                dx = 1
                dy = 0
            elif action == constants.Action.Left:
                dx = 0
                dy = -1
            elif action == constants.Action.Right:
                dx = 0
                dy = 1
            elif action == constants.Action.Stop:
                dx = 0
                dy = 0
            else:
                raise ValueError()
            next_position = (x + dx, y + dy)
            # NOTE(review): _info is never read.
            n_survivable[action], _info = self._count_survivable(
                succ, 1, next_position)

        # NOTE(review): `verbose` is not defined inside this method; it is
        # presumably a module-level flag - confirm it exists.
        if verbose:
            print("n_survivable")
            for a in n_survivable:
                print(a, n_survivable[a])

        #
        # Avoid the action leading to no choice if possible
        #
        updated = False

        # Drop every action whose final count is at most half of the best
        # final count, but only when the best final count exceeds 1.
        max_survivable_positions = max([n[-1] for n in n_survivable.values()])
        if max_survivable_positions > 1:
            for a in n_survivable:
                if n_survivable[a][-1] > max_survivable_positions / 2:
                    continue
                is_survivable[a] = False
                updated = True

        # Smallest count of each action from index `enemy_mobility` onward.
        # When some action keeps more than one option, drop the actions that
        # bottom out at exactly one.
        # NOTE(review): min() raises ValueError if a count list has no entry
        # at index enemy_mobility or later - confirm the lists are always
        # long enough.
        minn = defaultdict(int)
        for a in n_survivable:
            minn[a] = min(n_survivable[a][enemy_mobility:])
        maxmin = max(minn.values())
        if maxmin > 1:
            for a in minn:
                if minn[a] == 1:
                    is_survivable[a] = False
                    updated = True

        if updated:
            survivable_actions = [a for a in is_survivable if is_survivable[a]]

        #
        # Choose the survivable action, if it is the only choice
        #

        if len(survivable_actions) == 1:

            # move to the position if it is the only survivable position
            action = survivable_actions[0]
            print("The only survivable action", action)
            return action.value
        # The string literal below is a disabled rule kept as a bare string
        # statement; it has no effect at runtime.
        """
        #
        # Bomb if it has dominating survivability
        #

        if is_survivable[constants.Action.Bomb]:
            bomb_is_most_survivable = True
            bomb_sorted = np.array(sorted(n_survivable[constants.Action.Bomb]))
            for action in n_survivable:
                if action == constants.Action.Bomb:
                    continue
                action_sorted = np.array(sorted(n_survivable[action]))
                if any(action_sorted > bomb_sorted):
                    bomb_is_most_survivable = False
                    break
            if bomb_is_most_survivable:
                action = constants.Action.Bomb
                print("Bomb to survive", action)
                return action.value
        """

        #
        # Bomb at a target
        #

        # Pick the action whose resulting cell has the largest blocked
        # fraction. Ties keep the earliest action in the list below, and a
        # fraction of zero never wins because the comparison is strict.
        best_action = None
        max_block = 0
        for action in [
                constants.Action.Stop, constants.Action.Bomb,
                constants.Action.Up, constants.Action.Down,
                constants.Action.Right, constants.Action.Left
        ]:
            next_position = self._get_next_position(my_position, action)
            if not self._on_board(next_position):
                continue
            if utility.position_is_flames(board, next_position):
                if not is_survivable[constants.Action.Stop]:
                    # if the next position is flames,
                    # I want to stop to wait, which must be feasible
                    continue
            else:
                if not is_survivable[action]:
                    continue
            block = total_frac_blocked[next_position]
            if block > max_block:
                max_block = block
                best_action = action

        # Staying here (Stop or Bomb) is the best cell and bombing is
        # survivable: place the bomb now.
        if all([
                is_survivable[constants.Action.Bomb], best_action
                in [constants.Action.Stop, constants.Action.Bomb]
        ]):
            print("Place a bomb at a locally optimal position",
                  constants.Action.Bomb)
            return constants.Action.Bomb.value

        #
        # Move towards where to bomb
        #

        if best_action not in [None, constants.Action.Bomb]:
            next_position = self._get_next_position(my_position, best_action)
            # TODO : PARAMETER TO OPTIMIZE
            if total_frac_blocked[next_position] > 0.01:
                if utility.position_is_flames(board, next_position):
                    # The target cell is still burning: wait in place.
                    action = constants.Action.Stop
                    print("Wait flames life", action)
                    return action.value
                else:
                    print("Move towards better place to bomb", best_action)
                    return best_action.value

        #
        # Kick
        #

        for my_action in kick_actions:
            # Cell holding the bomb that this action would kick.
            # NOTE(review): there is no else branch, so an action outside the
            # four directions would reuse the previous next_position.
            if my_action == constants.Action.Up:
                next_position = (my_position[0] - 1, my_position[1])
            elif my_action == constants.Action.Down:
                next_position = (my_position[0] + 1, my_position[1])
            elif my_action == constants.Action.Right:
                next_position = (my_position[0], my_position[1] + 1)
            elif my_action == constants.Action.Left:
                next_position = (my_position[0], my_position[1] - 1)
            # do not kick a bomb if it will break enemies
            if info["moving_direction"][next_position] is None:
                print("checking static bomb")
                # if it is a static bomb
                if self._can_break(info["list_boards_no_move"][0],
                                   next_position, my_blast_strength,
                                   my_enemies):
                    continue

            # Simulate the kick with enemy_mobility=0.
            list_boards_with_kick_no_move, _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=0)

            for enemy in my_enemies:
                # Skip enemies that are not on the current board.
                rows, cols = np.where(board == enemy.value)
                if len(rows) == 0:
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable, _, _, _ \
                    = self._search_time_expanded_network(list_boards_with_kick_no_move,
                                                         enemy_position)

                # Kick as soon as one enemy ends up with fewer survivable
                # nodes than without the kick.
                n_survivable_nodes_with_kick = sum(
                    [len(positions) for positions in _survivable])
                if n_survivable_nodes_with_kick < n_survivable_nodes[enemy]:
                    print("Kicking to reduce the survivability",
                          n_survivable_nodes[enemy], "->",
                          n_survivable_nodes_with_kick, my_action)
                    return my_action.value

        # Entries are (t, x, y) time-positions; (0,) + my_position is my own
        # cell at time 0.
        good_time_positions = reachable_items["target"]
        if all([(0, ) + my_position in good_time_positions,
                is_survivable[constants.Action.Bomb]]):
            print("place a bomb at a bomb target", constants.Action.Bomb)
            return constants.Action.Bomb.value

        if good_time_positions:
            # Score each target by its blocked fraction discounted by arrival
            # time, keep every target tied for the best score, and head to
            # the closest of them.
            score = [
                total_frac_blocked[(x, y)] / (t + 1)
                for t, x, y in good_time_positions
            ]
            argmax = np.argwhere(score == np.max(score))
            best_time_positions = [good_time_positions[i[0]] for i in argmax]
            action = self._find_distance_minimizer(
                my_position,
                best_time_positions,
                #good_time_positions,
                prev,
                is_survivable)
            if action is not None:
                print("Moving toward where to bomb", action)
                return action.value

        #
        # Move towards the fog that we have not seen for the longest time
        #

        # For each time-position next to fog, take the largest
        # since_last_seen value among its on-board 4-neighbors and remember
        # the time-position with the largest such age (it must be > 0).
        best_time_position = None
        oldest = 0
        for t, x, y in next_to_items[constants.Item.Fog]:
            neighbors = [(x + dx, y + dy)
                         for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
            age = max([
                info["since_last_seen"][position] for position in neighbors
                if self._on_board(position)
            ])
            if age > oldest:
                oldest = age
                best_time_position = (t, x, y)

        if best_time_position is not None:
            action = self._find_distance_minimizer(my_position,
                                                   [best_time_position], prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward oldest fog", action)
                return action.value

        #
        # Choose most survivable action
        #

        action = self._get_most_survivable_action(n_survivable)
        print("Most survivable action", action)
        return action.value
예제 #9
0
    def act(self, obs, action_space):
        def convert_bombs(bomb_map):
            ret = []
            locations = np.where(bomb_map > 0)
            for r, c in zip(locations[0], locations[1]):
                ret.append({
                    'position': (r, c),
                    'blast_strength': int(bomb_map[(r, c)])
                })
            return ret

        depth = 20

        my_position = tuple(obs['position'])
        board = np.array(obs['board'])
        bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
        enemies = [constants.Item(e) for e in obs['enemies']]
        ammo = int(obs['ammo'])
        blast_strength = int(obs['blast_strength'])

        if self.prev_pos != None:
            if self.prev_pos == my_position:
                if 1 <= self.prev_action.value <= 4:
                    if self.logging:
                        print('freeze')
                    board[self.prev_pos] = constants.Item.Rigid.value

        items, dist, prev = self._djikstra(board,
                                           my_position,
                                           bombs,
                                           enemies,
                                           bomb_timer=self.bomb_time,
                                           depth=depth)

        if self.logging:
            print('my_position =', my_position)
            print('board =')
            print(board)
            print('dist =')
            print(dist)
            print('bombs =', bombs)
            print('enemies =', enemies)
            for e in enemies:
                print(e)
                pos = items.get(e, [])
                print('pos =', pos)
                print('pos_len=', len(pos))
                if len(pos) > 0:
                    print('xy=', pos[0][0], ',', pos[0][1])
                # print('pos_r =', x, ',',y)
            print('ammo =', ammo)
            print('blast_strength =', blast_strength)

        test_ary = np.ones((11, 11))

        for c in range(11):
            for r in range(11):
                if (r, c) in dist:
                    test_ary[r, c] = dist[(r, c)]
                else:
                    test_ary[r, c] = -1

        if self.logging:
            print("dist_mat:")
            print(test_ary)

        # update bomb_time map
        bomb_life = 8
        has_bomb = {}
        already_breakable = np.zeros((11, 11))
        for b in bombs:
            r, c = b['position']
            strength = b['blast_strength']
            # print('bomb_cr =', c, 'r=', r, 'st=', strength)

            if self.bomb_time[(r, c)] == 0:
                self.bomb_time[(r, c)] = bomb_life
            else:
                self.bomb_time[(r, c)] -= 1

            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                for d in range(1, strength):

                    new_pos = (r + d * row, c + d * col)

                    if TestSimpleAgent._out_of_board(new_pos):
                        continue

                    # if new_pos[0] < 0 or new_pos[0] > 10:
                    #     continue
                    # if new_pos[1] < 0 or new_pos[1] > 10:
                    #     continue

                    if utility.position_is_rigid(board, new_pos):
                        continue

                    if utility.position_is_wood(board, new_pos):
                        already_breakable[new_pos] = 1

                    if self.bomb_time[new_pos] == 0:
                        self.bomb_time[new_pos] = bomb_life
                    else:
                        self.bomb_time[new_pos] -= 1

                    has_bomb[new_pos] = 1

        # clear up table
        for c in range(11):
            for r in range(11):
                if (r, c) not in has_bomb:
                    self.bomb_time[(r, c)] = 0

        if self.logging:
            print("bomb_time:")
            print(self.bomb_time)

        # evaluate each position in terms of breakable woods
        num_breakable = np.zeros((11, 11))
        num_breakable_inside = np.zeros((11, 11))
        for c in range(11):
            for r in range(11):
                if utility.position_is_wood(board, (r, c)):
                    if already_breakable[(r, c)]:
                        continue
                    for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                        for d in range(1, blast_strength):
                            new_pos = (r + d * row, c + d * col)

                            if TestSimpleAgent._out_of_board(new_pos):
                                continue

                            if utility.position_is_passable(
                                    board, new_pos,
                                    enemies) or utility.position_is_flames(
                                        board, new_pos):
                                num_breakable[new_pos] += 1
                            else:
                                break

                    tmp_num = 0
                    has_passable = False
                    for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                        new_pos = (r + row, c + col)
                        if TestSimpleAgent._out_of_board(new_pos):
                            continue

                        if utility.position_is_wood(board, new_pos):
                            tmp_num += 1
                        elif utility.position_is_passable(
                                board, new_pos, enemies):
                            has_passable = True

                    if (not has_passable) and tmp_num > 0:
                        tmp_num -= 1

                    num_breakable_inside[(r, c)] = tmp_num

        if self.logging:
            print('num_breakable:')
            print(num_breakable)

            print('num_breakable_inside:')
            print(num_breakable_inside)

        num_breakable_total = np.zeros((11, 11))
        for c in range(11):
            for r in range(11):
                num_breakable_total[(r, c)] = num_breakable[(r, c)]

                if num_breakable_total[(r, c)] == -1 or num_breakable_total[(
                        r, c)] == np.inf:
                    continue

                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    new_pos = (r + row, c + col)

                    if new_pos[0] < 0 or new_pos[0] > 10:
                        continue
                    if new_pos[1] < 0 or new_pos[1] > 10:
                        continue

                    num_breakable_total[(
                        r, c)] += num_breakable_inside[new_pos] * 0.5

        if self.logging:
            print('num_breakable_total:')
            print(num_breakable_total)

        # evaluate each position in total
        pos_scores = np.zeros((11, 11))
        for c in range(11):
            for r in range(11):
                if (r, c) not in dist:
                    pos_scores[(r, c)] = -1
                    continue
                elif dist[(r, c)] == np.inf:
                    pos_scores[(r, c)] = np.inf
                    continue

                if num_breakable_total[(r, c)] > 0:
                    pos_scores[(r, c)] += num_breakable_total[(r, c)]
                    pos_scores[(r, c)] += (depth - dist[(r, c)]) * 0.2

                # consider power-up items
                if board[(r, c)] in {
                        constants.Item.ExtraBomb.value,
                        constants.Item.IncrRange.value
                }:
                    pos_scores[(r, c)] += 50

        if self.logging:
            print('pos_score:')
            print(pos_scores)

        # consider degree of freedom
        dis_to_ene = 100
        for e in enemies:
            pos = items.get(e, [])
            if len(pos) > 0:
                d = abs(pos[0][0] - my_position[0]) + abs(pos[0][1] -
                                                          my_position[1])
                if dis_to_ene > d:
                    dis_to_ene = d
        if dis_to_ene <= -4:
            # if direction is not None:
            deg_frees = np.zeros((11, 11))
            for c in range(11):
                for r in range(11):
                    # if pos_scores[(r, c)] == np.inf:
                    #     continue
                    if not utility.position_is_passable(
                            board, (r, c), enemies):
                        continue

                    deg_free = 0
                    for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                        new_pos = (r + row, c + col)
                        if new_pos[0] < 0 or new_pos[0] > 10:
                            continue
                        if new_pos[1] < 0 or new_pos[1] > 10:
                            continue

                        if utility.position_is_passable(
                                board, new_pos,
                                enemies) or utility.position_is_flames(
                                    board, new_pos):
                            deg_free += 1

                    deg_frees[(r, c)] = deg_free

                    if deg_free <= 1:
                        pos_scores[(r, c)] -= 5

            if self.logging:
                print('deg_free')
                print(deg_frees)

        # consider bomb blast
        for i in range(len(bombs)):
            r, c = bombs[i]['position']
            strength = bombs[i]['blast_strength']

            pos_scores[(r, c)] = -20

            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                for d in range(1, strength):

                    new_pos = (r + d * row, c + d * col)
                    if new_pos[0] < 0 or new_pos[0] > 10:
                        continue
                    if new_pos[1] < 0 or new_pos[1] > 10:
                        continue

                    if new_pos not in dist:
                        continue
                    elif new_pos == np.inf:
                        continue

                    pos_scores[new_pos] = -20

        if self.logging:
            print('consider blast pos_score:')
            print(pos_scores)

        # consider enemies
        for e in enemies:
            pos = items.get(e, [])
            if len(pos) > 0:
                r = pos[0][0]
                c = pos[0][1]

                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    for d in range(1, blast_strength * 2):
                        new_pos = (r + d * row, c + d * col)
                        if new_pos[0] < 0 or new_pos[0] > 10:
                            continue
                        if new_pos[1] < 0 or new_pos[1] > 10:
                            continue

                        if not utility.position_is_passable(
                                board, new_pos, enemies):
                            break

                        pos_scores[new_pos] += 0.3

        if self.logging:
            print('consider enemy:')
            print(pos_scores)

        h_r, h_c = -1, -1
        h_score = -1
        for c in range(11):
            for r in range(11):
                if (r, c) not in dist:
                    continue
                elif dist[(r, c)] == np.inf:
                    continue

                if h_score < pos_scores[(r, c)]:
                    h_score = pos_scores[(r, c)]
                    h_r, h_c = (r, c)

        if self.logging:
            print('h_score and pos:', h_score, '(r, c) =', h_r, ',', h_c)
            print('prev:')
            print(prev)

        # if current position is not the highest score position, move to the highest position.
        if h_r == -1:
            # print('action: Stop')
            self.prev_action = constants.Action.Stop
            # return constants.Action.Stop.value
        elif pos_scores[my_position] == h_score:
            if self._can_escape(pos_scores, my_position, blast_strength):
                # print('set bomb')
                self.prev_action = constants.Action.Bomb
                # return constants.Action.Bomb.value
            else:
                # print('action: Stop2')
                self.prev_action = constants.Action.Stop
                # return constants.Action.Stop.value
        else:
            # print('action: backtrack')
            self.prev_action = self._backtrack(my_position, (h_r, h_c), prev)
            # return self._backtrack(my_position, (h_r, h_c), prev)

        self.prev_pos = my_position
        if self.logging:
            print('action: ', self.prev_action)
        return self.prev_action.value

        # Move if we are in an unsafe place.
        unsafe_directions = self._directions_in_range_of_bomb(
            board, my_position, bombs, dist)
        if unsafe_directions:
            directions = self._find_safe_directions(board, my_position,
                                                    unsafe_directions, bombs,
                                                    enemies)
            return random.choice(directions).value

        # Lay a bomb if we are adjacent to an enemy.
        if self._is_adjacent_enemy(items, dist, enemies) and self._maybe_bomb(
                ammo, blast_strength, items, dist, my_position):
            return constants.Action.Bomb.value

        # Move towards an enemy if there is one in exactly three reachable spaces.
        direction = self._near_enemy(my_position, items, dist, prev, enemies,
                                     3)
        if direction is not None and (self._prev_direction != direction
                                      or random.random() < .5):
            self._prev_direction = direction
            return direction.value

        # Move towards a good item if there is one within two reachable spaces.
        direction = self._near_good_powerup(my_position, items, dist, prev, 2)
        if direction is not None:
            return direction.value

        # Maybe lay a bomb if we are within a space of a wooden wall.
        if self._near_wood(my_position, items, dist, prev, 1):
            if self._maybe_bomb(ammo, blast_strength, items, dist,
                                my_position):
                return constants.Action.Bomb.value
            else:
                return constants.Action.Stop.value

        # Move towards a wooden wall if there is one within two reachable spaces and you have a bomb.
        direction = self._near_wood(my_position, items, dist, prev, 2)
        if direction is not None:
            directions = self._filter_unsafe_directions(
                board, my_position, [direction], bombs)
            if directions:
                return directions[0].value

        # Choose a random but valid direction.
        directions = [
            constants.Action.Stop, constants.Action.Left,
            constants.Action.Right, constants.Action.Up, constants.Action.Down
        ]
        valid_directions = self._filter_invalid_directions(
            board, my_position, directions, enemies)
        directions = self._filter_unsafe_directions(board, my_position,
                                                    valid_directions, bombs)
        directions = self._filter_recently_visited(
            directions, my_position, self._recently_visited_positions)
        if len(directions) > 1:
            directions = [k for k in directions if k != constants.Action.Stop]
        if not len(directions):
            directions = [constants.Action.Stop]

        # Add this position to the recently visited uninteresting positions so we don't return immediately.
        self._recently_visited_positions.append(my_position)
        self._recently_visited_positions = self._recently_visited_positions[
            -self._recently_visited_length:]

        return random.choice(directions).value
예제 #10
0
    def act(self, obs, action_space, info):

        #
        # Definitions
        #

        board = obs['board']
        recently_seen_positions = (info["since_last_seen"] < 3)
        board[recently_seen_positions] = info["last_seen"][recently_seen_positions]
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies'] if e != constants.Item.AgentDummy]
        if obs["teammate"] != constants.Item.AgentDummy:
            my_teammate = obs["teammate"]
        else:
            my_teammate = None
        my_kick = obs["can_kick"]  # whether I can kick

        if verbose:
            print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t")

        my_next_position = {constants.Action.Stop: my_position,
                            constants.Action.Bomb: my_position}
        for action in [constants.Action.Up, constants.Action.Down,
                       constants.Action.Left, constants.Action.Right]:
            next_position = self._get_next_position(my_position, action)
            if self._on_board(next_position):
                if board[next_position] == constants.Item.Rigid.value:
                    my_next_position[action] = None
                else:
                    my_next_position[action] = next_position
            else:
                my_next_position[action] = None

        #
        # Understand current situation
        #

        if all([info["prev_action"] not in [constants.Action.Stop, constants.Action.Bomb],
                info["prev_position"] == my_position]):
            # if previously blocked, do not repeat with some probability
            self._inv_tmp *= self._backoff
        else:
            self._inv_tmp = self._inv_tmp_init

        
        # enemy positions
        enemy_positions = list()
        for enemy in my_enemies:
            rows, cols = np.where(board==enemy.value)
            if len(rows) == 0:
                continue
            enemy_positions.append((rows[0], cols[0]))

        # teammate position
        teammate_position = None
        if my_teammate is not None:
            rows, cols = np.where(board==my_teammate.value)
            if len(rows):
                teammate_position = (rows[0], cols[0])
        
        # Positions where we kick a bomb if we move to
        if my_kick:
            kickable, might_kickable = self._kickable_positions(obs, info["moving_direction"])
        else:
            kickable = set()
            might_kickable = set()

        # positions that might be blocked
        if teammate_position is None:
            agent_positions = enemy_positions
        else:
            agent_positions = enemy_positions + [teammate_position]
        might_blocked = self._get_might_blocked(board, my_position, agent_positions, might_kickable)

        # enemy positions over time
        # these might have disappeared due to extra flames
        if len(enemy_positions):
            rows = [p[0] for p in enemy_positions]
            cols = [p[1] for p in enemy_positions]
            list_enemy_positions = [(rows, cols)]
            _enemy_positions = list()
            for t in range(self._enemy_mobility):
                rows, cols = list_enemy_positions[-1]
                for x, y in zip(rows, cols):
                    for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1)]:
                        next_position = (x + dx, y + dy)
                        if not self._on_board(next_position):
                            continue
                        _board = info["list_boards_no_move"][t]
                        if utility.position_is_passage(_board, next_position):
                            _enemy_positions.append(next_position)
            _enemy_positions = set(_enemy_positions)
            rows = [p[0] for p in _enemy_positions]
            cols = [p[1] for p in _enemy_positions]
            list_enemy_positions.append((rows, cols))
        else:
            list_enemy_positions = []
            
        
        # survivable actions
        is_survivable = dict()
        for a in self._get_all_actions():
            is_survivable[a] = False
        n_survivable = dict()
        list_boards = dict()
        for my_action in self._get_all_actions():

            next_position = my_next_position[my_action]

            if next_position is None:
                continue

            if my_action == constants.Action.Bomb:
                if any([my_ammo == 0,
                        obs["bomb_blast_strength"][next_position] > 0]):
                    continue
            
            if all([utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1]):
                continue

            if all([my_action != constants.Action.Stop,
                    obs["bomb_blast_strength"][next_position] > 0,
                    next_position not in set.union(kickable, might_kickable)]):
                continue

            if next_position in set.union(kickable, might_kickable):
                # do not kick into fog
                dx = next_position[0] - my_position[0]
                dy = next_position[1] - my_position[1]
                position = next_position
                is_fog = False
                while self._on_board(position):
                    if utility.position_is_fog(board, position):
                        is_fog = True
                        break
                    position = (position[0] + dx, position[1] + dy)
                if is_fog:
                    continue
            
            # list of boards from next steps
            list_boards[my_action], _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=my_action,
                                       can_kick=my_kick,
                                       enemy_mobility=self._enemy_mobility,
                                       enemy_bomb=self._enemy_bomb,
                                       agent_blast_strength=info["agent_blast_strength"])

            # agents might have disappeared because of overestimated bombs
            for t, positions in enumerate(list_enemy_positions):
                list_boards[my_action][t][positions] = constants.Item.AgentDummy.value
            
            # some bombs may explode with extra bombs, leading to underestimation
            for t in range(len(list_boards[my_action])):
                flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value)
                list_boards[my_action][t][flame_positions] = constants.Item.Flames.value
                
        """
        processed = Parallel(n_jobs=-1, verbose=0)(
            [delayed(search_time_expanded_network)(list_boards[action][1:], my_next_position[action], action)
             for action in list_boards]
        )
        for survivable, my_action in processed:
            if my_next_position[my_action] in survivable[0]:
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]]
        """
        
        for my_action in list_boards:
            survivable = search_time_expanded_network(list_boards[my_action][1:],
                                                      my_next_position[my_action])
            if my_next_position[my_action] in survivable[0]:
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]]
        
        survivable_actions = list()
        for a in is_survivable:
            if not is_survivable[a]:
                continue
            if might_blocked[a] and not is_survivable[constants.Action.Stop]:
                continue
            if n_survivable[a][-1] <= 1:
                is_survivable[a] = False
                continue
            survivable_actions.append(a)

        #
        # Choose action
        #
                
        if len(survivable_actions) == 0:

            #
            # return None, if no survivable actions
            #
        
            return None

        elif len(survivable_actions) == 1:

            #
            # Choose the survivable action, if it is the only choice
            #
            
            action = survivable_actions[0]
            if verbose:
                print("The only survivable action", action)
            return action.value


        #
        # Bomb at a target
        #

        # fraction of blocked nodes in the survival trees of enemies
        _list_boards = deepcopy(info["list_boards_no_move"])
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes, blocked_time_positions \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])
    
        if teammate_position is not None:
            total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
                = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

            """
            np.set_printoptions(precision=3)
            print("enemy")
            print(total_frac_blocked)
            print("teammate")
            print(total_frac_blocked_teammate)
            print("product")
            prod = total_frac_blocked * (1 - total_frac_blocked_teammate)
            print(prod[:5,:5])
            """

        p_survivable = defaultdict(float)
        for action in n_survivable:
            p_survivable[action] = sum(n_survivable[action]) / self._my_survivability_threshold
            if p_survivable[action] > 1:
                p_survivable[action] = 1

        block = defaultdict(float)
        for action in [constants.Action.Stop,
                       constants.Action.Up, constants.Action.Down,
                       constants.Action.Left, constants.Action.Right]:
            next_position = my_next_position[action]
            if next_position is None:
                continue
            if next_position in set.union(kickable, might_kickable):
                # kick will be considered later
                continue
            if all([utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1,
                    is_survivable[constants.Action.Stop]]):
                # if the next position is flames,
                # I want to stop to wait, which must be feasible
                block[action] = total_frac_blocked[next_position] * p_survivable[constants.Action.Stop]
                if teammate_position is not None:
                    block[action] *= (1 - total_frac_blocked_teammate[next_position])
                block[action] *= self._inv_tmp
                block[action] -=  np.log(-np.log(self.random.uniform()))
                continue
            elif not is_survivable[action]:
                continue
            if all([might_blocked[action],
                    not is_survivable[constants.Action.Stop]]):
                continue

            block[action] = total_frac_blocked[next_position] * p_survivable[action]
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_teammate[next_position])
            block[action] *= self._inv_tmp            
            block[action] -=  np.log(-np.log(self.random.uniform()))
            if might_blocked[action]:
                block[action] = (total_frac_blocked[my_position] * p_survivable[constants.Action.Stop]
                                 + total_frac_blocked[next_position] * p_survivable[action]) / 2
                if teammate_position is not None:                    
                    block[action] *= (1 - total_frac_blocked_teammate[next_position])
                block[action] *= self._inv_tmp                
                block[action] -=  np.log(-np.log(self.random.uniform()))

        if is_survivable[constants.Action.Bomb]:
            list_boards_with_bomb, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=constants.Action.Bomb)

            n_survivable_nodes_with_bomb = defaultdict(int)
            for enemy in my_enemies:
                # get survivable tree of the enemy
                rows, cols = np.where(board==enemy.value)
                if len(rows) == 0:
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable = search_time_expanded_network(list_boards_with_bomb,
                                                           enemy_position)
                n_survivable_nodes_with_bomb[enemy] = sum([len(positions) for positions in _survivable])

            n_with_bomb = sum([n_survivable_nodes_with_bomb[enemy] for enemy in my_enemies])
            n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies])
            if n_with_none == 0:
                total_frac_blocked_with_bomb = 0

                # place more bombs, so the stacked enemy cannot kick
                x, y = my_position
                for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    next_position = (x + dx, y + dy)
                    following_position = (x + 2 * dx, y + 2 * dy)
                    if not self._on_board(following_position):
                        continue
                    if all([obs["bomb_life"][next_position] > 0,
                            board[following_position] > constants.Item.AgentDummy.value]):
                        total_frac_blocked_with_bomb = 1
            else:
                total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none

            if teammate_position is not None:
                # get survivable tree of the teammate
                _survivable = search_time_expanded_network(list_boards_with_bomb, teammate_position)
                n_survivable_nodes_with_bomb_teammate = sum([len(positions) for positions in _survivable])

                n_with_bomb = n_survivable_nodes_with_bomb_teammate
                n_with_none = n_survivable_nodes_teammate[my_teammate]
                if n_with_none == 0:
                    total_frac_blocked_with_bomb_teammate = 0
                else:
                    total_frac_blocked_with_bomb_teammate = 1 - n_with_bomb / n_with_none

            action = constants.Action.Bomb
            block[action] = total_frac_blocked_with_bomb * p_survivable[action]
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_with_bomb_teammate)
            block[action] *= self._inv_tmp
            block[action] -=  np.log(-np.log(self.random.uniform()))

        for next_position in kickable:

            action = self._get_direction(my_position, next_position)
            if not is_survivable[action]:
                continue

            list_boards_with_kick, _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=action,
                                       can_kick=True)

            n_survivable_nodes_with_kick = defaultdict(int)
            for enemy in my_enemies:
                # get survivable tree of the enemy
                rows, cols = np.where(board==enemy.value)
                if len(rows) == 0:
                    continue
                enemy_position = (rows[0], cols[0])
                _survivable = search_time_expanded_network(list_boards_with_kick,
                                                           enemy_position)
                n_survivable_nodes_with_kick[enemy] = sum([len(positions) for positions in _survivable])

                n_with_kick = sum([n_survivable_nodes_with_kick[enemy] for enemy in my_enemies])
                n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies])
                if n_with_none == 0:
                    total_frac_blocked[next_position] = 0
                else:
                    total_frac_blocked[next_position] = 1 - n_with_kick / n_with_none

            if teammate_position is not None:
                # get survivable tree of the teammate
                _survivable = search_time_expanded_network(list_boards_with_kick, teammate_position)
                n_survivable_nodes_with_kick_teammate = sum([len(positions) for positions in _survivable])

                n_with_kick = n_survivable_nodes_with_kick_teammate
                n_with_none = n_survivable_nodes_teammate[my_teammate]
                if n_with_none == 0:
                    total_frac_blocked_teammate[next_position] = 0
                else:
                    total_frac_blocked_teammate[next_position] = 1 - n_with_kick / n_with_none
            
            block[action] = total_frac_blocked[next_position] * p_survivable[action]
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_teammate[next_position])
            block[action] *= self._inv_tmp
            block[action] -=  np.log(-np.log(self.random.uniform()))

        max_block = -np.inf
        best_action = None
        for action in block:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp:
            if teammate_position is not None:
                teammate_safety = total_frac_blocked_with_bomb_teammate * n_survivable_nodes_with_bomb_teammate
                if any([teammate_safety > self._teammate_survivability_threshold,
                        total_frac_blocked_with_bomb_teammate < self._interfere_threshold,
                        total_frac_blocked_with_bomb_teammate < total_frac_blocked_teammate[my_position]]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if teammate_ok:
                if best_action == constants.Action.Bomb:                    
                    if verbose:
                        print("Bomb is best", constants.Action.Bomb)
                    return constants.Action.Bomb.value

                if best_action == constants.Action.Stop:
                    if verbose:
                        print("Place a bomb at a locally optimal position", constants.Action.Bomb)
                    return constants.Action.Bomb.value

        #
        # Move towards where to bomb
        #

        if best_action not in [None, constants.Action.Bomb]:
            next_position = my_next_position[best_action]

            should_chase = (total_frac_blocked[next_position] > self._chase_threshold)

            if teammate_position is not None:
                teammate_safety = total_frac_blocked_teammate[next_position] * n_survivable_nodes_teammate[my_teammate]
                if any([teammate_safety > self._teammate_survivability_threshold,
                        total_frac_blocked_teammate[next_position] < self._interfere_threshold,
                        total_frac_blocked_teammate[next_position] < total_frac_blocked_teammate[my_position]]):
                    teammate_ok = True
                else:
                    teammate_ok = False
            else:
                teammate_ok = True

            if should_chase and teammate_ok:
                if all([utility.position_is_flames(board, next_position),
                        info["flame_life"][next_position] > 1,
                        is_survivable[constants.Action.Stop]]):
                    action = constants.Action.Stop
                    if verbose:
                        print("Wait flames life", action)
                    return action.value
                else:
                    if verbose:
                        print("Move towards better place to bomb", best_action)
                    return best_action.value                

        # Exclude the action representing stop to wait
        max_block = -np.inf
        best_action = None
        for action in survivable_actions:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action
                
        #
        # Do not take risky actions
        #

        most_survivable_action = self._action_most_survivable(n_survivable)

        # ignore actions with low survivability
        _survivable_actions = list()
        for action in n_survivable:
            n = sum(n_survivable[action])
            if not is_survivable[action]:
                continue
            elif n > self._my_survivability_threshold:
                _survivable_actions.append(action)
            else:
                print("RISKY", action)
                is_survivable[action] = False

        if len(_survivable_actions) > 1:
            survivable_actions = _survivable_actions
        elif best_action is not None:
            if verbose:
                print("Take the best action in danger", best_action)
            return best_action.value
        else:
            # Take the most survivable action
            if verbose:
                print("Take the most survivable action", most_survivable_action)
            return most_survivable_action.value

        #
        # Do not interfere with teammate
        #

        if all([teammate_position is not None,
                len(enemy_positions) > 0 or len(info["curr_bombs"]) > 0]):
            # ignore actions that interfere with teammate
            min_interfere = np.inf
            least_interfere_action = None
            _survivable_actions = list()
            for action in survivable_actions:
                if action == constants.Action.Bomb:
                    frac = total_frac_blocked_with_bomb_teammate
                else:
                    next_position = my_next_position[action]
                    frac = total_frac_blocked_teammate[next_position]
                if frac < min_interfere:
                    min_interfere = frac
                    least_interfere_action = action
                if frac < self._interfere_threshold:
                    _survivable_actions.append(action)
                else:
                    print("INTERFERE", action)
                    is_survivable[action] = False

            if len(_survivable_actions) > 1:
                survivable_actions = _survivable_actions
            elif best_action is not None:
                # Take the least interfering action
                if verbose:
                    print("Take the best action in intereference", best_action)
                return best_action.value
            else:
                if verbose:
                    print("Take the least interfering action", least_interfere_action)
                return least_interfere_action.value

        consider_bomb = True
        if not is_survivable[constants.Action.Bomb]:
            consider_bomb = False

        #
        # Find reachable items
        #

        # List of boards simulated
        list_boards, _ = self._board_sequence(board,
                                              info["curr_bombs"],
                                              info["curr_flames"],
                                              self._search_range,
                                              my_position,
                                              enemy_mobility=self._enemy_mobility,
                                              enemy_bomb=self._enemy_bomb,
                                              agent_blast_strength=info["agent_blast_strength"])

        # some bombs may explode with extra bombs, leading to under estimation
        for t in range(len(list_boards)):
            flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value)
            list_boards[t][flame_positions] = constants.Item.Flames.value
        
        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, _, _ = self._search_time_expanded_network(list_boards,
                                                                    my_position)        
        if len(survivable[-1]) == 0:
            survivable = [set() for _ in range(len(survivable))]

        # Items and bomb target that can be reached in a survivable manner
        _, _, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable)

        #
        # If I have seen an enemy recently and cannot see him now, them move to the last seen position
        #

        action = self._action_to_enemy(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info,
                                       my_enemies)
        if action is not None:
            if verbose:
                print("Moving toward last seen enemy", action)
            return action.value            
        
        #
        # If I have seen a teammate recently, them move away from the last seen position
        #

        action = self._action_away_from_teammate(my_position,
                                                 next_to_items[constants.Item.Fog],
                                                 prev,
                                                 is_survivable,
                                                 info,
                                                 my_teammate)
        if action is not None:
            if verbose:
                print("Moving away from last seen teammate", action)
            return action.value            
        
        #
        # Move towards a fog where we have not seen longest
        #
        
        action = self._action_to_fog(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info)

        if action is not None:
            #if True:
            if self.random.uniform() < 0.8:
                if verbose:
                    print("Moving toward oldest fog", action)
                return action.value            

        #
        # Choose most survivable action
        #

        max_block = -np.inf
        best_action = None
        for action in survivable_actions:
            if action == constants.Action.Bomb:
                continue
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if verbose:
            print("Take the best action among safe actions (nothing else to do)", best_action)

        if best_action is None:
            # this should not be the case
            return None
        else:
            return best_action.value
예제 #11
0
    def act(self, obs, action_space, info):
        """
        Choose an action by trying a fixed sequence of rules in order.

        The first rule that produces an action wins:
        the only survivable action, a bomb when standing on a bomb target,
        a step towards the digging target, a step towards a reachable
        ExtraBomb / IncrRange / Kick item, a step towards a bomb target,
        a step towards a position that might hold a power-up, a step
        towards the fog that has not been seen for the longest time,
        and finally a random survivable action other than Bomb.

        Parameters
        ----------
        obs : dict
            observation; "position", "ammo", "blast_strength" and
            "bomb_life" are read here, and obs is passed on to
            _get_survivable_actions
        action_space
            not used by this method
        info : dict
            precomputed state; "recently_seen", "last_seen",
            "list_boards_no_move", "might_powerup", "step_to_collapse",
            "collapse_ring", "flame_life" and "since_last_seen" are read here

        Return
        ------
        int or None
            value of the chosen constants.Action,
            or None when no action is survivable
        """

        #
        # Definitions
        #

        board = info['recently_seen']
        my_position = obs["position"]
        my_ammo = obs['ammo']
        my_blast_strength = obs['blast_strength']

        # Cells that are still fog take the value that was last seen there.
        # NOTE: board is info['recently_seen'] itself (no copy is made),
        # so this assignment also changes info['recently_seen'].
        foggy = np.where(board == constants.Item.Fog.value)
        board[foggy] = info["last_seen"][foggy]

        # Survivable time-positions at each time, and preceding positions
        survivable, prev, _, _ = self._search_time_expanded_network(
            info["list_boards_no_move"],
            my_position)
        if len(survivable[-1]) == 0:
            # nothing is survivable at the last time: use empty sets throughout
            survivable = [set() for _ in survivable]

        # Where to place bombs to break wood
        digging, bomb_target = self._get_digging_positions(board, my_position, info)
        if digging is None:
            bomb_target, _ = self._get_bomb_target(
                info["list_boards_no_move"][-1],
                my_position,
                my_blast_strength,
                constants.Item.Wood)

        # Items that can be reached in a survivable manner
        reachable_items, _, next_to_items = self._find_reachable_items(
            info["list_boards_no_move"],
            my_position,
            survivable,
            bomb_target,
            info["might_powerup"])

        # Survivable actions
        is_survivable, survivable_with_bomb = self._get_survivable_actions(
            survivable,
            obs,
            info,
            step_to_collapse=info["step_to_collapse"],
            collapse_ring=info["collapse_ring"])

        survivable_actions = [a for a, ok in is_survivable.items() if ok]

        def head_for(time_positions):
            # action that moves towards the given time-positions, or None
            return self._find_distance_minimizer(my_position,
                                                 time_positions,
                                                 prev,
                                                 is_survivable)

        #
        # Choose an action
        #

        if not survivable_actions:
            # This should not happen
            return None

        if len(survivable_actions) == 1:
            # take the action if it is the only survivable one
            return survivable_actions[0].value

        #
        # Place a bomb
        #

        # Bomb only if: the bomb case was evaluated at all, I am at a bomb
        # target, every set in survivable_with_bomb is non-empty, and the
        # bomb is not expected to break a power-up.
        should_bomb = (
            survivable_with_bomb is not None
            and bomb_target[my_position]
            and all(len(s) > 0 for s in survivable_with_bomb)
            and not self._might_break_powerup(info["list_boards_no_move"][-1],
                                              my_position,
                                              my_blast_strength,
                                              info["might_powerup"])
        )
        if should_bomb:
            return constants.Action.Bomb.value

        #
        # Move towards the digging target
        #

        targets = reachable_items["target"]
        if digging and targets:
            arrival_time = targets[0][0]
            open_cells = (constants.Item.Passage.value,
                          constants.Item.ExtraBomb.value,
                          constants.Item.IncrRange.value,
                          constants.Item.Kick.value)
            # I have ammo and the digging cell holds a passage or a power-up
            can_bomb_there = my_ammo and board[digging] in open_cells
            # the digging cell is in flames whose life does not exceed
            # the time of the first target
            flames_end_in_time = (info["flame_life"][digging] <= arrival_time
                                  and utility.position_is_flames(board, digging))
            if can_bomb_there or flames_end_in_time:
                action = head_for(targets)
                if action is not None:
                    return action.value

        #
        # Move towards good items
        #
        # TODO : kick may be a good item only if I cannot kick yet
        # TODO : might want to destroy

        good_items = [constants.Item.ExtraBomb, constants.Item.IncrRange, constants.Item.Kick]
        item_time_positions = set().union(
            *(reachable_items[item] for item in good_items))
        if item_time_positions:
            action = head_for(item_time_positions)
            if action is not None:
                return action.value

        #
        # Move towards where to bomb
        #

        targets = reachable_items["target"]
        # If I have no bomb, I do not want to wait at the target that will be
        # covered by flames before I can place a bomb
        if my_ammo == 0:
            # smallest life among the reachable bombs, capped at the default life
            first_blast_time = min(
                [constants.DEFAULT_BOMB_LIFE]
                + [obs["bomb_life"][(x, y)]
                   for _, x, y in reachable_items[constants.Item.Bomb]])

            kept = []
            for t, x, y in targets:
                reached_after_blast = t > first_blast_time
                board_at_blast = info["list_boards_no_move"][int(first_blast_time)]
                not_in_flames = board_at_blast[(x, y)] != constants.Item.Flames.value
                if reached_after_blast or not_in_flames:
                    kept.append((t, x, y))
            # keep the unfiltered targets when the filter removes everything
            if kept:
                targets = kept

        action = head_for(targets)
        if action is not None:
            return action.value

        #
        # Move toward might powerups
        #

        maybe_powerups = reachable_items["might_powerup"]
        if len(maybe_powerups):
            action = head_for(maybe_powerups)
            if action is not None:
                return action.value

        #
        # Move towards a fog where we have not seen longest
        #

        offsets = [(-1, 0), (1, 0), (0, -1), (0, 1)]
        oldest_fog = None
        oldest_age = 0
        for t, x, y in next_to_items[constants.Item.Fog]:
            around = [(x + dx, y + dy) for dx, dy in offsets]
            age = max(info["since_last_seen"][cell]
                      for cell in around if self._on_board(cell))
            # random jitter so that ties are not always broken the same way
            age += self.random.uniform()
            if age > oldest_age:
                oldest_age = age
                oldest_fog = (t, x, y)

        if oldest_fog is not None:
            action = head_for([oldest_fog])
            if action is not None:
                return action.value

        #
        # Random action
        #

        non_bomb_actions = [a for a in survivable_actions
                            if a != constants.Action.Bomb]
        return self.random.choice(non_bomb_actions).value
예제 #12
0
    def _get_flames(self, board, prev_board, prev_flame_life,
                    bomb_position_strength, moving_direction):
        """
        Update the remaining life of flames and list the current flames

        Parameters
        ----------
        board : array
            pommerman board at the current step
        prev_board : array
            pommerman board at the previous step
        prev_flame_life : array
            remaining life of flames in the previous step
        bomb_position_strength : list
            list of pairs of position and strength of bombs just exploded
        moving_direction : array
            direction of moving bombs; each entry is compared against None
            and against the Right / Left / Down / Up actions

        Return
        ------
        curr_flames : list
            list of Flames, one per cell with positive remaining life
        flame_life : array
            remaining life of flames
        """

        flames = constants.Item.Flames.value

        # existing flames get one step older (never below 0)
        flame_life = prev_flame_life - (prev_flame_life > 0)
        # cells that were not flames before and are flames now start at 3
        flame_life[np.where((prev_board != flames) * (board == flames))] = 3
        # cells that are not flames on the board cannot have any life left
        flame_life[np.where(board != flames)] = 0

        # offset (first index, second index) for each moving direction
        moves = (
            (constants.Action.Right, (0, 1)),
            (constants.Action.Left, (0, -1)),
            (constants.Action.Down, (1, 0)),
            (constants.Action.Up, (-1, 0)),
        )

        for (x, y), strength in bomb_position_strength:

            # for moving bombs, we cannot exactly tell whether it has stopped
            # or not, so consider both: the recorded cell and the next cell
            heading = moving_direction[(x, y)]
            shift_x, shift_y = 0, 0
            for action, offset in moves:
                if heading == action:
                    shift_x, shift_y = offset
                    break

            candidates = [(x, y)]
            if heading is not None:
                ahead = (x + shift_x, y + shift_y)
                if self._on_board(ahead):
                    candidates.append(ahead)

            # Not handled here: a bomb that has just started to move, or
            # whose direction has been changed by a kick.

            for cx, cy in candidates:
                if not utility.position_is_flames(board, (cx, cy)):
                    # not exploded yet
                    continue

                # One lazily generated ray per direction. The first ray
                # starts at offset 0, so it also covers the bomb cell itself.
                rays = (
                    ((cx + k, cy) for k in range(0, strength)),  # first index increasing
                    ((cx - k, cy) for k in range(1, strength)),  # first index decreasing
                    ((cx, cy + k) for k in range(1, strength)),  # second index increasing
                    ((cx, cy - k) for k in range(1, strength)),  # second index decreasing
                )
                for ray in rays:
                    for cell in ray:
                        if not self._on_board(cell):
                            # the rest of this ray is off the board
                            break
                        if utility.position_is_flames(board, cell):
                            flame_life[cell] = 3

        rows, cols = np.where(flame_life > 0)
        curr_flames = [characters.Flame(cell, flame_life[cell] - 1)
                       for cell in zip(rows, cols)]

        return curr_flames, flame_life
예제 #13
0
    def act(self, obs, action_space, info):
        """
        Choose an action by trying a fixed sequence of rules in order.

        The first rule that produces an action wins:
        the only survivable action, a bomb when standing on a bomb target,
        a step towards the digging target, a step towards a reachable
        ExtraBomb / IncrRange / Kick item, a step towards a bomb target,
        a step towards the fog that has not been seen for the longest time,
        and finally a random survivable action other than Bomb.

        Every decision trace is printed only when the module-level
        `verbose` flag is set.

        Side effect: self._search_range is set to 10 on every call.

        Parameters
        ----------
        obs : dict
            observation; "board", "position", "ammo", "blast_strength"
            and "bomb_life" are read here, and obs is passed on to
            _get_survivable_actions
        action_space
            not used by this method
        info : dict
            precomputed state; "list_boards_no_move", "last_seen",
            "curr_bombs", "curr_flames", "might_powerup", "flame_life"
            and "since_last_seen" are read here

        Return
        ------
        int or None
            value of the chosen constants.Action,
            or None when no action is survivable
        """

        #
        # Definitions
        #

        self._search_range = 10

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']

        if verbose:
            print("My position", my_position, end="\t")

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev, _, _ = self._search_time_expanded_network(
            info["list_boards_no_move"],
            my_position)

        def head_for(time_positions):
            # action that moves towards the given time-positions, or None
            return self._find_distance_minimizer(my_position,
                                                 time_positions,
                                                 prev,
                                                 is_survivable)

        # where to place bombs to break wood
        #
        # Per agent: (wood cell to look for, cell to bomb from) pairs,
        # tried in order; the first wood found in info["last_seen"] wins.
        dig_plans = (
            (constants.Item.Agent0,
             [((1, n), (1, n - 1)) for n in (4, 5, 6)]),
            (constants.Item.Agent1,
             [((m, 1), (m + 1, 1)) for m in (6, 5, 4)]),
            (constants.Item.Agent2,
             [((m, 9), (m + 1, 9)) for m in (6, 5, 4)]),
            (constants.Item.Agent3,
             [((1, n), (1, n + 1)) for n in (6, 5, 4)]),
        )
        bomb_target = np.full(board.shape, False)
        digging = None
        for agent_item, candidates in dig_plans:
            if board[my_position] == agent_item.value:
                for wood_cell, stand_cell in candidates:
                    if utility.position_is_wood(info["last_seen"], wood_cell):
                        bomb_target[stand_cell] = True
                        digging = stand_cell
                        break
                break

        if digging is None:
            # no digging target: fall back to the generic wood targets
            bomb_target, _ = self._get_bomb_target(
                info["list_boards_no_move"][-1],
                my_position,
                my_blast_strength,
                constants.Item.Wood)

        # Items that can be reached in a survivable manner
        reachable_items, _, next_to_items = self._find_reachable_items(
            info["list_boards_no_move"],
            my_position,
            survivable,
            bomb_target)

        # Survivable actions
        is_survivable, survivable_with_bomb = self._get_survivable_actions(
            survivable,
            obs,
            info["curr_bombs"],
            info["curr_flames"])

        survivable_actions = [a for a in is_survivable if is_survivable[a]]

        if verbose:
            print("survivable actions are", survivable_actions)

        #
        # Choose an action
        #

        if len(survivable_actions) == 0:
            # This should not happen
            return None

        if len(survivable_actions) == 1:
            # take the action if it is the only survivable one
            action = survivable_actions[0]
            if verbose:
                print("The only survivable action", action)
            return action.value

        #
        # Place a bomb
        #

        consider_bomb = True
        if survivable_with_bomb is None:
            consider_bomb = False
        elif not bomb_target[my_position]:
            consider_bomb = False
        elif any(len(s) <= 0 for s in survivable_with_bomb):
            # if not survivable all the time after bomb, do not bomb
            consider_bomb = False
        elif self._might_break_powerup(info["list_boards_no_move"][-1],
                                       my_position, my_blast_strength,
                                       info["might_powerup"]):
            # if might break an item, do not bomb
            consider_bomb = False

        if consider_bomb:
            # place bomb if I am at a bomb target
            if verbose:
                print("Bomb at a bomb target", constants.Action.Bomb)
            return constants.Action.Bomb.value

        #
        # Move towards the digging target
        #

        good_time_positions = reachable_items["target"]
        if digging and good_time_positions:
            time_to_reach = good_time_positions[0][0]
            open_cells = (constants.Item.Passage.value,
                          constants.Item.ExtraBomb.value,
                          constants.Item.IncrRange.value,
                          constants.Item.Kick.value)
            # I have ammo and the digging cell holds a passage or a power-up
            can_bomb_there = my_ammo and board[digging] in open_cells
            # the digging cell is in flames whose life does not exceed
            # the time of the first target
            flames_end_in_time = (info["flame_life"][digging] <= time_to_reach
                                  and utility.position_is_flames(board, digging))
            if can_bomb_there or flames_end_in_time:
                action = head_for(good_time_positions)
                if action is not None:
                    if verbose:
                        print("Move to dig", action)
                    return action.value

        # Move towards good items
        # TODO : kick may be a good item only if I cannot kick yet
        # TODO : might want to destroy
        good_items = [
            constants.Item.ExtraBomb, constants.Item.IncrRange,
            constants.Item.Kick
        ]

        # positions with good items
        good_time_positions = set()
        for item in good_items:
            good_time_positions = good_time_positions.union(
                reachable_items[item])
        if len(good_time_positions) > 0:
            action = head_for(good_time_positions)
            if action is not None:
                if verbose:
                    print("Moving toward good item", action)
                return action.value

        #
        # Move towards where to bomb
        #

        good_time_positions = reachable_items["target"]

        # If I have no bomb, I do not want to wait at the target that will be
        # covered by flames before I can place a bomb
        if my_ammo == 0:
            # smallest life among the reachable bombs, capped at the default
            first_blast_time = constants.DEFAULT_BOMB_LIFE
            for _, x, y in reachable_items[constants.Item.Bomb]:
                life = obs["bomb_life"][(x, y)]
                if life < first_blast_time:
                    first_blast_time = life

            _good_time_positions = list()
            for t, x, y in good_time_positions:
                reached_after_blast = t > first_blast_time
                board_at_blast = info["list_boards_no_move"][int(first_blast_time)]
                not_in_flames = board_at_blast[(x, y)] != constants.Item.Flames.value
                if reached_after_blast or not_in_flames:
                    _good_time_positions.append((t, x, y))
            # keep the unfiltered targets when the filter removes everything
            if _good_time_positions:
                good_time_positions = _good_time_positions

        action = head_for(good_time_positions)
        if action is not None:
            if verbose:
                print("Moving toward where to bomb", action)
            return action.value

        #
        # TODO : move toward might powerups
        #

        #
        # Move towards a fog where we have not seen longest
        #

        best_time_position = None
        oldest = 0
        for t, x, y in next_to_items[constants.Item.Fog]:
            neighbors = [(x + dx, y + dy)
                         for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
            age = max(info["since_last_seen"][position]
                      for position in neighbors
                      if self._on_board(position))
            if age > oldest:
                oldest = age
                best_time_position = (t, x, y)

        if best_time_position is not None:
            action = head_for([best_time_position])
            if action is not None:
                if verbose:
                    print("Moving toward oldest fog", action)
                return action.value

        #
        # Random action
        #

        if constants.Action.Bomb in survivable_actions:
            survivable_actions.remove(constants.Action.Bomb)

        action = random.choice(survivable_actions)
        if verbose:
            print("Random action", action, survivable_actions)
        return action.value