Exemple #1
0
def position_is_in_corridor(board, position, perpendicular_dirs):
    """Tell whether `position` is hemmed in on both perpendicular sides.

    A side counts as blocked when the neighbouring cell in that direction
    is either off the board or a wall (as reported by `utility`).

    Parameters
    ----------
    board : the game board handed to the `utility` helpers
    position : the cell being examined
    perpendicular_dirs : sequence whose first two entries are the two
        directions to look in

    Returns
    -------
    Truthy only if both neighbouring cells are blocked.
    """
    first_dir = perpendicular_dirs[0]
    second_dir = perpendicular_dirs[1]
    neighbours = (
        utility.get_next_position(position, first_dir),
        utility.get_next_position(position, second_dir),
    )

    def _blocked(cell):
        # Off-board cells are treated exactly like walls.
        if not utility.position_on_board(board, cell):
            return True
        return utility.position_is_wall(board, cell)

    # Evaluate both sides up front (no short-circuit between them).
    side_a = _blocked(neighbours[0])
    side_b = _blocked(neighbours[1])
    return side_a and side_b
Exemple #2
0
    def _is_closed(self, board, position):
        """
        Check whether the region around `position` is sealed off from agents.

        Performs a flood fill starting at `position`, expanding through
        4-connected cells that are on the board and are neither wall nor fog.
        The search returns False as soon as it touches a cell holding an
        agent; if the fill is exhausted without meeting one, it returns True.
        The starting cell itself is never tested.

        Parameters
        ----------
        board = np.array(obs['board'])

        position = tuple(obs['position'])
        """

        neighbour_offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))

        visited = np.zeros(board.shape, dtype=bool)
        visited[position] = True
        frontier = [position]

        while frontier:
            row, col = frontier.pop()
            for d_row, d_col in neighbour_offsets:
                candidate = (row + d_row, col + d_col)
                if not self._on_board(candidate) or visited[candidate]:
                    continue
                visited[candidate] = True
                if utility.position_is_agent(board, candidate):
                    # An agent is reachable, so the region is not closed.
                    return False
                blocked = (utility.position_is_wall(board, candidate)
                           or utility.position_is_fog(board, candidate))
                if not blocked:
                    frontier.append(candidate)

        return True
Exemple #3
0
def me_to_enemy_all_corridor(board, pos1, pos2):
    """Check that pos1 and pos2 are joined by a corridor ending in a dead end.

    `pos1` and `pos2` must share a row or a column (asserted). The function
    returns True only when

    * `pos2` has blocked cells on both perpendicular sides and the cell just
      beyond it (continuing away from `pos1`) is off the board or a wall, and
    * every cell strictly between `pos1` and `pos2` is a passage with
      blocked cells on both perpendicular sides.
    """
    assert (pos1[0] == pos2[0] or pos1[1] == pos2[1])

    # Direction of travel from pos1 towards pos2.
    same_row = pos1[0] == pos2[0]
    if same_row:
        direction = (constants.Action.Right
                     if pos1[1] < pos2[1] else constants.Action.Left)
    else:
        direction = (constants.Action.Down
                     if pos1[0] < pos2[0] else constants.Action.Up)

    p_dirs = perpendicular_directions(direction)

    # Is the cell behind pos2 a dead end?
    beyond = utility.get_next_position(pos2, direction)
    dead_end = (not utility.position_on_board(board, beyond)
                or utility.position_is_wall(board, beyond))
    if (utility.position_on_board(board, beyond)
            and utility.position_is_fog(board, beyond)):
        # A fogged cell is not known to be blocked.
        dead_end = False

    # pos2 (the enemy) must sit in an impasse.
    if not position_is_in_corridor(board, pos2, p_dirs) or not dead_end:
        return False

    # Walk from pos1 to pos2; any non-passage or non-corridor cell fails.
    cursor = utility.get_next_position(pos1, direction)
    while cursor != pos2:
        if not utility.position_is_passage(board, cursor):
            return False
        if not position_is_in_corridor(board, cursor, p_dirs):
            return False
        cursor = utility.get_next_position(cursor, direction)
    return True
 def bomb_real_life(bomb_position, board, bomb_blast_st, bomb_life):
     """Return the effective remaining life of the bomb at `bomb_position`.

     One bomb's real life is the minimum life among the bombs whose blast
     reaches it along its row or column (its own cell is included in the
     scan). Note that this can be chained, so call it on each bomb
     multiple times until the values converge.

     Parameters
     ----------
     bomb_position : (x, y) cell of the bomb being evaluated
     board : square board; `len(board)` is used as the limit on both axes
     bomb_blast_st : map indexed by cell giving each bomb's blast strength
     bomb_life : map indexed by cell giving each bomb's remaining life
         (values <= 0 are treated as "no bomb")

     Returns
     -------
     int : the smallest life found, or 900 if no bomb reaches the cell.
     """
     x, y = bomb_position
     sz = len(board)
     min_life = 900
     # Scan outward from the bomb in each of the four axis directions.
     for step_x, step_y in ((-1, 0), (1, 0), (0, -1), (0, 1)):
         i, j = x, y
         while 0 <= i < sz and 0 <= j < sz:
             pos = (i, j)
             # Walls stop the blast, so nothing beyond them matters.
             if utility.position_is_wall(board, pos):
                 break
             dist = abs(i - x) + abs(j - y)
             if bomb_life[pos] > 0 and dist <= bomb_blast_st[pos] - 1:
                 # Keep the minimum; previously each match overwrote the
                 # value, so a later, longer-lived bomb could mask an
                 # earlier, shorter-lived one.
                 min_life = min(min_life, int(bomb_life[pos]))
             i += step_x
             j += step_y
     return min_life
Exemple #5
0
def _stop_condition(board, pos, exclude_agent=True):
    """Return True when a scan along a line should stop at `pos`.

    The scan stops at cells that are off the board, fogged, or walls.
    When `exclude_agent` is falsy, cells occupied by an agent stop the
    scan as well.
    """
    if not utility.position_on_board(board, pos):
        return True
    if (utility.position_is_fog(board, pos)
            or utility.position_is_wall(board, pos)):
        return True
    if exclude_agent:
        # Agents are ignored, so nothing else can stop the scan here.
        return False
    if utility.position_is_agent(board, pos):
        return True
    return False
def _position_will_be_flamed(board, position, bomb_life, bomb_blast_st,
                             directions_to_check):
    for direction in directions_to_check:
        pos = utility.get_next_position(position, direction)
        k = 1
        while utility.position_on_board(board, pos):
            if utility.position_is_wall(board, pos):
                break
            if bomb_life[pos] > 0 and bomb_blast_st[pos] - 1 >= k:
                return True
            pos = utility.get_next_position(pos, direction)
            k += 1
    return False
Exemple #7
0
def CanGo(bfs_node, action, enemies):
    """Decide whether taking `action` from the node's position is acceptable.

    The move is rejected when the direction is invalid, the target cell is
    not passable, or the target cell lies on the same row/column as a bomb
    within its blast strength, the bomb's life is at most
    `blast_strength + 2`, and no wall is found in the cells scanned between
    the two.
    """
    position_x, position_y = utility.get_next_position(bfs_node.my_position,
                                                       action)
    target = (position_x, position_y)

    if not utility.is_valid_direction(bfs_node.env._board, target, action):
        return False
    if not utility.position_is_passable(bfs_node.env._board, target, enemies):
        return False

    reachable = True
    for bomb in bfs_node.env._bombs:
        bomb_x, bomb_y = bomb.position
        # Bombs with plenty of life left are not considered a threat yet.
        if bomb.life > bomb.blast_strength + 2:
            continue

        if (bomb_x == position_x
                and abs(bomb_y - position_y) <= bomb.blast_strength):
            lo, hi = min(bomb_y, position_y), max(bomb_y, position_y)
            between = ((position_x, pix_y) for pix_y in range(lo, hi))
        elif (bomb_y == position_y
              and abs(bomb_x - position_x) <= bomb.blast_strength):
            lo, hi = min(bomb_x, position_x), max(bomb_x, position_x)
            between = ((pix_x, position_y) for pix_x in range(lo, hi))
        else:
            # Bomb is not aligned with (or is too far from) the target.
            continue

        # A wall in between shields the target from this bomb.
        shielded = any(
            utility.position_is_wall(bfs_node.env._board, cell)
            for cell in between)
        if not shielded:
            reachable = False
    return reachable
Exemple #8
0
def _all_bomb_real_life(board, bomb_life, bomb_blast_st):
    def get_bomb_real_life(bomb_position, bomb_real_life):
        """One bomb's real life is the minimum life of its adjacent bomb.
           Not that this could be chained, so please call it on each bomb mulitple times until
           converge
        """
        dirs = _all_directions(exclude_stop=True)
        min_life = bomb_real_life[bomb_position]
        for d in dirs:
            pos = bomb_position
            last_pos = bomb_position
            while True:
                pos = utility.get_next_position(pos, d)
                if _stop_condition(board, pos):
                    break
                if bomb_real_life[pos] > 0:
                    if bomb_real_life[pos] < min_life and \
                            _manhattan_distance(pos, last_pos) <= bomb_blast_st[pos] - 1:
                        min_life = bomb_real_life[pos]
                        last_pos = pos
                    else:
                        break
        return min_life

    bomb_real_life_map = np.copy(bomb_life)
    sz = len(board)
    while True:
        no_change = []
        for i in range(sz):
            for j in range(sz):
                if utility.position_is_wall(board, (i, j)) or utility.position_is_powerup(board, (i, j)) \
                        or utility.position_is_fog(board, (i, j)):
                    continue
                if bomb_life[i, j] < 0 + EPSILON:
                    continue
                real_life = get_bomb_real_life((i, j), bomb_real_life_map)
                no_change.append(bomb_real_life_map[i, j] == real_life)
                bomb_real_life_map[i, j] = real_life
        if all(no_change):
            break
    return bomb_real_life_map
Exemple #9
0
    def _make_safety_score(cls, board, items, bombs, enemies):
        """Build a per-cell safety score for the board.

        Cells start at 1. A bomb's own cell and every cell within
        `blast_strength - 1` steps along the four axes (stopping at the
        board edge or a rigid cell) are set to -inf, as are all wall cells.
        Each cell then gains 1 for every 4-neighbour that is still safe
        (score exactly 1), and loses 1 for every enemy standing on it or on
        a 4-neighbour.

        Parameters
        ----------
        board : 2-D numpy array of the board
        items : mapping from item to an iterable of positions; looked up
            with each entry of `enemies`
        bombs : iterable of dicts with keys "position" and "blast_strength"
        enemies : iterable of keys into `items`

        Returns
        -------
        numpy array with the same shape as `board`.
        """
        # Row and column limits are taken from the array shape so that all
        # bounds checks agree (the rows/columns were previously measured
        # inconsistently with len(board) and len(board[0])).
        n_rows, n_cols = board.shape

        safety_score = np.ones(board.shape)
        for bomb in bombs:
            x, y = bomb["position"]
            bomb_range = bomb["blast_strength"]
            safety_score[(x, y)] = -np.inf
            for step_x, step_y in ((1, 0), (-1, 0), (0, 1), (0, -1)):
                for dist in range(1, bomb_range):
                    position = (x + step_x * dist, y + step_y * dist)
                    if not (0 <= position[0] < n_rows
                            and 0 <= position[1] < n_cols):
                        break
                    if utility.position_is_rigid(board, position):
                        # The rigid cell itself is handled by the wall pass.
                        break
                    safety_score[position] = -np.inf

        # wall
        for x in range(n_rows):
            for y in range(n_cols):
                position = (x, y)
                if utility.position_is_wall(board, position):
                    safety_score[position] = -np.inf

        is_safe = (safety_score == 1)

        # Reward each cell for every safe 4-neighbour.
        safety_score[1:, :] += is_safe[:-1, :]
        safety_score[:-1, :] += is_safe[1:, :]
        safety_score[:, 1:] += is_safe[:, :-1]
        safety_score[:, :-1] += is_safe[:, 1:]

        # enemies
        for enemy in enemies:
            for position in items.get(enemy, []):
                x, y = position
                safety_score[position] -= 1
                if x > 0:
                    safety_score[(x - 1, y)] -= 1
                if y > 0:
                    safety_score[(x, y - 1)] -= 1
                if x < n_rows - 1:
                    safety_score[(x + 1, y)] -= 1
                if y < n_cols - 1:
                    safety_score[(x, y + 1)] -= 1

        return safety_score
Exemple #10
0
    def act(self, obs, action_space, info):
        """Pick the action that maximises survivability, else the most harmful one.

        Survivability is evaluated with `self._get_n_survivable` under three
        scenarios: every visible enemy replaced by a bomb, enemies moving by
        one step, and no other agents on the board. If some action survives
        in all scenarios, the single one (or the one with the best averaged
        survivability) is returned. Otherwise the same is tried over the
        union of actions known to any scenario. If that also fails, actions
        are scored by how much they block enemies (and how little they block
        the teammate), with noise drawn from `self.random`, and the best
        scoring one is returned.

        Parameters
        ----------
        obs : dict
            Reads "position", "ammo", "blast_strength", "can_kick",
            "enemies", "teammate", "bomb_blast_strength", "bomb_life" and
            "board".
        action_space :
            Not used by this method.
        info : dict
            Reads "last_seen", "moving_direction", "curr_bombs",
            "curr_flames" and "list_boards_no_move".

        Returns
        -------
        The `.value` of the chosen `constants.Action`.
        """

        #
        # Definitions
        #

        board = info['last_seen']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_kick = obs["can_kick"]  # whether I can kick
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        my_teammate = obs["teammate"]

        kickable, might_kickable \
            = self._kickable_positions(obs, info["moving_direction"],
                                       consider_agents=True)

        # enemy positions
        enemy_positions = list()
        for enemy in my_enemies:
            rows, cols = np.where(board == enemy.value)
            if len(rows) == 0:
                continue
            enemy_positions.append((rows[0], cols[0]))

        # teammate position
        teammate_position = None
        if my_teammate is not None:
            rows, cols = np.where(board == my_teammate.value)
            if len(rows):
                teammate_position = (rows[0], cols[0])

        # positions that might be blocked
        if teammate_position is None:
            agent_positions = enemy_positions
        else:
            agent_positions = enemy_positions + [teammate_position]
        might_blocked = self._get_might_blocked(board, my_position,
                                                agent_positions,
                                                might_kickable)

        #
        # Survivability, when enemy is replaced by a bomb, and no move afterwards
        #

        # replace enemy with bomb
        _bombs = deepcopy(info["curr_bombs"])
        rows, cols = np.where(board > constants.Item.AgentDummy.value)
        for position in zip(rows, cols):
            if board[position] not in my_enemies:
                continue
            if obs["bomb_blast_strength"][position]:
                # already a bomb
                continue
            # NOTE(review): `enemy_blast_strength_map` is not defined in this
            # method; presumably it is a module-level name or should come
            # from `info` — confirm, otherwise this raises NameError.
            bomb = characters.Bomb(
                characters.Bomber(),  # dummy owner of the bomb
                position,
                constants.DEFAULT_BOMB_LIFE,
                enemy_blast_strength_map[position],
                None)
            _bombs.append(bomb)

        n_survivable_bomb = self._get_n_survivable(board,
                                                   _bombs,
                                                   info["curr_flames"],
                                                   obs,
                                                   my_position,
                                                   set.union(
                                                       kickable,
                                                       might_kickable),
                                                   enemy_mobility=0)

        #
        # Survivability, when enemy moves one position or stay unmoved
        #

        n_survivable_move = self._get_n_survivable(board,
                                                   info["curr_bombs"],
                                                   info["curr_flames"],
                                                   obs,
                                                   my_position,
                                                   set.union(
                                                       kickable,
                                                       might_kickable),
                                                   enemy_mobility=1)

        #
        # Survivability, when no enemies
        #

        _board = deepcopy(board)
        agent_positions = np.where(_board > constants.Item.AgentDummy.value)
        _board[agent_positions] = constants.Item.Passage.value
        _board[my_position] = board[my_position]

        _obs = {
            "position": obs["position"],
            "blast_strength": obs["blast_strength"],
            "ammo": obs["ammo"],
            "bomb_life": obs["bomb_life"],
            "board": _board
        }

        n_survivable_none = self._get_n_survivable(_board,
                                                   info["curr_bombs"],
                                                   info["curr_flames"],
                                                   _obs,
                                                   my_position,
                                                   set.union(
                                                       kickable,
                                                       might_kickable),
                                                   enemy_mobility=0)

        def expected_survivability(actions):
            """Average the per-step survivable counts over the scenarios.

            An action that might be blocked is averaged over the bomb
            scenario, the move scenario for Stop, and the no-agent scenario;
            Stop/Bomb over bomb and move; any other action over bomb and
            no-agent.
            """
            expected = dict()
            for a in actions:
                if might_blocked[a]:
                    expected[a] \
                        = np.array(n_survivable_bomb[a]) \
                        + np.array(n_survivable_move[constants.Action.Stop]) \
                        + np.array(n_survivable_none[a])
                    expected[a] = expected[a] / 3
                elif a in [constants.Action.Stop, constants.Action.Bomb]:
                    expected[a] = np.array(
                        n_survivable_bomb[a]) + np.array(n_survivable_move[a])
                    expected[a] = expected[a] / 2
                else:
                    expected[a] = np.array(
                        n_survivable_bomb[a]) + np.array(n_survivable_none[a])
                    expected[a] = expected[a] / 2
            return expected

        #
        # Survivable actions
        #

        survivable_actions_bomb = {
            a for a in n_survivable_bomb if n_survivable_bomb[a][-1] > 0}
        survivable_actions_move = {
            a for a in n_survivable_move if n_survivable_move[a][-1] > 0}
        survivable_actions_none = {
            a for a in n_survivable_none if n_survivable_none[a][-1] > 0}

        survivable_actions = set.intersection(survivable_actions_bomb,
                                              survivable_actions_move,
                                              survivable_actions_none)

        # if can survive without possibility of being blocked, then do so
        # NOTE(review): the filtered lists below are bound to names
        # (`survivable_action`, `survivable_action_bomb`, ...) that are never
        # read afterwards, so this section currently does not influence the
        # chosen action. It looks like the plural names were intended —
        # confirm before changing, since that would alter the policy.
        if constants.Action.Stop not in survivable_actions:
            _survivable_actions = [
                action for action in survivable_actions
                if not might_blocked[action]
            ]
            if len(_survivable_actions):
                survivable_action = _survivable_actions

            _survivable_actions_bomb = [
                action for action in survivable_actions_bomb
                if not might_blocked[action]
            ]
            _survivable_actions_move = [
                action for action in survivable_actions_move
                if not might_blocked[action]
            ]
            _survivable_actions_none = [
                action for action in survivable_actions_none
                if not might_blocked[action]
            ]
            if all([
                    len(_survivable_actions_bomb) > 0,
                    len(_survivable_actions_move) > 0,
                    len(_survivable_actions_none) > 0
            ]):
                survivable_action_bomb = _survivable_actions_bomb
                survivable_action_move = _survivable_actions_move
                survivable_action_none = _survivable_actions_none

        #
        # Choose actions
        #

        if len(survivable_actions) == 1:

            action = survivable_actions.pop()
            if verbose:
                print("Only survivable action", action)
            return action.value

        if len(survivable_actions) > 1:

            n_survivable_expected = expected_survivability(survivable_actions)
            action = self._get_most_survivable_action(n_survivable_expected)
            if verbose:
                print("Most survivable action", action)
            return action.value

        # no survivable actions for all cases
        survivable_actions = set(
            list(n_survivable_bomb.keys()) + list(n_survivable_move.keys()) +
            list(n_survivable_none.keys()))

        if len(survivable_actions) == 1:

            action = survivable_actions.pop()
            if verbose:
                print("Only might survivable action", action)
            return action.value

        if len(survivable_actions) > 1:

            # Fill in zeros for actions a scenario knows nothing about, so
            # that the averaging below can index every scenario.
            for a in set.union(survivable_actions, {constants.Action.Stop}):
                if a not in n_survivable_bomb:
                    n_survivable_bomb[a] = np.zeros(self._search_range)
                if a not in n_survivable_move:
                    n_survivable_move[a] = np.zeros(self._search_range)
                if a not in n_survivable_none:
                    n_survivable_none[a] = np.zeros(self._search_range)

            n_survivable_expected = expected_survivability(survivable_actions)
            action = self._get_most_survivable_action(n_survivable_expected)
            if verbose:
                print("Most might survivable action", action)
            return action.value

        # no survivable action found for any cases
        # TODO : Then consider killing enemies or helping teammate

        # fraction of blocked node in the survival trees of enemies
        _list_boards = deepcopy(info["list_boards_no_move"])
        if obs["bomb_blast_strength"][my_position]:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Bomb.value
        else:
            for b in _list_boards:
                if utility.position_is_agent(b, my_position):
                    b[my_position] = constants.Item.Passage.value

        total_frac_blocked, n_survivable_nodes, blocked_time_positions \
            = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

        if teammate_position is not None:
            total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
                = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

        block = defaultdict(float)
        for action in [
                constants.Action.Stop, constants.Action.Up,
                constants.Action.Down, constants.Action.Left,
                constants.Action.Right
        ]:

            next_position = self._get_next_position(my_position, action)
            if not self._on_board(next_position):
                continue

            if board[next_position] in [
                    constants.Item.Rigid.value, constants.Item.Wood.value
            ]:
                continue

            if next_position in set.union(kickable, might_kickable):
                # kick will be considered later
                continue

            block[action] = total_frac_blocked[next_position]
            if teammate_position is not None:
                block[action] *= (1 -
                                  total_frac_blocked_teammate[next_position])
            block[action] *= self._inv_tmp
            # Additive noise so that the final argmax is randomised.
            block[action] -= np.log(-np.log(self.random.uniform()))

        if any([my_ammo > 0, obs["bomb_blast_strength"][my_position] == 0]):

            list_boards_with_bomb, _ \
                = self._board_sequence(board,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_blast_strength=my_blast_strength,
                                       my_action=constants.Action.Bomb)

            block[constants.Action.Bomb] \
                = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                   n_survivable_nodes,
                                                   board,
                                                   my_enemies)

            if teammate_position is not None:
                block_teammate = self._get_frac_blocked_two_lists(
                    list_boards_with_bomb, n_survivable_nodes, board,
                    [my_teammate])
                block[constants.Action.Bomb] *= (1 - block_teammate)

            block[constants.Action.Bomb] *= self._inv_tmp
            block[constants.Action.Bomb] -= np.log(
                -np.log(self.random.uniform()))

        for next_position in set.union(kickable, might_kickable):

            my_action = self._get_direction(my_position, next_position)

            list_boards_with_kick, _ \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True)

            block[my_action] \
                = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                   n_survivable_nodes,
                                                   board,
                                                   my_enemies)

            if teammate_position is not None:
                block_teammate = self._get_frac_blocked_two_lists(
                    list_boards_with_kick, n_survivable_nodes, board,
                    [my_teammate])
                block[my_action] *= (1 - block_teammate)

            block[my_action] *= self._inv_tmp
            block[my_action] -= np.log(-np.log(self.random.uniform()))

        max_block = -np.inf
        best_action = None
        for action in block:
            if block[action] > max_block:
                max_block = block[action]
                best_action = action

        if best_action is not None:
            if verbose:
                print(
                    "Best action to kill enemies or help teammate (cannot survive)"
                )
            return best_action.value

        # The following is a last-resort fallback, reached only when no
        # action received a usable score above. The returns are deliberately
        # outside the `if verbose:` guards: the chosen action must be
        # returned whether or not it is printed.

        if obs["ammo"] > 0 and obs["blast_strength"] == 0:
            action = constants.Action.Bomb
            if verbose:
                print("Suicide", action)
            return action.value

        kickable_positions = list(set.union(kickable, might_kickable))
        if kickable_positions:
            self.random.shuffle(kickable_positions)
            action = self._get_direction(my_position, kickable_positions[0])
            if verbose:
                print("Suicide kick", action)
            return action.value

        all_actions = [
            constants.Action.Stop, constants.Action.Up, constants.Action.Down,
            constants.Action.Right, constants.Action.Left
        ]
        self.random.shuffle(all_actions)
        for action in all_actions:
            next_position = self._get_next_position(my_position, action)
            if not self._on_board(next_position):
                continue
            if utility.position_is_wall(board, next_position):
                continue
            if verbose:
                print("Random action", action)
            return action.value

        action = constants.Action.Stop
        if verbose:
            print("No action found", action)
        return action.value
Exemple #11
0
    def act(self, obs, action_space, info):
        """Pick the action with the best survivability against nearby enemies.

        Survivability is evaluated with `self._get_n_survivable` for two
        scenarios: every visible enemy replaced by a bomb, and enemies
        moving by one step. Actions known to both scenarios are preferred;
        if there are none, the method falls back to whichever scenario has
        any action, then to a board with all other agents removed, and
        finally to a bomb or a random non-wall move.

        Parameters
        ----------
        obs : dict
            Reads "board", "position", "can_kick", "enemies", "teammate",
            "bomb_blast_strength", "blast_strength", "ammo" and "bomb_life".
        action_space :
            Not used by this method.
        info : dict
            Reads "moving_direction", "curr_bombs" and "curr_flames".

        Returns
        -------
        The `.value` of the chosen `constants.Action`.
        """

        #
        # Definitions
        #

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_kick = obs["can_kick"]  # whether I can kick
        my_enemies = [constants.Item(e) for e in obs['enemies']]
        my_teammate = obs["teammate"]

        kickable, might_kickable \
            = self._kickable_positions(obs, info["moving_direction"],
                                       consider_agents=True)

        #
        # Survivability, when enemy is replaced by a bomb, and no move afterwards
        #

        # replace enemy with bomb
        _bombs = deepcopy(info["curr_bombs"])
        rows, cols = np.where(board > constants.Item.AgentDummy.value)
        for position in zip(rows, cols):
            if board[position] not in my_enemies:
                continue
            if obs["bomb_blast_strength"][position]:
                # already a bomb
                continue
            # NOTE(review): `enemy_blast_strength_map` is not defined in this
            # method; presumably it is a module-level name or should come
            # from `info` — confirm, otherwise this raises NameError.
            bomb = characters.Bomb(
                characters.Bomber(),  # dummy owner of the bomb
                position,
                constants.DEFAULT_BOMB_LIFE,
                enemy_blast_strength_map[position],
                None)
            _bombs.append(bomb)

        n_survivable_bomb = self._get_n_survivable(board,
                                                   _bombs,
                                                   info["curr_flames"],
                                                   obs,
                                                   my_position,
                                                   set.union(
                                                       kickable,
                                                       might_kickable),
                                                   enemy_mobility=0)
        print("survivable bomb")
        for a in n_survivable_bomb:
            print(a, n_survivable_bomb[a])

        survivable_actions_bomb = set(n_survivable_bomb)

        #
        # Survivability, when enemy moves one position or stay unmoved
        #

        n_survivable_move = self._get_n_survivable(board,
                                                   info["curr_bombs"],
                                                   info["curr_flames"],
                                                   obs,
                                                   my_position,
                                                   set.union(
                                                       kickable,
                                                       might_kickable),
                                                   enemy_mobility=1)

        # If my move is survivable with bomb but not with move,
        # then my move must be blocked by an enemy.
        # I might be blocked by an enemy with such my move,
        # it will end up in stop and enemy is also stop,
        # so my survivability with such my move should be the
        # same as my survivability with stop when enemy stops

        if constants.Action.Stop in survivable_actions_bomb:
            for action in survivable_actions_bomb:
                if action in [constants.Action.Stop, constants.Action.Bomb]:
                    continue
                if action not in n_survivable_move:
                    n_survivable_move[action] = n_survivable_bomb[
                        constants.Action.Stop]

        survivable_actions_move = set(n_survivable_move)

        # if survivable by not stopping when enemy place a bomb,
        # then do not stop
        if survivable_actions_bomb - {constants.Action.Stop}:
            survivable_actions_bomb -= {constants.Action.Stop}
            survivable_actions_move -= {constants.Action.Stop}

        survivable_actions = set.intersection(survivable_actions_bomb,
                                              survivable_actions_move)

        if len(survivable_actions) == 0:
            if survivable_actions_bomb:
                action = self._get_most_survivable_action(n_survivable_bomb)
                print("Most survivable action when enemy place a bomb", action)
                return action.value
            elif survivable_actions_move:
                action = self._get_most_survivable_action(n_survivable_move)
                print("Most survivable action when enemy moves", action)
                return action.value
            else:
                #
                # Survivability with no enemies or teammate
                #

                _board = deepcopy(board)
                agent_positions = np.where(
                    _board > constants.Item.AgentDummy.value)
                _board[agent_positions] = constants.Item.Passage.value
                _board[my_position] = board[my_position]

                _obs = {
                    "position": obs["position"],
                    "blast_strength": obs["blast_strength"],
                    "ammo": obs["ammo"],
                    "bomb_life": obs["bomb_life"],
                    "board": _board
                }

                n_survivable = self._get_n_survivable(_board,
                                                      info["curr_bombs"],
                                                      info["curr_flames"],
                                                      _obs,
                                                      my_position,
                                                      set.union(
                                                          kickable,
                                                          might_kickable),
                                                      enemy_mobility=0)

                survivable_actions = list(n_survivable)

                if survivable_actions:
                    action = self._get_most_survivable_action(n_survivable)
                    print("Most survivable action when no enemy", action)
                    return action.value
                else:
                    if obs["ammo"] > 0 and obs["blast_strength"] == 0:
                        action = constants.Action.Bomb
                        print("Suicide", action)
                        return action.value
                    else:
                        all_actions = [
                            constants.Action.Stop, constants.Action.Up,
                            constants.Action.Down, constants.Action.Right,
                            constants.Action.Left
                        ]
                        random.shuffle(all_actions)
                        for action in all_actions:
                            next_position = self._get_next_position(
                                my_position, action)
                            if not self._on_board(next_position):
                                continue
                            # Skip moves INTO walls. The test used to be
                            # negated, which skipped every non-wall cell and
                            # could only ever return a move into a wall.
                            if utility.position_is_wall(
                                    board, next_position):
                                continue
                            print("Random action", action)
                            return action.value

        elif len(survivable_actions) == 1:

            action = survivable_actions.pop()
            print("Only survivable action", action)
            return action.value

        else:

            # Be pessimistic: score each action by the worse of the two
            # scenarios.
            n_survivable_min = dict()
            for a in survivable_actions:
                n_survivable_min[a] = min(
                    [n_survivable_bomb[a], n_survivable_move[a]])
            action = self._get_most_survivable_action(n_survivable_min)
            print("Most survivable action when no enemy", action)
            return action.value

        action = constants.Action.Stop
        print("No action found", action)
        return action.value
Exemple #12
0
def get_intermediate_rewards(prev_observations, cur_observations,
                             position_queue):
    """
    Compute shaped intermediate rewards for the four agents from two
    consecutive sets of observations.

    Per agent, the raw reward gains 0.5 for every enemy that was alive in
    the previous observation but not in the current one, loses 0.5 when the
    teammate is no longer alive, and gains 0.02 when the agent acquires the
    ability to kick. The terms for ammo, blast strength, newly visited
    positions and removed wood currently carry a weight of zero.

    The raw rewards are then blended with the teammate's reward
    (agents 0/2 and 1/3 are paired) and centred on the opposing pair's
    mean so that the four returned values sum to zero.

    Parameters
    ----------
    prev_observations = list of the 4 agents' observations at the previous step

    cur_observations = list of the 4 agents' observations at the current step

    position_queue = container of visited positions; any current position
        not already in it is appended (mutated in place)

    Returns
    -------
    list of 4 floats, one reward per agent
    """
    # Note: only for team env
    n_agents = 4

    def _alive_counts(obs):
        # (number of listed enemies that are alive, 1 if teammate alive else 0)
        alive = obs['alive']
        n_enemy = sum(1 for enemy in obs['enemies'] if enemy.value in alive)
        n_teammate = 1 if obs['teammate'].value in alive else 0
        return n_enemy, n_teammate

    rewards = [0.0] * n_agents
    for idx in range(n_agents):
        before = prev_observations[idx]
        enemies_before, mate_before = _alive_counts(before)
        kick_before = before['can_kick']
        ammo_before = before['ammo']
        blast_before = before['blast_strength']
        # read alongside the other fields; not used below
        position_before = before['position']
        wood_cells = list(
            zip(*np.where(before['board'] == constants.Item.Wood.value)))

        after = cur_observations[idx]
        enemies_after, mate_after = _alive_counts(after)
        kick_after = after['can_kick']
        ammo_after = after['ammo']
        blast_after = after['blast_strength']
        position_after = after['position']

        # enemies eliminated since the previous step
        enemies_lost = enemies_before - enemies_after
        if enemies_lost > 0:
            rewards[idx] += enemies_lost * 0.5

        # teammate eliminated since the previous step
        mates_lost = mate_before - mate_after
        if mates_lost > 0:
            rewards[idx] -= mates_lost * 0.5

        # newly gained kick ability
        if not kick_before and kick_after:
            rewards[idx] += 0.02

        # the remaining shaping terms are present but weighted zero
        if ammo_after - ammo_before > 0:
            rewards[idx] += 0.00
        if blast_after - blast_before > 0:
            rewards[idx] += 0.00
        if position_after not in position_queue:
            rewards[idx] += 0.000
            position_queue.append(position_after)
        for row, col in wood_cells:
            board_after = after['board']
            cell = (row, col)
            if not (utility.position_is_wall(board_after, cell)
                    or utility.position_is_fog(board_after, cell)):
                rewards[idx] += 0.000

    # 0 and 2 are teammates, 1 and 3 are teammates
    team_spirit = 0.2
    blended = [
        rewards[k] * (1 - team_spirit) +
        team_spirit * rewards[(k + 2) % n_agents] for k in range(n_agents)
    ]

    mean_even = (blended[0] + blended[2]) / 2.0
    mean_odd = (blended[1] + blended[3]) / 2.0
    # subtract the opposing pair's mean so the result is zero-sum
    return [
        blended[0] - mean_odd, blended[1] - mean_even,
        blended[2] - mean_odd, blended[3] - mean_even
    ]
Exemple #13
0
    def _get_breakable(self, board, my_position, blast_strength, target_item):
        """
        For each position in board, count the number of woods that can be broken
        by placing a bomb with the given blast strength at that position
        """

        n_breakable = np.zeros(board.shape)
        broken_by = defaultdict(
            list)  # the bomb positions where each item will be broken
        to_break = defaultdict(
            list)  # items that will be broken by the bomb at each positions

        reachable = np.full(board.shape, False)
        q = [my_position]
        while q:
            p = q.pop()
            if reachable[p]:
                continue
            else:
                reachable[p] = True
                for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1)]:
                    next_position = (p[0] + dx, p[1] + dy)
                    if not self._on_board(next_position):
                        continue
                    if reachable[next_position]:
                        continue
                    if utility.position_is_wall(board, next_position):
                        continue
                    q.append(next_position)

        rows, cols = np.where(board == target_item.value)
        for wood_position in zip(rows, cols):
            x, y = wood_position
            for dx in range(1, min([blast_strength, board.shape[1] - x])):
                position = (x + dx, y)
                if reachable[position]:
                    n_breakable[position] += 1
                    broken_by[(x, y)].append(position)
                    to_break[position].append((x, y))
                else:
                    break
            for dx in range(1, min([blast_strength, x + 1])):
                position = (x - dx, y)
                if reachable[position]:
                    n_breakable[position] += 1
                    broken_by[(x, y)].append(position)
                    to_break[position].append((x, y))
                else:
                    break
            for dy in range(1, min([blast_strength, board.shape[1] - y])):
                position = (x, y + dy)
                if reachable[position]:
                    n_breakable[position] += 1
                    broken_by[(x, y)].append(position)
                    to_break[position].append((x, y))
                else:
                    break
            for dy in range(1, min([blast_strength, y + 1])):
                position = (x, y - dy)
                if reachable[position]:
                    n_breakable[position] += 1
                    broken_by[(x, y)].append(position)
                    to_break[position].append((x, y))
                else:
                    break

        return n_breakable, broken_by, to_break