Ejemplo n.º 1
0
    def _djikstra(board, my_position, bombs, enemies, depth=None, exclude=None):
        '''Shortest-path search over the cells around ``my_position``.

        Args:
            board: 2-D grid indexable with ``(row, col)`` tuples. ``len(board)``
                is used as the bound for both axes, so a square board is
                assumed.
            my_position: ``(row, col)`` the search starts from.
            bombs: iterable of dicts that carry a ``'position'`` key.
            enemies: forwarded unchanged to ``utility.position_is_passable``.
            depth: largest Manhattan distance from ``my_position`` that is
                considered. Defaults to ``len(board) * 2`` when omitted.
            exclude: items whose cells are left out of the search; defaults to
                Fog, Rigid and Flames.

        Returns:
            ``(items, dist, prev)``: ``items`` maps each item to the positions
            holding it, ``dist`` maps each considered position to its step
            count from ``my_position`` (``np.inf`` when it was never reached),
            and ``prev`` maps each position to its predecessor on that path
            (``None`` for the start and for unreached cells).
        '''
        if depth is None:
            depth = len(board) * 2

        if exclude is None:
            exclude = [
                constants.Item.Fog, constants.Item.Rigid, constants.Item.Flames
            ]

        def out_of_range(p_1, p_2):
            '''Determines if two points are out of range of each other'''
            x_1, y_1 = p_1
            x_2, y_2 = p_2
            return abs(y_2 - y_1) + abs(x_2 - x_1) > depth

        items = defaultdict(list)
        dist = {}
        prev = {}
        Q = queue.PriorityQueue()

        my_x, my_y = my_position
        # range() excludes its stop value, hence the "+ 1": without it the
        # cells exactly `depth` away in the positive directions were dropped
        # while their mirror images in the negative directions were kept.
        row_stop = min(len(board), my_x + depth + 1)
        col_stop = min(len(board), my_y + depth + 1)
        for r in range(max(0, my_x - depth), row_stop):
            for c in range(max(0, my_y - depth), col_stop):
                position = (r, c)
                if out_of_range(my_position, position) or \
                        utility.position_in_items(board, position, exclude):
                    continue

                dist[position] = 0 if position == my_position else np.inf
                prev[position] = None
                Q.put((dist[position], position))

        for bomb in bombs:
            if bomb['position'] == my_position:
                items[constants.Item.Bomb].append(my_position)

        # The queue cannot lower the priority of an entry that is already in
        # it, so every improvement is pushed as a new entry and outdated
        # entries are skipped when they surface.
        settled = set()
        while not Q.empty():
            _, position = Q.get()
            if position in settled:
                continue
            settled.add(position)

            if utility.position_is_passable(board, position, enemies):
                x, y = position
                val = dist[(x, y)] + 1
                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    new_position = (row + x, col + y)
                    if new_position not in dist:
                        continue

                    if val < dist[new_position]:
                        dist[new_position] = val
                        prev[new_position] = position
                        Q.put((val, new_position))

            # Each considered cell is recorded exactly once, reached or not.
            item = constants.Item(board[position])
            items[item].append(position)

        return items, dist, prev
Ejemplo n.º 2
0
        def is_stuck_direction(next_position, bomb_range, next_board, enemies):
            '''Report whether moving to ``next_position`` leaves no way out.

            Cells are explored outward from ``next_position`` in order of
            their Manhattan distance to it. The answer is False as soon as the
            search pops a cell that shares neither coordinate with
            ``next_position`` or whose distance exceeds ``bomb_range``; it is
            True when the queue runs dry first.
            '''
            start_x, start_y = next_position
            visited = set()
            frontier = queue.PriorityQueue()
            frontier.put((0, next_position))

            while not frontier.empty():
                travelled, current = frontier.get()
                visited.add(current)
                cur_x, cur_y = current

                left_both_axes = start_x != cur_x and start_y != cur_y
                if left_both_axes or travelled > bomb_range:
                    return False

                for d_row, d_col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    neighbour = (d_row + cur_x, d_col + cur_y)
                    blocked = (
                        neighbour in visited
                        or not utility.position_on_board(next_board, neighbour)
                        or not utility.position_is_passable(
                            next_board, neighbour, enemies))
                    if blocked:
                        continue

                    spread = abs(neighbour[0] - start_x) + \
                        abs(neighbour[1] - start_y)
                    frontier.put((spread, neighbour))
            return True
def step_to(obs, new_position, lay_bomb=False):
    """Return a deep copy of ``obs`` after the agent steps to ``new_position``.

    With ``lay_bomb=True`` the change covers two steps: a bomb is laid on the
    current cell and then the agent moves. ``obs`` itself is not modified.
    ``new_position`` must be passable on ``obs['board']``.
    """
    assert (utility.position_is_passable(obs['board'], new_position,
                                         obs['enemies']))
    new_obs = copy.deepcopy(obs)
    board_before = obs['board']
    start = obs['position']
    side = len(board_before)

    # The vacated cell shows a bomb when one is being laid, or when the agent
    # was already standing on one; otherwise it becomes a passage.
    if lay_bomb or obs['bomb_life'][start] > 0:
        vacated_value = constants.Item.Bomb.value
    else:
        vacated_value = constants.Item.Passage.value

    # Move the agent: new position, vacated cell, agent id on the target cell.
    new_obs['position'] = new_position
    new_obs['board'][start] = vacated_value
    new_obs['board'][new_position] = board_before[start]

    if lay_bomb:
        new_obs['bomb_blast_strength'][start] = obs['blast_strength']
        new_obs['bomb_life'][start] = constants.DEFAULT_BOMB_LIFE

    # Age every bomb with at least two ticks left, never dropping below 1.
    elapsed = 2 if lay_bomb else 1
    bomb_life = new_obs['bomb_life']
    for i in range(side):
        for j in range(side):
            if bomb_life[i, j] < 2:
                continue
            bomb_life[i, j] = max(1, bomb_life[i, j] - elapsed)
    return new_obs
Ejemplo n.º 4
0
 def _filter_invalid_directions(board, my_position, directions, enemies):
     """Keep the directions whose target cell is on the board and passable.

     The input order of ``directions`` is preserved in the returned list.
     """
     targets = ((direction,
                 utility.get_next_position(my_position, direction))
                for direction in directions)
     return [
         direction for direction, target in targets
         if utility.position_on_board(board, target)
         and utility.position_is_passable(board, target, enemies)
     ]
Ejemplo n.º 5
0
def _filter_legal_actions(state):
    """List the actions available in ``state``.

    The Bomb action always comes first, followed by every entry of
    ``directions`` (a name resolved outside this function) whose target cell
    is on the board and passable.
    """
    origin = tuple(state['position'])
    grid = np.array(state['board'])
    foes = [constants.Item(e) for e in state['enemies']]

    def _reachable(direction):
        target = utility.get_next_position(origin, direction)
        return utility.position_on_board(
            grid, target) and utility.position_is_passable(
                grid, target, foes)

    return [constants.Action.Bomb] + [d for d in directions if _reachable(d)]
Ejemplo n.º 6
0
def valid_directions(obs):
    """Build a six-slot list flagging which moves in ``dirs`` are possible.

    Slot ``act.value`` is 1 when the target cell of ``act`` is on the board
    and passable, -1 when it is not. Slots for actions that are not in
    ``dirs`` (a name resolved outside this function) stay 0.
    """
    pos, board, enemies = obs['position'], obs['board'], obs['enemies']
    flags = [0] * 6
    for act in dirs:
        target = util.get_next_position(pos, act)
        reachable = util.position_on_board(
            board, target) and util.position_is_passable(
                board, target, enemies)
        flags[act.value] = 1 if reachable else -1
    return flags
Ejemplo n.º 7
0
        def is_stuck_direction(next_position, bomb_range, next_board, enemies):
            '''Tell whether stepping onto ``next_position`` traps the agent.

            The search expands passable, on-board neighbours starting from
            ``next_position``, ordered by Manhattan distance to it. Popping a
            cell that lies off both the row and the column of
            ``next_position``, or one farther away than ``bomb_range``, means
            there is a way out (False). Exhausting the queue means stuck (True).
            '''
            home_x, home_y = next_position
            explored = set()
            pending = queue.PriorityQueue()
            pending.put((0, next_position))

            while not pending.empty():
                reach, here = pending.get()
                explored.add(here)
                here_x, here_y = here

                if home_x != here_x and home_y != here_y:
                    return False
                if reach > bomb_range:
                    return False

                for step_x, step_y in ((-1, 0), (1, 0), (0, -1), (0, 1)):
                    candidate = (step_x + here_x, step_y + here_y)
                    if (candidate not in explored
                            and utility.position_on_board(
                                next_board, candidate)
                            and utility.position_is_passable(
                                next_board, candidate, enemies)):
                        pending.put((abs(candidate[0] - home_x) +
                                     abs(candidate[1] - home_y), candidate))
            return True
Ejemplo n.º 8
0
def CanGo(bfs_node, action, enemies):
    """Decide whether ``action`` is a usable move from ``bfs_node.my_position``.

    The move is rejected when the direction is not valid from the current
    cell, when the target cell is not passable, or when the target cell lies
    in the row or column of a bomb (within its blast strength, and with
    ``bomb.life <= bomb.blast_strength + 2``) without a wall in between.

    Args:
        bfs_node: search node exposing ``my_position`` and ``env`` (with
            ``_board`` and ``_bombs``).
        action: the move to evaluate.
        enemies: forwarded to ``utility.position_is_passable``.

    Returns:
        True when the move is allowed, False otherwise.
    """
    board = bfs_node.env._board
    position_x, position_y = utility.get_next_position(bfs_node.my_position,
                                                       action)
    # The direction is validated from the cell the agent currently occupies.
    # Passing the already-advanced target together with `action` would test
    # the cell two steps away instead.
    if not utility.is_valid_direction(board, bfs_node.my_position, action):
        return False
    if not utility.position_is_passable(board, (position_x, position_y),
                                        enemies):
        return False

    for bomb in bfs_node.env._bombs:
        bomb_x, bomb_y = bomb.position
        # Bombs with plenty of life left are not treated as a threat yet.
        if bomb.life > bomb.blast_strength + 2:
            continue

        # Cells from the lower coordinate (inclusive) up to the higher one
        # (exclusive) along the shared row or column.
        if (bomb_x == position_x
                and abs(bomb_y - position_y) <= bomb.blast_strength):
            between = [(position_x, pix_y)
                       for pix_y in range(min(bomb_y, position_y),
                                          max(bomb_y, position_y))]
        elif (bomb_y == position_y
              and abs(bomb_x - position_x) <= bomb.blast_strength):
            between = [(pix_x, position_y)
                       for pix_x in range(min(bomb_x, position_x),
                                          max(bomb_x, position_x))]
        else:
            continue

        # No wall between the bomb and the target cell: the blast reaches it.
        if not any(utility.position_is_wall(board, cell) for cell in between):
            return False
    return True
Ejemplo n.º 9
0
    def _find_safe_directions(self, board, my_position, unsafe_directions,
                              bombs, enemies):
        """Choose the directions the agent may take given the unsafe ones.

        ``unsafe_directions`` maps a direction to a bomb range. When all four
        directions are unsafe, the first one that does not leave the agent
        stuck is returned alone, or Stop when none qualifies. Otherwise the
        on-board, not-unsafe directions leading to a passable or fog cell are
        returned; failing that, the unsafe directions that stay on the board;
        failing that, Stop.
        """

        def is_stuck_direction(next_position, bomb_range, next_board, enemies):
            '''Return True when no escape is found from ``next_position``.'''
            origin_x, origin_y = next_position
            visited = set()
            frontier = queue.PriorityQueue()
            frontier.put((0, next_position))

            while not frontier.empty():
                reach, current = frontier.get()
                visited.add(current)
                current_x, current_y = current

                # Off both axes of the origin, or beyond the blast: escaped.
                off_axes = origin_x != current_x and origin_y != current_y
                if off_axes or reach > bomb_range:
                    return False

                for d_row, d_col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    neighbour = (d_row + current_x, d_col + current_y)
                    if neighbour in visited:
                        continue
                    if not (utility.position_on_board(next_board, neighbour)
                            and utility.position_is_passable(
                                next_board, neighbour, enemies)):
                        continue

                    frontier.put((abs(neighbour[0] - origin_x) +
                                  abs(neighbour[1] - origin_y), neighbour))
            return True

        if len(unsafe_directions) == 4:
            # Every direction is unsafe: look for one that will not lock us
            # in, on a board where our own cell is treated as a bomb.
            bombed_board = board.copy()
            bombed_board[my_position] = constants.Item.Bomb.value

            for direction, bomb_range in unsafe_directions.items():
                target = utility.get_next_position(my_position, direction)
                reachable = utility.position_on_board(
                    bombed_board, target) and utility.position_is_passable(
                        bombed_board, target, enemies)
                if not reachable:
                    continue

                if not is_stuck_direction(target, bomb_range, bombed_board,
                                          enemies):
                    # First workable direction wins.
                    return [direction]
            return [constants.Action.Stop]

        x, y = my_position
        safe = []
        off_board = []  # Directions that would leave the board.

        for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            position = (x + row, y + col)
            direction = utility.get_direction(my_position, position)

            if not utility.position_on_board(board, position):
                off_board.append(direction)
            elif direction in unsafe_directions:
                # Known to be unsafe; never offered as a safe choice.
                pass
            elif utility.position_is_passable(
                    board, position, enemies) or utility.position_is_fog(
                        board, position):
                safe.append(direction)

        if safe:
            return safe

        # Nothing safe: fall back to unsafe directions that stay on the
        # board, and to Stop when even those are missing.
        fallback = [k for k in unsafe_directions if k not in off_board]
        if not fallback:
            return [constants.Action.Stop]
        return fallback
Ejemplo n.º 10
0
def position_is_not_passible(board, position, enemies):
    """Return the negation of ``utility.position_is_passable`` for the same arguments."""
    passable = utility.position_is_passable(board, position, enemies)
    return not passable
Ejemplo n.º 11
0
    def _djikstra(board,
                  my_position,
                  bombs,
                  enemies,
                  depth=None,
                  exclude=None):
        """
        Shortest-path search from ``my_position`` over the nearby board cells.

        Parameters
        ----------
        board = np.array(obs['board'])
            ``len(board)`` bounds both axes, so a square board is assumed.

        my_position = tuple(obs['position'])

        bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
            Only the ``'position'`` key of each entry is read here.

        enemies = [constants.Item(e) for e in obs['enemies']]

        depth
            Largest Manhattan distance from ``my_position`` that is
            considered; defaults to ``len(board) * 2``.

        exclude
            Items whose cells are left out; defaults to Fog, Rigid, Flames.

        Returns
        -------
        (items, dist, prev)
            ``items`` maps each item to the positions holding it, ``dist``
            maps each considered position to its step count from
            ``my_position`` (``np.inf`` when never reached) and ``prev`` maps
            each position to its predecessor (``None`` when there is none).
        """

        if depth is None:
            depth = len(board) * 2

        if exclude is None:
            exclude = [
                constants.Item.Fog, constants.Item.Rigid, constants.Item.Flames
            ]

        def out_of_range(p1, p2):
            x1, y1 = p1
            x2, y2 = p2
            return abs(y2 - y1) + abs(x2 - x1) > depth

        items = defaultdict(list)

        for bomb in bombs:
            if bomb['position'] == my_position:
                items[constants.Item.Bomb].append(my_position)

        dist = {}
        prev = {}

        mx, my = my_position
        # range() excludes its stop value, hence the "+ 1": without it the
        # cells exactly `depth` away in the positive directions were dropped
        # while their mirror images in the negative directions were kept.
        row_stop = min(len(board), mx + depth + 1)
        col_stop = min(len(board), my + depth + 1)
        for r in range(max(0, mx - depth), row_stop):
            for c in range(max(0, my - depth), col_stop):
                position = (r, c)
                if any([
                        out_of_range(my_position, position),
                        utility.position_in_items(board, position, exclude),
                ]):
                    continue

                if position == my_position:
                    dist[position] = 0
                else:
                    dist[position] = np.inf

                prev[position] = None

                item = constants.Item(board[position])
                items[item].append(position)

        # Djikstra
        H = []
        heapq.heappush(H, (0, my_position))
        while H:
            min_dist, position = heapq.heappop(H)

            # Cells that cannot be walked through are reached but not expanded.
            if not utility.position_is_passable(board, position, enemies):
                continue

            x, y = position
            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                new_position = (row + x, col + y)
                if new_position not in dist:
                    continue

                if min_dist + 1 < dist[new_position]:
                    dist[new_position] = min_dist + 1
                    prev[new_position] = position
                    heapq.heappush(H, (dist[new_position], new_position))

        return items, dist, prev
Ejemplo n.º 12
0
    def _find_safe_directions(self, board, my_position, unsafe_directions,
                              bombs, enemies, item):
        """Choose the directions the agent may take given the unsafe ones.

        ``unsafe_directions`` is iterated with ``.items()`` as
        direction -> bomb range. ``bombs`` entries are read through the keys
        ``'position'``, ``'bomb_life'`` and ``'blast_strength'``. ``item`` is
        indexed with ``constants.Item(3)`` (presumably the Bomb item - confirm
        against ``constants``).

        When all four directions are unsafe, the first direction that is not
        stuck is returned alone. Failing that, if ``item`` lists exactly one
        position under ``Item(3)`` and a bomb sits on ``my_position``, the
        unsafe directions that are not disallowed are returned; otherwise
        Stop. In the general case the safe directions are returned, then the
        unsafe-but-allowed ones, then Stop.
        """
        def is_stuck_direction(next_position, bomb_range, next_board, enemies):
            '''Helper function to do determine if the agents next move is possible.'''
            # Cells are visited in order of Manhattan distance to next_position.
            Q = queue.PriorityQueue()
            Q.put((0, next_position))
            seen = set()

            next_x, next_y = next_position
            is_stuck = True
            while not Q.empty():
                dist, position = Q.get()
                seen.add(position)

                #FIXME is_stuck=False
                position_x, position_y = position
                # A cell off both the row and the column of next_position
                # counts as an escape.
                if next_x != position_x and next_y != position_y:
                    is_stuck = False
                    break

                # So does a cell farther away than the bomb range.
                if dist > bomb_range:
                    is_stuck = False
                    break

                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    new_position = (row + position_x, col + position_y)
                    if new_position in seen:
                        continue

                    if not utility.position_on_board(next_board, new_position):
                        continue

                    if not utility.position_is_passable(
                            next_board, new_position, enemies):
                        continue

                    # `dist` is reused for the neighbour's priority; the next
                    # Q.get() overwrites it before it is read again.
                    dist = abs(row + position_x -
                               next_x) + abs(col + position_y - next_y)
                    Q.put((dist, new_position))
            return is_stuck

        # All directions are unsafe. Return a position that won't leave us locked.
        safe = []

        if len(unsafe_directions) == 4:
            # Work on a copy where our own cell is marked as a bomb.
            next_board = board.copy()
            next_board[my_position] = constants.Item.Bomb.value
            disallowed = []
            for direction, bomb_range in unsafe_directions.items():
                next_position = utility.get_next_position(
                    my_position, direction)
                next_x, next_y = next_position
                if not utility.position_on_board(next_board, next_position) or \
                   not utility.position_is_passable(next_board, next_position, enemies):
                    disallowed.append(direction)
                    continue

                if not is_stuck_direction(next_position, bomb_range,
                                          next_board, enemies):
                    # We found a direction that works. The .items provided
                    # a small bit of randomness. So let's go with this one.
                    return [direction]
            if not safe:

                # Before settling on not moving, check whether a bomb sits on
                # our own cell; if so, offer the unsafe directions that are
                # not disallowed instead of stopping.
                # for i in bombs:
                # if len(bombs) == 1 :
                if len(item[constants.Item(3)]) == 1:
                    # if my_position == i['position']:
                    for bomb in bombs:
                        if my_position == bomb['position']:
                            safe = [
                                k for k in unsafe_directions
                                if k not in disallowed
                            ]
                        # break
            if not safe:
                safe = [constants.Action.Stop]
            return safe

        x, y = my_position
        disallowed = []  # The directions that will go off the board.

        for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            position = (x + row, y + col)
            direction = utility.get_direction(my_position, position)

            # Don't include any direction that will go off of the board.
            if not utility.position_on_board(board, position):
                disallowed.append(direction)
                continue

            # Don't include any direction that we know is unsafe.
            if direction in unsafe_directions:

                # Disallow this unsafe direction when its cell is not
                # passable, to avoid kicking a bomb.
                if not utility.position_is_passable(board, position, enemies):
                    disallowed.append(direction)

                # TODO: moving in an unsafe direction must also be ruled out
                # when it would get us killed by the blast; deciding that
                # needs the bomb's remaining life (not implemented).
                # if

                continue

            if utility.position_is_passable(
                    board, position, enemies) or utility.position_is_fog(
                        board, position):
                # After moving one cell there may be a bomb right next to us.
                # The direction is added first and taken back (pop) when a
                # bomb with bomb_life == 1 shares the target's row or column
                # within its blast strength. Walls in between are not checked.
                safe.append(direction)
                for bomb in bombs:
                    if bomb['bomb_life'] == 1:
                        bomb_x, bomb_y = bomb['position']
                        if bomb_x == position[0] and abs(
                                bomb_y -
                                position[1]) <= bomb['blast_strength']:
                            #remove the direction
                            safe.pop()
                            break
                        elif bomb_y == position[1] and abs(
                                bomb_x -
                                position[0]) <= bomb['blast_strength']:
                            safe.pop()
                            break
        if not safe:
            # We don't have any safe directions, so return something that is allowed.
            safe = [k for k in unsafe_directions if k not in disallowed]

        if not safe:
            # We don't have ANY directions. So return the stop choice.
            return [constants.Action.Stop]

        return safe
Ejemplo n.º 13
0
    def get_all_possible_states(self):
        """Build the successor states for every move that passes the filters.

        Each of the six candidate moves (Stop, the four directions, and ``5``,
        which the checks below treat as the bomb action) is filtered, applied
        through ``self.advance_game_on_copy`` and wrapped in a ``State`` whose
        ``score`` is adjusted here. Returns the list of those ``State``
        objects; it may be empty.
        """
        list_of_states = []
        moves = [
            constants.Action.Stop, constants.Action.Up, constants.Action.Down,
            constants.Action.Left, constants.Action.Right, 5
        ]
        board_shape1, board_shape2 = self._board.shape
        #check if move will land me on top of a bomb
        #unsafe_directions = self._directions_in_range_of_bomb(self._board, self._my_position, self.curr_bombs)
        #if unsafe_directions:
        #    if(len(unsafe_directions) != 4):
        #        for i in unsafe_directions:
        #print("get all possible states, removing unsafe move", i)
        #            moves.remove(i)
        # #if I am on a bomb, remove stop
        # if self._bomb_life[self._my_position[0]][self._my_position[1]] > 0:
        #     if constants.Action.Stop in moves:
        #         moves.remove(constants.Action.Stop)
        # NOTE(review): with the pruning above commented out, `moves` always
        # holds six entries, so the branch below cannot run and
        # `lost_all_moves` stays False.
        lost_all_moves = False
        if len(moves) == 0:
            lost_all_moves = True
            moves = [
                constants.Action.Up, constants.Action.Down,
                constants.Action.Left, constants.Action.Right,
                constants.Action.Stop, 5
            ]
        for move in moves:
            if move == 5 or utility.is_valid_direction(
                    self._board, self._my_position, move):
                #check if position is passible
                # check_pos stays None for Stop and for the bomb action.
                check_pos = None
                if move == constants.Action.Up:
                    check_pos = (self._my_position[0] - 1,
                                 self._my_position[1])
                elif move == constants.Action.Down:
                    check_pos = (self._my_position[0] + 1,
                                 self._my_position[1])
                elif move == constants.Action.Left:
                    check_pos = (self._my_position[0],
                                 self._my_position[1] - 1)
                elif move == constants.Action.Right:
                    check_pos = (self._my_position[0],
                                 self._my_position[1] + 1)
                if check_pos != None:
                    if not utility.position_is_passable(
                            self._board, check_pos, self._enemies):
                        #if i am blocked by a bomb, try kicking
                        # For each direction: when the adjacent cell holds the
                        # value 3 (the bomb, per the comment above), the move
                        # is dropped if the cell behind it is off the board or
                        # not 0 (presumably an empty passage - confirm).
                        # NOTE(review): when can_kick is set and the adjacent
                        # cell is NOT 3, no `continue` is reached, so a move
                        # into a non-passable cell is kept - confirm intent.
                        if self._obs['can_kick']:
                            if move == constants.Action.Up:
                                if self._board[self._my_position[0] -
                                               1][self._my_position[1]] == 3:
                                    if self._my_position[0] - 2 >= 0:
                                        if self._board[
                                                self._my_position[0] -
                                                2][self._my_position[1]] != 0:
                                            # print("removing non passable move", move)
                                            continue
                                    else:
                                        # print("removing non passable move", move)
                                        continue
                            elif move == constants.Action.Down:
                                if self._board[self._my_position[0] +
                                               1][self._my_position[1]] == 3:
                                    if self._my_position[0] + 2 < board_shape1:
                                        if self._board[
                                                self._my_position[0] +
                                                2][self._my_position[1]] != 0:
                                            # print("removing non passable move", move)
                                            continue
                                    else:
                                        # print("removing non passable move", move)
                                        continue
                            elif move == constants.Action.Left:
                                if self._board[self._my_position[0]][
                                        self._my_position[1] - 1] == 3:
                                    if self._my_position[1] - 2 >= 0:
                                        if self._board[self._my_position[0]][
                                                self._my_position[1] - 2] != 0:
                                            # print("removing non passable move", move)
                                            continue
                                    else:
                                        # print("removing non passable move", move)
                                        continue
                            elif move == constants.Action.Right:
                                if self._board[self._my_position[0]][
                                        self._my_position[1] + 1] == 3:
                                    if self._my_position[1] + 2 < board_shape2:
                                        if self._board[self._my_position[0]][
                                                self._my_position[1] + 2] != 0:
                                            # print("removing non passable move", move)
                                            continue
                                    else:
                                        # print("removing non passable move", move)
                                        continue
                            else:
                                # print("removing non passable move", move)
                                continue
                        else:
                            # Cannot kick: a non-passable target rules the
                            # move out.
                            # print("removing non passable move", move)
                            continue
                #check to see if its a safe dir
                if move == 5 and self._ammo == 0:
                    # print("bombing without a bomb, skip")
                    #can not bomb with no ammo
                    continue

                #if I am on a bomb, lets not bomb
                if move == 5 and self._my_position in self.bombing_agents:
                    # print("bombing while on bomb, skip")
                    continue

                # Simulate the move on a copy of the game.
                temp_board, temp_curr_agent, temp_curr_bombs, temp_curr_items, temp_curr_flames, bombing_agents = self.advance_game_on_copy(
                    move)

                # Observation of this agent in the simulated game; the index
                # is the agent value minus 10.
                # NOTE(review): the literal arguments False, 11 and '' are
                # passed through as-is; check get_observations for their
                # meaning.
                temp_obs = self.fm.get_observations(
                    temp_board, temp_curr_agent, temp_curr_bombs,
                    temp_curr_flames, False, 11, self._game_mode,
                    '')[self.self_agent_value - 10]
                temp_obs['ammo'] = self._ammo
                if move == 5:
                    # Record who bombed from this cell and spend one ammo.
                    bombing_agents[(
                        self._my_position[0],
                        self._my_position[1])] = self.self_agent_value - 10
                    temp_obs['ammo'] = self._ammo - 1

                temp_obs['enemies'] = self._enemies

                temp_state = State(temp_obs, True)
                temp_state.bombing_agents = bombing_agents
                temp_state.move = move

                # Every successor pays a flat 0.1 on top of its own score.
                temp_state.score = temp_state.get_score()
                temp_state.score -= 0.1

                # A state in which the agent is dead is heavily penalised
                # rather than discarded.
                if not temp_state.am_I_alive:
                    temp_state.score -= 100

                if lost_all_moves == True:
                    temp_state.score -= 200

                list_of_states.append(temp_state)
        return list_of_states
Ejemplo n.º 14
0
    def _djikstra(board,
                  my_position,
                  bombs,
                  enemies,
                  bomb_timer=None,
                  depth=None,
                  exclude=None):
        """
        Shortest-path search from ``my_position`` that can avoid timed cells.

        Parameters
        ----------
        board
            2-D grid indexable with ``(row, col)`` tuples; ``len(board)``
            bounds both axes, so a square board is assumed.
        my_position
            ``(row, col)`` the search starts from.
        bombs
            Iterable of dicts; only the ``'position'`` key is read here.
        enemies
            Forwarded to ``utility.position_is_passable``.
        bomb_timer
            Optional container indexed by position. A cell whose value ``t``
            is positive is skipped when the arrival step differs from ``t``
            by less than 2.
        depth
            Largest Manhattan distance from ``my_position`` that is
            considered; defaults to ``len(board) * 2``.
        exclude
            Items whose cells are left out; defaults to Fog, Rigid, Flames.

        Returns
        -------
        (items, dist, prev)
            ``items`` maps each item to the positions holding it, ``dist``
            maps each considered position to its step count from
            ``my_position`` (``np.inf`` when never reached) and ``prev`` maps
            each position to its predecessor (``None`` when there is none).
        """

        if depth is None:
            depth = len(board) * 2

        if exclude is None:
            exclude = [
                constants.Item.Fog, constants.Item.Rigid, constants.Item.Flames
            ]

        def out_of_range(p1, p2):
            x1, y1 = p1
            x2, y2 = p2
            return abs(y2 - y1) + abs(x2 - x1) > depth

        items = defaultdict(list)

        for bomb in bombs:
            if bomb['position'] == my_position:
                items[constants.Item.Bomb].append(my_position)

        dist = {}
        prev = {}

        mx, my = my_position
        # range() excludes its stop value, hence the "+ 1": without it the
        # cells exactly `depth` away in the positive directions were dropped
        # while their mirror images in the negative directions were kept.
        row_stop = min(len(board), mx + depth + 1)
        col_stop = min(len(board), my + depth + 1)
        for r in range(max(0, mx - depth), row_stop):
            for c in range(max(0, my - depth), col_stop):
                position = (r, c)
                if any([
                        out_of_range(my_position, position),
                        utility.position_in_items(board, position, exclude),
                ]):
                    continue

                if position == my_position:
                    dist[position] = 0
                else:
                    dist[position] = np.inf

                prev[position] = None

                item = constants.Item(board[position])
                items[item].append(position)

        # Djikstra
        H = []
        heapq.heappush(H, (0, my_position))
        while H:
            min_dist, position = heapq.heappop(H)

            # A bomb cell is still expanded; any other non-passable cell is
            # not.
            if (board[position] != constants.Item.Bomb.value
                ) and not utility.position_is_passable(board, position,
                                                       enemies):
                continue

            x, y = position
            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                new_position = (row + x, col + y)
                if new_position not in dist:
                    continue

                if not utility.position_is_passable(board, new_position,
                                                    enemies):
                    continue

                if bomb_timer is not None:
                    # Skip cells whose timer is within one step of our
                    # arrival time.
                    t = bomb_timer[new_position]
                    if t > 0 and abs((min_dist + 1) - t) < 2:
                        continue

                if min_dist + 1 < dist[new_position]:
                    dist[new_position] = min_dist + 1
                    prev[new_position] = position
                    heapq.heappush(H, (dist[new_position], new_position))

        return items, dist, prev
Ejemplo n.º 15
0
    def act(self, obs, action_space):
        def convert_bombs(bomb_map):
            """List every positive cell of ``bomb_map`` as a bomb record.

            Each record is a dict with the cell's ``'position'`` as a
            ``(row, col)`` tuple and its value as an int ``'blast_strength'``.
            Records follow the order in which ``np.where`` reports the cells.
            """
            hits = np.where(bomb_map > 0)
            return [{
                'position': (r, c),
                'blast_strength': int(bomb_map[(r, c)])
            } for r, c in zip(hits[0], hits[1])]

        depth = 20

        my_position = tuple(obs['position'])
        board = np.array(obs['board'])
        bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
        enemies = [constants.Item(e) for e in obs['enemies']]
        ammo = int(obs['ammo'])
        blast_strength = int(obs['blast_strength'])

        if self.prev_pos != None:
            if self.prev_pos == my_position:
                if 1 <= self.prev_action.value <= 4:
                    if self.logging:
                        print('freeze')
                    board[self.prev_pos] = constants.Item.Rigid.value

        items, dist, prev = self._djikstra(board,
                                           my_position,
                                           bombs,
                                           enemies,
                                           bomb_timer=self.bomb_time,
                                           depth=depth)

        if self.logging:
            print('my_position =', my_position)
            print('board =')
            print(board)
            print('dist =')
            print(dist)
            print('bombs =', bombs)
            print('enemies =', enemies)
            for e in enemies:
                print(e)
                pos = items.get(e, [])
                print('pos =', pos)
                print('pos_len=', len(pos))
                if len(pos) > 0:
                    print('xy=', pos[0][0], ',', pos[0][1])
                # print('pos_r =', x, ',',y)
            print('ammo =', ammo)
            print('blast_strength =', blast_strength)

        test_ary = np.ones((11, 11))

        for c in range(11):
            for r in range(11):
                if (r, c) in dist:
                    test_ary[r, c] = dist[(r, c)]
                else:
                    test_ary[r, c] = -1

        if self.logging:
            print("dist_mat:")
            print(test_ary)

        # update bomb_time map
        bomb_life = 8
        has_bomb = {}
        already_breakable = np.zeros((11, 11))
        for b in bombs:
            r, c = b['position']
            strength = b['blast_strength']
            # print('bomb_cr =', c, 'r=', r, 'st=', strength)

            if self.bomb_time[(r, c)] == 0:
                self.bomb_time[(r, c)] = bomb_life
            else:
                self.bomb_time[(r, c)] -= 1

            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                for d in range(1, strength):

                    new_pos = (r + d * row, c + d * col)

                    if TestSimpleAgent._out_of_board(new_pos):
                        continue

                    # if new_pos[0] < 0 or new_pos[0] > 10:
                    #     continue
                    # if new_pos[1] < 0 or new_pos[1] > 10:
                    #     continue

                    if utility.position_is_rigid(board, new_pos):
                        continue

                    if utility.position_is_wood(board, new_pos):
                        already_breakable[new_pos] = 1

                    if self.bomb_time[new_pos] == 0:
                        self.bomb_time[new_pos] = bomb_life
                    else:
                        self.bomb_time[new_pos] -= 1

                    has_bomb[new_pos] = 1

        # clear up table
        for c in range(11):
            for r in range(11):
                if (r, c) not in has_bomb:
                    self.bomb_time[(r, c)] = 0

        if self.logging:
            print("bomb_time:")
            print(self.bomb_time)

        # evaluate each position in terms of breakable woods
        num_breakable = np.zeros((11, 11))
        num_breakable_inside = np.zeros((11, 11))
        for c in range(11):
            for r in range(11):
                if utility.position_is_wood(board, (r, c)):
                    if already_breakable[(r, c)]:
                        continue
                    for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                        for d in range(1, blast_strength):
                            new_pos = (r + d * row, c + d * col)

                            if TestSimpleAgent._out_of_board(new_pos):
                                continue

                            if utility.position_is_passable(
                                    board, new_pos,
                                    enemies) or utility.position_is_flames(
                                        board, new_pos):
                                num_breakable[new_pos] += 1
                            else:
                                break

                    tmp_num = 0
                    has_passable = False
                    for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                        new_pos = (r + row, c + col)
                        if TestSimpleAgent._out_of_board(new_pos):
                            continue

                        if utility.position_is_wood(board, new_pos):
                            tmp_num += 1
                        elif utility.position_is_passable(
                                board, new_pos, enemies):
                            has_passable = True

                    if (not has_passable) and tmp_num > 0:
                        tmp_num -= 1

                    num_breakable_inside[(r, c)] = tmp_num

        if self.logging:
            print('num_breakable:')
            print(num_breakable)

            print('num_breakable_inside:')
            print(num_breakable_inside)

        num_breakable_total = np.zeros((11, 11))
        for c in range(11):
            for r in range(11):
                num_breakable_total[(r, c)] = num_breakable[(r, c)]

                if num_breakable_total[(r, c)] == -1 or num_breakable_total[(
                        r, c)] == np.inf:
                    continue

                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    new_pos = (r + row, c + col)

                    if new_pos[0] < 0 or new_pos[0] > 10:
                        continue
                    if new_pos[1] < 0 or new_pos[1] > 10:
                        continue

                    num_breakable_total[(
                        r, c)] += num_breakable_inside[new_pos] * 0.5

        if self.logging:
            print('num_breakable_total:')
            print(num_breakable_total)

        # evaluate each position in total
        pos_scores = np.zeros((11, 11))
        for c in range(11):
            for r in range(11):
                if (r, c) not in dist:
                    pos_scores[(r, c)] = -1
                    continue
                elif dist[(r, c)] == np.inf:
                    pos_scores[(r, c)] = np.inf
                    continue

                if num_breakable_total[(r, c)] > 0:
                    pos_scores[(r, c)] += num_breakable_total[(r, c)]
                    pos_scores[(r, c)] += (depth - dist[(r, c)]) * 0.2

                # consider power-up items
                if board[(r, c)] in {
                        constants.Item.ExtraBomb.value,
                        constants.Item.IncrRange.value
                }:
                    pos_scores[(r, c)] += 50

        if self.logging:
            print('pos_score:')
            print(pos_scores)

        # consider degree of freedom
        dis_to_ene = 100
        for e in enemies:
            pos = items.get(e, [])
            if len(pos) > 0:
                d = abs(pos[0][0] - my_position[0]) + abs(pos[0][1] -
                                                          my_position[1])
                if dis_to_ene > d:
                    dis_to_ene = d
        if dis_to_ene <= -4:
            # if direction is not None:
            deg_frees = np.zeros((11, 11))
            for c in range(11):
                for r in range(11):
                    # if pos_scores[(r, c)] == np.inf:
                    #     continue
                    if not utility.position_is_passable(
                            board, (r, c), enemies):
                        continue

                    deg_free = 0
                    for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                        new_pos = (r + row, c + col)
                        if new_pos[0] < 0 or new_pos[0] > 10:
                            continue
                        if new_pos[1] < 0 or new_pos[1] > 10:
                            continue

                        if utility.position_is_passable(
                                board, new_pos,
                                enemies) or utility.position_is_flames(
                                    board, new_pos):
                            deg_free += 1

                    deg_frees[(r, c)] = deg_free

                    if deg_free <= 1:
                        pos_scores[(r, c)] -= 5

            if self.logging:
                print('deg_free')
                print(deg_frees)

        # consider bomb blast
        for i in range(len(bombs)):
            r, c = bombs[i]['position']
            strength = bombs[i]['blast_strength']

            pos_scores[(r, c)] = -20

            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                for d in range(1, strength):

                    new_pos = (r + d * row, c + d * col)
                    if new_pos[0] < 0 or new_pos[0] > 10:
                        continue
                    if new_pos[1] < 0 or new_pos[1] > 10:
                        continue

                    if new_pos not in dist:
                        continue
                    elif new_pos == np.inf:
                        continue

                    pos_scores[new_pos] = -20

        if self.logging:
            print('consider blast pos_score:')
            print(pos_scores)

        # consider enemies
        for e in enemies:
            pos = items.get(e, [])
            if len(pos) > 0:
                r = pos[0][0]
                c = pos[0][1]

                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    for d in range(1, blast_strength * 2):
                        new_pos = (r + d * row, c + d * col)
                        if new_pos[0] < 0 or new_pos[0] > 10:
                            continue
                        if new_pos[1] < 0 or new_pos[1] > 10:
                            continue

                        if not utility.position_is_passable(
                                board, new_pos, enemies):
                            break

                        pos_scores[new_pos] += 0.3

        if self.logging:
            print('consider enemy:')
            print(pos_scores)

        h_r, h_c = -1, -1
        h_score = -1
        for c in range(11):
            for r in range(11):
                if (r, c) not in dist:
                    continue
                elif dist[(r, c)] == np.inf:
                    continue

                if h_score < pos_scores[(r, c)]:
                    h_score = pos_scores[(r, c)]
                    h_r, h_c = (r, c)

        if self.logging:
            print('h_score and pos:', h_score, '(r, c) =', h_r, ',', h_c)
            print('prev:')
            print(prev)

        # if current position is not the highest score position, move to the highest position.
        if h_r == -1:
            # print('action: Stop')
            self.prev_action = constants.Action.Stop
            # return constants.Action.Stop.value
        elif pos_scores[my_position] == h_score:
            if self._can_escape(pos_scores, my_position, blast_strength):
                # print('set bomb')
                self.prev_action = constants.Action.Bomb
                # return constants.Action.Bomb.value
            else:
                # print('action: Stop2')
                self.prev_action = constants.Action.Stop
                # return constants.Action.Stop.value
        else:
            # print('action: backtrack')
            self.prev_action = self._backtrack(my_position, (h_r, h_c), prev)
            # return self._backtrack(my_position, (h_r, h_c), prev)

        self.prev_pos = my_position
        if self.logging:
            print('action: ', self.prev_action)
        return self.prev_action.value

        # Move if we are in an unsafe place.
        unsafe_directions = self._directions_in_range_of_bomb(
            board, my_position, bombs, dist)
        if unsafe_directions:
            directions = self._find_safe_directions(board, my_position,
                                                    unsafe_directions, bombs,
                                                    enemies)
            return random.choice(directions).value

        # Lay pomme if we are adjacent to an enemy.
        if self._is_adjacent_enemy(items, dist, enemies) and self._maybe_bomb(
                ammo, blast_strength, items, dist, my_position):
            return constants.Action.Bomb.value

        # Move towards an enemy if there is one in exactly three reachable spaces.
        direction = self._near_enemy(my_position, items, dist, prev, enemies,
                                     3)
        if direction is not None and (self._prev_direction != direction
                                      or random.random() < .5):
            self._prev_direction = direction
            return direction.value

        # Move towards a good item if there is one within two reachable spaces.
        direction = self._near_good_powerup(my_position, items, dist, prev, 2)
        if direction is not None:
            return direction.value

        # Maybe lay a bomb if we are within a space of a wooden wall.
        if self._near_wood(my_position, items, dist, prev, 1):
            if self._maybe_bomb(ammo, blast_strength, items, dist,
                                my_position):
                return constants.Action.Bomb.value
            else:
                return constants.Action.Stop.value

        # Move towards a wooden wall if there is one within two reachable spaces and you have a bomb.
        direction = self._near_wood(my_position, items, dist, prev, 2)
        if direction is not None:
            directions = self._filter_unsafe_directions(
                board, my_position, [direction], bombs)
            if directions:
                return directions[0].value

        # Choose a random but valid direction.
        directions = [
            constants.Action.Stop, constants.Action.Left,
            constants.Action.Right, constants.Action.Up, constants.Action.Down
        ]
        valid_directions = self._filter_invalid_directions(
            board, my_position, directions, enemies)
        directions = self._filter_unsafe_directions(board, my_position,
                                                    valid_directions, bombs)
        directions = self._filter_recently_visited(
            directions, my_position, self._recently_visited_positions)
        if len(directions) > 1:
            directions = [k for k in directions if k != constants.Action.Stop]
        if not len(directions):
            directions = [constants.Action.Stop]

        # Add this position to the recently visited uninteresting positions so we don't return immediately.
        self._recently_visited_positions.append(my_position)
        self._recently_visited_positions = self._recently_visited_positions[
            -self._recently_visited_length:]

        return random.choice(directions).value
Ejemplo n.º 16
0
def not_stuck_directions(obs):
    """Flag the moves that do not leave the agent boxed in next to a bomb.

    A bomb is placed on the agent's current cell of a copy of the board, and
    each direction in ``dirs`` whose target cell is on the board and passable
    is probed with a search from that cell.

    Args:
        obs: Observation mapping; reads 'position', 'board', 'enemies' and
            'bomb_blast_strength'.

    Returns:
        A list of six ints indexed by ``direction.value``: 1 for every
        direction that is not stuck, 0 otherwise. When no direction qualifies
        the result is ``[1, -1, -1, -1, -1, -1]``.
    """
    my_position = tuple(obs['position'])
    board = np.array(obs['board'])
    enemies = [consts.Item(e) for e in obs['enemies']]

    def is_stuck_direction(next_position, bomb_range, next_board, enemies):
        """Return True unless the search from next_position finds a way out.

        A way out is a cell that shares neither coordinate with the start,
        or a cell popped with a distance greater than bomb_range.
        """
        start_x, start_y = next_position
        visited = set()
        frontier = queue.PriorityQueue()
        frontier.put((0, next_position))

        while not frontier.empty():
            dist, position = frontier.get()
            visited.add(position)
            px, py = position

            # Off both the start row and column, or beyond the given range.
            if (start_x != px and start_y != py) or dist > bomb_range:
                return False

            for d_row, d_col in ((-1, 0), (1, 0), (0, -1), (0, 1)):
                candidate = (px + d_row, py + d_col)
                if candidate in visited:
                    continue
                if not util.position_on_board(next_board, candidate):
                    continue
                if not util.position_is_passable(next_board, candidate,
                                                 enemies):
                    continue

                # Priority is the Manhattan distance from the start cell.
                manhattan = (abs(candidate[0] - start_x) +
                             abs(candidate[1] - start_y))
                frontier.put((manhattan, candidate))

        return True

    # Simulate having just dropped a bomb where the agent stands.
    next_board = board.copy()
    next_board[my_position] = consts.Item.Bomb.value

    res = [0] * 6
    for direction in dirs:
        next_position = util.get_next_position(my_position, direction)
        nx, ny = next_position
        walkable = (util.position_on_board(next_board, next_position)
                    and util.position_is_passable(next_board, next_position,
                                                  enemies))
        if not walkable:
            continue

        bomb_range = obs['bomb_blast_strength'][nx, ny]
        if is_stuck_direction(next_position, bomb_range, next_board,
                              enemies):
            continue

        res[direction.value] = 1

    if not any(res):
        # Nothing qualifies: only index 0 is flagged, the rest are marked -1.
        res = [1] + [-1] * 5
    return res