def _is_closed(self, board, position):
    """
    Report whether a flood fill from ``position`` runs out of cells
    without ever touching an agent.

    The fill spreads over the four neighbours of every expanded cell.
    Cells rejected by ``self._on_board`` are ignored. Cells accepted by
    ``utility.position_is_wall`` or ``utility.position_is_fog`` are
    marked as visited but never expanded.

    Parameters
    ----------
    board = np.array(obs['board'])
    position = tuple(obs['position'])

    Returns
    -------
    bool
        False as soon as a newly visited cell satisfies
        ``utility.position_is_agent``; True when the fill is exhausted.
    """
    neighbour_offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))

    visited = np.full(board.shape, False)
    visited[position] = True
    frontier = [position]

    while frontier:
        row, col = frontier.pop()
        for d_row, d_col in neighbour_offsets:
            candidate = (row + d_row, col + d_col)
            if not self._on_board(candidate) or visited[candidate]:
                continue
            visited[candidate] = True

            # Touching any agent means the region is not sealed off.
            if utility.position_is_agent(board, candidate):
                return False

            blocked = (utility.position_is_wall(board, candidate)
                       or utility.position_is_fog(board, candidate))
            if not blocked:
                frontier.append(candidate)

    return True
def me_to_enemy_all_corridor(board, pos1, pos2):
    """Check that pos2 sits in a dead-end corridor running straight from pos1.

    ``pos1`` and ``pos2`` must share a row index or a column index (this
    is asserted). The travel direction is the one leading from ``pos1``
    towards ``pos2``.

    Returns True only when all of the following hold:

    * ``pos2`` is in a corridor with respect to the directions
      perpendicular to the travel direction, and
    * the cell just beyond ``pos2`` is off the board, or is a wall and
      not fog, and
    * every cell strictly between ``pos1`` and ``pos2`` is a passage
      that is also in such a corridor.
    """
    assert (pos1[0] == pos2[0] or pos1[1] == pos2[1])

    # Work out which way pos2 lies relative to pos1.
    if pos1[0] != pos2[0]:
        if pos1[0] < pos2[0]:
            direction = constants.Action.Down
        else:
            direction = constants.Action.Up
    elif pos1[1] < pos2[1]:
        direction = constants.Action.Right
    else:
        direction = constants.Action.Left

    side_dirs = perpendicular_directions(direction)
    beyond_enemy = utility.get_next_position(pos2, direction)

    # The cell behind pos2 closes the corridor when it is off the board
    # or a wall; a fog cell never counts as closing it.
    if not utility.position_on_board(board, beyond_enemy):
        dead_end = True
    elif utility.position_is_fog(board, beyond_enemy):
        dead_end = False
    else:
        dead_end = utility.position_is_wall(board, beyond_enemy)

    # pos2 (the enemy) must be in an impasse
    if not (position_is_in_corridor(board, pos2, side_dirs) and dead_end):
        return False

    # Walk from pos1 towards pos2; every intermediate cell must be a
    # passage that is itself part of the corridor.
    cell = utility.get_next_position(pos1, direction)
    while cell != pos2:
        if not utility.position_is_passage(board, cell):
            return False
        if not position_is_in_corridor(board, cell, side_dirs):
            return False
        cell = utility.get_next_position(cell, direction)
    return True
def position_is_passable(board, position, enemies):
    '''Determine whether a position can be passed.

    A position is passable when at least one of the agent, power-up,
    passage or fog checks from ``utility`` accepts it and
    ``utility.position_is_enemy`` does not.
    '''
    walkable_checks = (
        utility.position_is_agent,
        utility.position_is_powerup,
        utility.position_is_passage,
        utility.position_is_fog,
    )
    walkable = any(check(board, position) for check in walkable_checks)
    if not walkable:
        return False
    return not utility.position_is_enemy(board, position, enemies)
def _stop_condition(board, pos, exclude_agent=True):
    """Tell whether a scan along the board has to stop at ``pos``.

    The scan stops when ``pos`` is off the board, is fog, or is a wall.
    When ``exclude_agent`` is False, a cell holding an agent stops the
    scan as well; with the default (True) agents are not checked.
    """
    if not utility.position_on_board(board, pos):
        return True
    if utility.position_is_fog(board, pos) or utility.position_is_wall(board, pos):
        return True
    if exclude_agent:
        return False
    return bool(utility.position_is_agent(board, pos))
def _all_bomb_real_life(board, bomb_life, bomb_blast_st):
    """Compute the effective remaining life of every bomb, allowing for chains.

    Starting from a copy of ``bomb_life``, each bomb cell is repeatedly
    replaced by the smallest life found by ``get_bomb_real_life`` until a
    full sweep over the board changes nothing.

    Parameters
    ----------
    board
        Board indexed by ``(row, col)``; ``len(board)`` is used for both
        axes, so the board is presumably square -- TODO confirm.
    bomb_life
        Array indexed as ``bomb_life[i, j]``; cells below ``EPSILON`` are
        skipped (presumably "no bomb here" -- confirm against caller).
    bomb_blast_st
        Per-cell blast strength, indexed by position.

    Returns
    -------
    A copy of ``bomb_life`` with the converged values written into the
    cells that were processed.
    """

    def get_bomb_real_life(bomb_position, bomb_real_life):
        """One bomb's real life is the minimum life of its adjacent bomb.

        Note that this could be chained, so please call it on each bomb
        multiple times until convergence.
        """
        dirs = _all_directions(exclude_stop=True)
        min_life = bomb_real_life[bomb_position]
        for d in dirs:
            pos = bomb_position
            last_pos = bomb_position
            # Walk away from the bomb until _stop_condition ends the scan.
            while True:
                pos = utility.get_next_position(pos, d)
                if _stop_condition(board, pos):
                    break
                if bomb_real_life[pos] > 0:
                    # Adopt the other bomb's life only if it is shorter and
                    # the previous link of the chain lies within distance
                    # bomb_blast_st[pos] - 1 of it.
                    if bomb_real_life[pos] < min_life and \
                            _manhattan_distance(pos, last_pos) <= bomb_blast_st[pos] - 1:
                        min_life = bomb_real_life[pos]
                        last_pos = pos
                    # NOTE(review): the nesting of this else (paired with the
                    # inner if) was reconstructed from flattened source --
                    # confirm against the upstream file.
                    else:
                        break
        return min_life

    bomb_real_life_map = np.copy(bomb_life)
    sz = len(board)
    while True:
        # One entry per processed bomb: True when its value did not change.
        no_change = []
        for i in range(sz):
            for j in range(sz):
                if utility.position_is_wall(board, (i, j)) or utility.position_is_powerup(board, (i, j)) \
                        or utility.position_is_fog(board, (i, j)):
                    continue
                if bomb_life[i, j] < 0 + EPSILON:
                    continue
                real_life = get_bomb_real_life((i, j), bomb_real_life_map)
                no_change.append(bomb_real_life_map[i, j] == real_life)
                # Updated in place, so later cells in the same sweep already
                # see this new value.
                bomb_real_life_map[i, j] = real_life
        # Converged (also true when no bomb was processed at all).
        if all(no_change):
            break
    return bomb_real_life_map
def _find_safe_directions(self, board, my_position, unsafe_directions, bombs, enemies):
    """Pick the directions that can be taken from ``my_position``.

    ``unsafe_directions`` maps a direction to a bomb range. When all four
    directions are unsafe, the board is copied with a bomb written at
    ``my_position`` and the first unsafe direction whose next cell is on
    the board, passable and not "stuck" is returned as a one-element
    list; if none qualifies, ``[constants.Action.Stop]`` is returned.

    Otherwise the result is every on-board direction that is not unsafe
    and whose cell is passable or fog. If that list is empty, the unsafe
    directions that stay on the board are returned, and if that is empty
    as well, ``[constants.Action.Stop]``.

    ``bombs`` is accepted but not used here.
    """
    neighbour_offsets = [(-1, 0), (1, 0), (0, -1), (0, 1)]

    def is_stuck_direction(next_position, bomb_range, next_board, enemies):
        '''Helper that decides whether moving to next_position leaves us stuck.

        Cells reachable from next_position are explored in order of
        Manhattan distance. We are not stuck as soon as an explored cell
        differs from next_position in both coordinates or lies further
        away than bomb_range.
        '''
        start_x, start_y = next_position
        explored = set()
        frontier = queue.PriorityQueue()
        frontier.put((0, next_position))

        while not frontier.empty():
            distance, current = frontier.get()
            explored.add(current)
            cur_x, cur_y = current

            off_both_axes = start_x != cur_x and start_y != cur_y
            if off_both_axes or distance > bomb_range:
                return False

            for d_x, d_y in neighbour_offsets:
                neighbour = (d_x + cur_x, d_y + cur_y)
                if neighbour in explored:
                    continue
                if not utility.position_on_board(next_board, neighbour):
                    continue
                if not utility.position_is_passable(next_board, neighbour,
                                                    enemies):
                    continue
                neighbour_distance = (abs(neighbour[0] - start_x)
                                      + abs(neighbour[1] - start_y))
                frontier.put((neighbour_distance, neighbour))
        return True

    # All directions are unsafe. Return a position that won't leave us locked.
    if len(unsafe_directions) == 4:
        bombed_board = board.copy()
        bombed_board[my_position] = constants.Item.Bomb.value

        for direction, bomb_range in unsafe_directions.items():
            target = utility.get_next_position(my_position, direction)
            enterable = (utility.position_on_board(bombed_board, target)
                         and utility.position_is_passable(
                             bombed_board, target, enemies))
            if not enterable:
                continue
            if not is_stuck_direction(target, bomb_range, bombed_board,
                                      enemies):
                # We found a direction that works. The .items provided
                # a small bit of randomness. So let's go with this one.
                return [direction]
        return [constants.Action.Stop]

    x, y = my_position
    off_board = []  # The directions that will go off the board.
    safe = []

    for d_x, d_y in neighbour_offsets:
        position = (x + d_x, y + d_y)
        direction = utility.get_direction(my_position, position)

        if not utility.position_on_board(board, position):
            # Don't include any direction that will go off of the board.
            off_board.append(direction)
        elif direction in unsafe_directions:
            # Don't include any direction that we know is unsafe.
            continue
        elif utility.position_is_passable(
                board, position, enemies) or utility.position_is_fog(
                    board, position):
            safe.append(direction)

    if safe:
        return safe

    # We don't have any safe directions, so return something that is allowed.
    fallback = [k for k in unsafe_directions if k not in off_board]
    if fallback:
        return fallback

    # We don't have ANY directions. So return the stop choice.
    return [constants.Action.Stop]
def act(self, obs, action_space, info):
    """Choose the action for the current step from a cascade of heuristics.

    The method simulates future boards, determines which actions are
    survivable, and then returns the first choice produced by the
    following steps, in order:

    1. the only survivable action, if exactly one remains;
    2. a bomb, when the best "blocking" action is Stop or Bomb;
    3. a move towards a better place to bomb;
    4. a bomb at a wood-breaking target;
    5. a move towards a good item (ExtraBomb, IncrRange, Kick);
    6. a move towards a position from which wood can be bombed;
    7. a kick that reduces an enemy's survivable nodes;
    8. a move towards the fog that has not been seen for the longest;
    9. the most survivable action.

    Parameters
    ----------
    obs : dict
        Keys read here: 'board', 'position', 'ammo', 'blast_strength',
        'enemies', 'teammate', 'can_kick', 'bomb_blast_strength',
        'bomb_life'.
    action_space
        Not used in this method.
    info : dict
        Keys read here: 'list_boards_no_move', 'curr_bombs',
        'curr_flames', 'enemy_blast_strength', 'moving_direction',
        'might_powerup', 'since_last_seen'.

    Returns
    -------
    The ``.value`` of the chosen ``constants.Action``, or None when no
    action is survivable.

    Notes
    -----
    Diagnostics are printed to stdout; one of the prints is guarded by
    the name ``verbose``, which is not defined in this method.
    """

    #
    # Definitions
    #

    enemy_mobility = 4
    enemy_bomb = 1

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_teammate = obs["teammate"]
    my_kick = obs["can_kick"]  # whether I can kick

    print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t")

    #
    # Understand current situation
    #

    # fraction of blocked node in the survival trees of enemies
    _list_boards = deepcopy(info["list_boards_no_move"])
    # Replace my own agent on the copied boards: by a bomb if there is
    # blast strength recorded at my position, by a passage otherwise.
    if obs["bomb_blast_strength"][my_position]:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Bomb.value
    else:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Passage.value
    total_frac_blocked, n_survivable_nodes \
        = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

    #np.set_printoptions(precision=2)
    #print("frac")
    #print(total_frac_blocked)

    # where to place bombs to break wood
    bomb_target_wood, n_breakable \
        = self._get_bomb_target(info["list_boards_no_move"][-1],
                                my_position,
                                my_blast_strength,
                                constants.Item.Wood,
                                max_breakable=False)

    #bomb_target_enemy = (total_frac_blocked > 0)
    #bomb_target = bomb_target_enemy + bomb_target_wood
    bomb_target = bomb_target_wood

    # List of boards simulated
    list_boards, _ = self._board_sequence(
        board,
        info["curr_bombs"],
        info["curr_flames"],
        self._search_range,
        my_position,
        enemy_mobility=enemy_mobility,
        enemy_bomb=enemy_bomb,
        enemy_blast_strength=info["enemy_blast_strength"])

    # some bombs may explode with extra bombs, leading to under estimation
    for t in range(len(list_boards)):
        flame_positions = np.where(
            info["list_boards_no_move"][t] == constants.Item.Flames.value)
        list_boards[t][flame_positions] = constants.Item.Flames.value

    #print("boards")
    #for t, b in enumerate(list_boards):
    #    print(t)
    #    print(b[-3:,:])
    #    if t > 2:
    #        break

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, succ, _ \
        = self._search_time_expanded_network(list_boards,
                                             my_position)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable,
                                       obs,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       enemy_mobility=enemy_mobility,
                                       enemy_bomb=enemy_bomb,
                                       enemy_blast_strength=info["enemy_blast_strength"])

    # n_survivable maps an action to a list of survivable-position counts
    # (one entry per simulated step).
    n_survivable = dict()
    kick_actions = list()
    if my_kick:
        # Positions where we kick a bomb if we move to
        kickable, _ = self._kickable_positions(obs, info["moving_direction"])
        for next_position in kickable:
            # consider what happens if I kick a bomb
            my_action = self._get_direction(my_position, next_position)

            # do not kick into fog
            dx = next_position[0] - my_position[0]
            dy = next_position[1] - my_position[1]
            position = next_position
            is_fog = False
            # Scan along the kick direction up to the edge of the board.
            while self._on_board(position):
                if utility.position_is_fog(board, position):
                    is_fog = True
                    break
                position = (position[0] + dx, position[1] + dy)
            if is_fog:
                continue

            # NOTE: next_position is rebound here to the second value
            # returned by _board_sequence.
            list_boards_with_kick, next_position \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=enemy_mobility,
                                       enemy_bomb=enemy_bomb,
                                       enemy_blast_strength=info["enemy_blast_strength"])
            #print(list_boards_with_kick)
            survivable_with_kick, prev_kick, succ_kick, _ \
                = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                     next_position)
            if next_position in survivable_with_kick[0]:
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [
                    len(s) for s in survivable_with_kick[1:]
                ]
                kick_actions.append(my_action)
    else:
        kickable = set()

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    #print("survivable actions", survivable_actions)

    if len(survivable_actions) == 0:
        return None

    #
    # Items and bomb target that can be reached in a survivable manner
    #

    reachable_items, reached, next_to_items \
        = self._find_reachable_items(list_boards,
                                     my_position,
                                     survivable,
                                     bomb_target)

    #
    # Evaluate the survivability of each action
    #

    x, y = my_position
    for action in survivable_actions:
        # for each survivable action, check the survivability
        if action == constants.Action.Bomb:
            n_survivable[action] = [
                len(s) for s in survivable_with_bomb[1:]
            ]
            continue

        if action == constants.Action.Up:
            dx = -1
            dy = 0
        elif action == constants.Action.Down:
            dx = 1
            dy = 0
        elif action == constants.Action.Left:
            dx = 0
            dy = -1
        elif action == constants.Action.Right:
            dx = 0
            dy = 1
        elif action == constants.Action.Stop:
            dx = 0
            dy = 0
        else:
            raise ValueError()
        next_position = (x + dx, y + dy)
        # This overwrites any count stored for the same action by the kick
        # branch above.
        n_survivable[action], _info = self._count_survivable(
            succ, 1, next_position)

    if verbose:
        print("n_survivable")
        for a in n_survivable:
            print(a, n_survivable[a])

    #
    # Avoid the action leading to no choice if possible
    #
    updated = False

    # Drop actions whose last count is at most half of the best last count.
    max_survivable_positions = max([n[-1] for n in n_survivable.values()])
    if max_survivable_positions > 1:
        for a in n_survivable:
            if n_survivable[a][-1] > max_survivable_positions / 2:
                continue
            is_survivable[a] = False
            updated = True

    # Drop actions whose smallest count from index enemy_mobility onwards
    # is 1, provided some other action does better than that.
    # NOTE(review): min() raises ValueError on an empty slice -- presumably
    # every list is longer than enemy_mobility; confirm.
    minn = defaultdict(int)
    for a in n_survivable:
        minn[a] = min(n_survivable[a][enemy_mobility:])
    maxmin = max(minn.values())
    if maxmin > 1:
        for a in minn:
            if minn[a] == 1:
                is_survivable[a] = False
                updated = True

    if updated:
        survivable_actions = [a for a in is_survivable if is_survivable[a]]

    #
    # Choose the survivable action, if it is the only choice
    #

    if len(survivable_actions) == 1:

        # move to the position if it is the only survivable position
        action = survivable_actions[0]
        print("The only survivable action", action)
        return action.value

    # The bare string below is a disabled block of code; it is evaluated as
    # a no-op expression.
    """
    #
    # Bomb if it has dominating survivability
    #

    if is_survivable[constants.Action.Bomb]:
        bomb_is_most_survivable = True
        bomb_sorted = np.array(sorted(n_survivable[constants.Action.Bomb]))
        for action in n_survivable:
            if action == constants.Action.Bomb:
                continue
            action_sorted = np.array(sorted(n_survivable[action]))
            if any(action_sorted > bomb_sorted):
                bomb_is_most_survivable = False
                break
        if bomb_is_most_survivable:
            action = constants.Action.Bomb
            print("Bomb to survive", action)
            return action.value
    """

    #
    # Bomb at a target
    #

    # Pick the survivable action whose next position has the largest
    # total_frac_blocked value; stays None when no value exceeds 0.
    best_action = None
    max_block = 0
    for action in survivable_actions:
        next_position = self._get_next_position(my_position, action)
        block = total_frac_blocked[next_position]
        if block > max_block:
            max_block = block
            best_action = action

    if all([
            is_survivable[constants.Action.Bomb],
            best_action in [constants.Action.Stop, constants.Action.Bomb]
    ]):
        print("Place a bomb at a locally optimal position", constants.Action.Bomb)
        return constants.Action.Bomb.value

    #
    # Move towards where to bomb
    #

    if best_action not in [None, constants.Action.Bomb]:
        next_position = self._get_next_position(my_position, best_action)
        # TODO : PARAMETER TO OPTIMIZE
        if total_frac_blocked[next_position] > 0.1:
            print("Move towards better place to bomb", best_action)
            return best_action.value

    #
    # Bomb to break wood
    #

    consider_bomb = True
    if survivable_with_bomb is None:
        consider_bomb = False
    elif any([len(s) <= 1 for s in survivable_with_bomb[2:]]):
        # if not sufficiently survivable all the time after bomb, do not bomb
        consider_bomb = False
    elif self._might_break_powerup(info["list_boards_no_move"][-1],
                                   my_position,
                                   my_blast_strength,
                                   info["might_powerup"]):
        # if might break an item, do not bomb
        consider_bomb = False

    if consider_bomb and bomb_target[my_position]:
        # place bomb if I am at a bomb target
        print("Bomb at a bomb target", constants.Action.Bomb)
        return constants.Action.Bomb.value

    #
    # Move towards good items
    #

    good_items = [
        constants.Item.ExtraBomb, constants.Item.IncrRange,
        constants.Item.Kick
    ]
    good_time_positions = set()  # positions with good items
    for item in good_items:
        good_time_positions = good_time_positions.union(
            reachable_items[item])
    if len(good_time_positions) > 0:
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward good item", action)
            return action.value

    #
    # Move towards where to bomb to break wood
    #

    good_time_positions = reachable_items["target"]
    print("good time positions", good_time_positions)
    action = self._find_distance_minimizer(my_position,
                                           good_time_positions,
                                           prev,
                                           is_survivable)
    if action is not None:
        print("Moving toward where to bomb", action)
        return action.value

    #
    # Kick
    #

    for my_action in kick_actions:
        # Cell the kick action would move me into.
        if my_action == constants.Action.Up:
            next_position = (my_position[0] - 1, my_position[1])
        elif my_action == constants.Action.Down:
            next_position = (my_position[0] + 1, my_position[1])
        elif my_action == constants.Action.Right:
            next_position = (my_position[0], my_position[1] + 1)
        elif my_action == constants.Action.Left:
            next_position = (my_position[0], my_position[1] - 1)
        # do not kick a bomb if it will break a wall, enemies
        if info["moving_direction"][next_position] is None:
            print("checking static bomb")
            # if it is a static bomb
            if self._can_break(info["list_boards_no_move"][0],
                               next_position,
                               my_blast_strength,
                               [constants.Item.Wood] + my_enemies):
                continue

        list_boards_with_kick_no_move, _ \
            = self._board_sequence(obs["board"],
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=my_action,
                                   can_kick=True,
                                   enemy_mobility=0)

        for enemy in my_enemies:
            rows, cols = np.where(board == enemy.value)
            # Enemy is not on the observed board.
            if len(rows) == 0:
                continue
            enemy_position = (rows[0], cols[0])
            _survivable, _, _, _ \
                = self._search_time_expanded_network(list_boards_with_kick_no_move,
                                                     enemy_position)

            n_survivable_nodes_with_kick = sum(
                [len(positions) for positions in _survivable])
            # Kick as soon as it lowers this enemy's survivable node count.
            if n_survivable_nodes_with_kick < n_survivable_nodes[enemy]:
                print("Kicking to reduce the survivability",
                      n_survivable_nodes[enemy], "->", n_survivable_nodes_with_kick,
                      my_action)
                return my_action.value

    #
    # TODO : move toward might powerups
    #

    #
    # Move towards a fog where we have not seen longest
    #

    best_time_position = None
    oldest = 0
    for t, x, y in next_to_items[constants.Item.Fog]:
        neighbors = [(x + dx, y + dy)
                     for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
        # Largest "since last seen" value among the on-board neighbours.
        age = max([
            info["since_last_seen"][position] for position in neighbors
            if self._on_board(position)
        ])
        if age > oldest:
            oldest = age
            best_time_position = (t, x, y)

    if best_time_position is not None:
        action = self._find_distance_minimizer(my_position,
                                               [best_time_position],
                                               prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward oldest fog", action)
            return action.value

    #
    # Choose most survivable action
    #

    action = self._get_most_survivable_action(n_survivable)
    print("Most survivable action", action)
    return action.value
def _find_safe_directions(self, board, my_position, unsafe_directions, bombs, enemies, item):
    """Pick the directions that can be taken from ``my_position``.

    Parameters
    ----------
    board
        Current board; copied (not modified) when all directions are unsafe.
    my_position
        ``(x, y)`` tuple of the agent.
    unsafe_directions
        Mapping from direction to bomb range (iterated with ``.items()``).
    bombs
        Iterable of dicts; the keys 'position', 'bomb_life' and
        'blast_strength' are read here.
    enemies
        Passed through to ``utility.position_is_passable``.
    item
        Indexed with ``constants.Item(3)``; only the length of that entry
        is used. Presumably a mapping from item type to positions --
        TODO confirm against caller.

    Returns
    -------
    list
        A non-empty list of directions, or ``[constants.Action.Stop]``
        when nothing else is available.
    """

    def is_stuck_direction(next_position, bomb_range, next_board, enemies):
        '''Helper function to determine if the agent's next move is possible.'''
        Q = queue.PriorityQueue()
        Q.put((0, next_position))
        seen = set()

        next_x, next_y = next_position
        is_stuck = True
        while not Q.empty():
            dist, position = Q.get()
            seen.add(position)
            #FIXME is_stuck=False
            position_x, position_y = position
            # A cell that differs from the start in both coordinates has
            # left the start's row and column.
            if next_x != position_x and next_y != position_y:
                is_stuck = False
                break

            # A cell further away than the bomb range has been reached.
            if dist > bomb_range:
                is_stuck = False
                break

            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                new_position = (row + position_x, col + position_y)
                if new_position in seen:
                    continue

                if not utility.position_on_board(next_board, new_position):
                    continue

                if not utility.position_is_passable(
                        next_board, new_position, enemies):
                    continue

                # Manhattan distance from the start cell is the priority.
                dist = abs(row + position_x - next_x) + abs(col + position_y - next_y)
                Q.put((dist, new_position))
        return is_stuck

    # All directions are unsafe. Return a position that won't leave us locked.
    safe = []

    if len(unsafe_directions) == 4:
        next_board = board.copy()
        next_board[my_position] = constants.Item.Bomb.value
        disallowed = []
        for direction, bomb_range in unsafe_directions.items():
            next_position = utility.get_next_position(
                my_position, direction)
            next_x, next_y = next_position
            if not utility.position_on_board(next_board, next_position) or \
                    not utility.position_is_passable(next_board, next_position, enemies):
                disallowed.append(direction)
                continue

            if not is_stuck_direction(next_position, bomb_range, next_board, enemies):
                # We found a direction that works. The .items provided
                # a small bit of randomness. So let's go with this one.
                return [direction]
        # NOTE(review): the nesting of the statements below was reconstructed
        # from flattened source -- confirm against the upstream file.
        if not safe:
            # Before deciding to stay put, check whether a bomb sits on our
            # own position; if so, fall back to the unsafe directions (the
            # original note says "pick a random one"; the code keeps every
            # unsafe direction that was not disallowed).
            # for i in bombs:
            # if len(bombs) == 1 :
            if len(item[constants.Item(3)]) == 1:
                # if my_position == i['position']:
                for bomb in bombs:
                    if my_position == bomb['position']:
                        safe = [
                            k for k in unsafe_directions
                            if k not in disallowed
                        ]
                        # break
        if not safe:
            safe = [constants.Action.Stop]
        return safe

    x, y = my_position
    disallowed = []  # The directions that will go off the board.

    for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
        position = (x + row, y + col)
        direction = utility.get_direction(my_position, position)

        # Don't include any direction that will go off of the board.
        if not utility.position_on_board(board, position):
            disallowed.append(direction)
            continue

        # Don't include any direction that we know is unsafe.
        if direction in unsafe_directions:
            # Disallow this unsafe position when it cannot be passed, to
            # avoid kicking a bomb.
            if not utility.position_is_passable(board, position, enemies):
                disallowed.append(direction)
            # If moving in the unsafe direction would get us blown up right
            # then, we must not go there (deciding this would need the
            # bomb's life).
            # if
            continue

        if utility.position_is_passable(
                board, position, enemies) or utility.position_is_fog(
                    board, position):
            # After moving one cell there may be a bomb next to us: add the
            # direction, then take it back if a bomb with bomb_life == 1
            # shares a row or column with the cell within its blast strength.
            safe.append(direction)
            for bomb in bombs:
                if bomb['bomb_life'] == 1:
                    bomb_x, bomb_y = bomb['position']
                    if bomb_x == position[0] and abs(
                            bomb_y - position[1]) <= bomb['blast_strength']:
                        #remove the direction
                        safe.pop()
                        break
                    elif bomb_y == position[1] and abs(
                            bomb_x - position[0]) <= bomb['blast_strength']:
                        safe.pop()
                        break

    if not safe:
        # We don't have any safe directions, so return something that is allowed.
        safe = [k for k in unsafe_directions if k not in disallowed]

    if not safe:
        # We don't have ANY directions. So return the stop choice.
        return [constants.Action.Stop]

    return safe
def act(self, obs, action_space, info): # # Definitions # board = obs['board'] recently_seen_positions = (info["since_last_seen"] < 3) board[recently_seen_positions] = info["last_seen"][recently_seen_positions] my_position = obs["position"] # tuple([x,y]): my position my_ammo = obs['ammo'] # int: the number of bombs I have my_blast_strength = obs['blast_strength'] my_enemies = [constants.Item(e) for e in obs['enemies'] if e != constants.Item.AgentDummy] if obs["teammate"] != constants.Item.AgentDummy: my_teammate = obs["teammate"] else: my_teammate = None my_kick = obs["can_kick"] # whether I can kick if verbose: print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t") my_next_position = {constants.Action.Stop: my_position, constants.Action.Bomb: my_position} for action in [constants.Action.Up, constants.Action.Down, constants.Action.Left, constants.Action.Right]: next_position = self._get_next_position(my_position, action) if self._on_board(next_position): if board[next_position] == constants.Item.Rigid.value: my_next_position[action] = None else: my_next_position[action] = next_position else: my_next_position[action] = None # # Understand current situation # if all([info["prev_action"] not in [constants.Action.Stop, constants.Action.Bomb], info["prev_position"] == my_position]): # if previously blocked, do not reapeat with some probability self._inv_tmp *= self._backoff else: self._inv_tmp = self._inv_tmp_init # enemy positions enemy_positions = list() for enemy in my_enemies: rows, cols = np.where(board==enemy.value) if len(rows) == 0: continue enemy_positions.append((rows[0], cols[0])) # teammate position teammate_position = None if my_teammate is not None: rows, cols = np.where(board==my_teammate.value) if len(rows): teammate_position = (rows[0], cols[0]) # Positions where we kick a bomb if we move to if my_kick: kickable, might_kickable = self._kickable_positions(obs, info["moving_direction"]) else: kickable = 
set() might_kickable = set() # positions that might be blocked if teammate_position is None: agent_positions = enemy_positions else: agent_positions = enemy_positions + [teammate_position] might_blocked = self._get_might_blocked(board, my_position, agent_positions, might_kickable) # enemy positions over time # these might be dissappeared due to extra flames if len(enemy_positions): rows = [p[0] for p in enemy_positions] cols = [p[1] for p in enemy_positions] list_enemy_positions = [(rows, cols)] _enemy_positions = list() for t in range(self._enemy_mobility): rows, cols = list_enemy_positions[-1] for x, y in zip(rows, cols): for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1)]: next_position = (x + dx, y + dy) if not self._on_board(next_position): continue _board = info["list_boards_no_move"][t] if utility.position_is_passage(_board, next_position): _enemy_positions.append(next_position) _enemy_positions = set(_enemy_positions) rows = [p[0] for p in _enemy_positions] cols = [p[1] for p in _enemy_positions] list_enemy_positions.append((rows, cols)) else: list_enemy_positions = [] # survivable actions is_survivable = dict() for a in self._get_all_actions(): is_survivable[a] = False n_survivable = dict() list_boards = dict() for my_action in self._get_all_actions(): next_position = my_next_position[my_action] if next_position is None: continue if my_action == constants.Action.Bomb: if any([my_ammo == 0, obs["bomb_blast_strength"][next_position] > 0]): continue if all([utility.position_is_flames(board, next_position), info["flame_life"][next_position] > 1]): continue if all([my_action != constants.Action.Stop, obs["bomb_blast_strength"][next_position] > 0, next_position not in set.union(kickable, might_kickable)]): continue if next_position in set.union(kickable, might_kickable): # do not kick into fog dx = next_position[0] - my_position[0] dy = next_position[1] - my_position[1] position = next_position is_fog = False while self._on_board(position): if 
utility.position_is_fog(board, position): is_fog = True break position = (position[0] + dx, position[1] + dy) if is_fog: continue # list of boards from next steps list_boards[my_action], _ \ = self._board_sequence(obs["board"], info["curr_bombs"], info["curr_flames"], self._search_range, my_position, my_blast_strength=my_blast_strength, my_action=my_action, can_kick=my_kick, enemy_mobility=self._enemy_mobility, enemy_bomb=self._enemy_bomb, agent_blast_strength=info["agent_blast_strength"]) # agents might be disappeared, because of overestimated bombs for t, positions in enumerate(list_enemy_positions): list_boards[my_action][t][positions] = constants.Item.AgentDummy.value # some bombs may explode with extra bombs, leading to under estimation for t in range(len(list_boards[my_action])): flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value) list_boards[my_action][t][flame_positions] = constants.Item.Flames.value """ processed = Parallel(n_jobs=-1, verbose=0)( [delayed(search_time_expanded_network)(list_boards[action][1:], my_next_position[action], action) for action in list_boards] ) for survivable, my_action in processed: if my_next_position[my_action] in survivable[0]: is_survivable[my_action] = True n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]] """ for my_action in list_boards: survivable = search_time_expanded_network(list_boards[my_action][1:], my_next_position[my_action]) if my_next_position[my_action] in survivable[0]: is_survivable[my_action] = True n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]] survivable_actions = list() for a in is_survivable: if not is_survivable[a]: continue if might_blocked[a] and not is_survivable[constants.Action.Stop]: continue if n_survivable[a][-1] <= 1: is_survivable[a] = False continue survivable_actions.append(a) # # Choose action # if len(survivable_actions) == 0: # # return None, if no survivable actions # return None elif len(survivable_actions) == 1: # 
# Choose the survivable action, if it is the only choice # action = survivable_actions[0] if verbose: print("The only survivable action", action) return action.value # # Bomb at a target # # fraction of blocked node in the survival trees of enemies _list_boards = deepcopy(info["list_boards_no_move"]) if obs["bomb_blast_strength"][my_position]: for b in _list_boards: if utility.position_is_agent(b, my_position): b[my_position] = constants.Item.Bomb.value else: for b in _list_boards: if utility.position_is_agent(b, my_position): b[my_position] = constants.Item.Passage.value total_frac_blocked, n_survivable_nodes, blocked_time_positions \ = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"]) if teammate_position is not None: total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \ = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"]) """ np.set_printoptions(precision=3) print("enemy") print(total_frac_blocked) print("teammate") print(total_frac_blocked_teammate) print("product") prod = total_frac_blocked * (1 - total_frac_blocked_teammate) print(prod[:5,:5]) """ p_survivable = defaultdict(float) for action in n_survivable: p_survivable[action] = sum(n_survivable[action]) / self._my_survivability_threshold if p_survivable[action] > 1: p_survivable[action] = 1 block = defaultdict(float) for action in [constants.Action.Stop, constants.Action.Up, constants.Action.Down, constants.Action.Left, constants.Action.Right]: next_position = my_next_position[action] if next_position is None: continue if next_position in set.union(kickable, might_kickable): # kick will be considered later continue if all([utility.position_is_flames(board, next_position), info["flame_life"][next_position] > 1, is_survivable[constants.Action.Stop]]): # if the next position is flames, # I want to stop to wait, which must be feasible block[action] = total_frac_blocked[next_position] * p_survivable[constants.Action.Stop] if 
teammate_position is not None: block[action] *= (1 - total_frac_blocked_teammate[next_position]) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) continue elif not is_survivable[action]: continue if all([might_blocked[action], not is_survivable[constants.Action.Stop]]): continue block[action] = total_frac_blocked[next_position] * p_survivable[action] if teammate_position is not None: block[action] *= (1 - total_frac_blocked_teammate[next_position]) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) if might_blocked[action]: block[action] = (total_frac_blocked[my_position] * p_survivable[constants.Action.Stop] + total_frac_blocked[next_position] * p_survivable[action]) / 2 if teammate_position is not None: block[action] *= (1 - total_frac_blocked_teammate[next_position]) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) if is_survivable[constants.Action.Bomb]: list_boards_with_bomb, _ \ = self._board_sequence(board, info["curr_bombs"], info["curr_flames"], self._search_range, my_position, my_blast_strength=my_blast_strength, my_action=constants.Action.Bomb) n_survivable_nodes_with_bomb = defaultdict(int) for enemy in my_enemies: # get survivable tree of the enemy rows, cols = np.where(board==enemy.value) if len(rows) == 0: continue enemy_position = (rows[0], cols[0]) _survivable = search_time_expanded_network(list_boards_with_bomb, enemy_position) n_survivable_nodes_with_bomb[enemy] = sum([len(positions) for positions in _survivable]) n_with_bomb = sum([n_survivable_nodes_with_bomb[enemy] for enemy in my_enemies]) n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies]) if n_with_none == 0: total_frac_blocked_with_bomb = 0 # place more bombs, so the stacked enemy cannot kick x, y = my_position for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]: next_position = (x + dx, y + dy) following_position = (x + 2 * dx, y + 2 * dy) if not 
self._on_board(following_position): continue if all([obs["bomb_life"][next_position] > 0, board[following_position] > constants.Item.AgentDummy.value]): total_frac_blocked_with_bomb = 1 else: total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none if teammate_position is not None: # get survivable tree of the teammate _survivable = search_time_expanded_network(list_boards_with_bomb, teammate_position) n_survivable_nodes_with_bomb_teammate = sum([len(positions) for positions in _survivable]) n_with_bomb = n_survivable_nodes_with_bomb_teammate n_with_none = n_survivable_nodes_teammate[my_teammate] if n_with_none == 0: total_frac_blocked_with_bomb_teammate = 0 else: total_frac_blocked_with_bomb_teammate = 1 - n_with_bomb / n_with_none action = constants.Action.Bomb block[action] = total_frac_blocked_with_bomb * p_survivable[action] if teammate_position is not None: block[action] *= (1 - total_frac_blocked_with_bomb_teammate) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) for next_position in kickable: action = self._get_direction(my_position, next_position) if not is_survivable[action]: continue list_boards_with_kick, _ \ = self._board_sequence(obs["board"], info["curr_bombs"], info["curr_flames"], self._search_range, my_position, my_action=action, can_kick=True) n_survivable_nodes_with_kick = defaultdict(int) for enemy in my_enemies: # get survivable tree of the enemy rows, cols = np.where(board==enemy.value) if len(rows) == 0: continue enemy_position = (rows[0], cols[0]) _survivable = search_time_expanded_network(list_boards_with_kick, enemy_position) n_survivable_nodes_with_kick[enemy] = sum([len(positions) for positions in _survivable]) n_with_kick = sum([n_survivable_nodes_with_kick[enemy] for enemy in my_enemies]) n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies]) if n_with_none == 0: total_frac_blocked[next_position] = 0 else: total_frac_blocked[next_position] = 1 - n_with_kick / n_with_none if 
teammate_position is not None: # get survivable tree of the teammate _survivable = search_time_expanded_network(list_boards_with_kick, teammate_position) n_survivable_nodes_with_kick_teammate = sum([len(positions) for positions in _survivable]) n_with_kick = n_survivable_nodes_with_kick_teammate n_with_none = n_survivable_nodes_teammate[my_teammate] if n_with_none == 0: total_frac_blocked_teammate[next_position] = 0 else: total_frac_blocked_teammate[next_position] = 1 - n_with_kick / n_with_none block[action] = total_frac_blocked[next_position] * p_survivable[action] if teammate_position is not None: block[action] *= (1 - total_frac_blocked_teammate[next_position]) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) max_block = -np.inf best_action = None for action in block: if block[action] > max_block: max_block = block[action] best_action = action if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp: if teammate_position is not None: teammate_safety = total_frac_blocked_with_bomb_teammate * n_survivable_nodes_with_bomb_teammate if any([teammate_safety > self._teammate_survivability_threshold, total_frac_blocked_with_bomb_teammate < self._interfere_threshold, total_frac_blocked_with_bomb_teammate < total_frac_blocked_teammate[my_position]]): teammate_ok = True else: teammate_ok = False else: teammate_ok = True if teammate_ok: if best_action == constants.Action.Bomb: if verbose: print("Bomb is best", constants.Action.Bomb) return constants.Action.Bomb.value if best_action == constants.Action.Stop: if verbose: print("Place a bomb at a locally optimal position", constants.Action.Bomb) return constants.Action.Bomb.value # # Move towards where to bomb # if best_action not in [None, constants.Action.Bomb]: next_position = my_next_position[best_action] should_chase = (total_frac_blocked[next_position] > self._chase_threshold) if teammate_position is not None: teammate_safety = 
total_frac_blocked_teammate[next_position] * n_survivable_nodes_teammate[my_teammate] if any([teammate_safety > self._teammate_survivability_threshold, total_frac_blocked_teammate[next_position] < self._interfere_threshold, total_frac_blocked_teammate[next_position] < total_frac_blocked_teammate[my_position]]): teammate_ok = True else: teammate_ok = False else: teammate_ok = True if should_chase and teammate_ok: if all([utility.position_is_flames(board, next_position), info["flame_life"][next_position] > 1, is_survivable[constants.Action.Stop]]): action = constants.Action.Stop if verbose: print("Wait flames life", action) return action.value else: if verbose: print("Move towards better place to bomb", best_action) return best_action.value # Exclude the action representing stop to wait max_block = -np.inf best_action = None for action in survivable_actions: if block[action] > max_block: max_block = block[action] best_action = action # # Do not take risky actions # most_survivable_action = self._action_most_survivable(n_survivable) # ignore actions with low survivability _survivable_actions = list() for action in n_survivable: n = sum(n_survivable[action]) if not is_survivable[action]: continue elif n > self._my_survivability_threshold: _survivable_actions.append(action) else: print("RISKY", action) is_survivable[action] = False if len(_survivable_actions) > 1: survivable_actions = _survivable_actions elif best_action is not None: if verbose: print("Take the best action in danger", best_action) return best_action.value else: # Take the most survivable action if verbose: print("Take the most survivable action", most_survivable_action) return most_survivable_action.value # # Do not interfere with teammate # if all([teammate_position is not None, len(enemy_positions) > 0 or len(info["curr_bombs"]) > 0]): # ignore actions that interfere with teammate min_interfere = np.inf least_interfere_action = None _survivable_actions = list() for action in survivable_actions: if 
action == constants.Action.Bomb: frac = total_frac_blocked_with_bomb_teammate else: next_position = my_next_position[action] frac = total_frac_blocked_teammate[next_position] if frac < min_interfere: min_interfere = frac least_interfere_action = action if frac < self._interfere_threshold: _survivable_actions.append(action) else: print("INTERFERE", action) is_survivable[action] = False if len(_survivable_actions) > 1: survivable_actions = _survivable_actions elif best_action is not None: # Take the least interfering action if verbose: print("Take the best action in intereference", best_action) return best_action.value else: if verbose: print("Take the least interfering action", least_interfere_action) return least_interfere_action.value consider_bomb = True if not is_survivable[constants.Action.Bomb]: consider_bomb = False # # Find reachable items # # List of boards simulated list_boards, _ = self._board_sequence(board, info["curr_bombs"], info["curr_flames"], self._search_range, my_position, enemy_mobility=self._enemy_mobility, enemy_bomb=self._enemy_bomb, agent_blast_strength=info["agent_blast_strength"]) # some bombs may explode with extra bombs, leading to under estimation for t in range(len(list_boards)): flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value) list_boards[t][flame_positions] = constants.Item.Flames.value # List of the set of survivable time-positions at each time # and preceding positions survivable, prev, _, _ = self._search_time_expanded_network(list_boards, my_position) if len(survivable[-1]) == 0: survivable = [set() for _ in range(len(survivable))] # Items and bomb target that can be reached in a survivable manner _, _, next_to_items \ = self._find_reachable_items(list_boards, my_position, survivable) # # If I have seen an enemy recently and cannot see him now, them move to the last seen position # action = self._action_to_enemy(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info, 
my_enemies) if action is not None: if verbose: print("Moving toward last seen enemy", action) return action.value # # If I have seen a teammate recently, them move away from the last seen position # action = self._action_away_from_teammate(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info, my_teammate) if action is not None: if verbose: print("Moving away from last seen teammate", action) return action.value # # Move towards a fog where we have not seen longest # action = self._action_to_fog(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info) if action is not None: #if True: if self.random.uniform() < 0.8: if verbose: print("Moving toward oldest fog", action) return action.value # # Choose most survivable action # max_block = -np.inf best_action = None for action in survivable_actions: if action == constants.Action.Bomb: continue if block[action] > max_block: max_block = block[action] best_action = action if verbose: print("Take the best action among safe actions (nothing else to do)", best_action) if best_action is None: # this should not be the case return None else: return best_action.value
def act(self, obs, action_space, info):
    """Pick the action that keeps the most survivable time-positions open.

    Survivability is evaluated on ``info["list_boards_no_move"]`` and with
    ``enemy_mobility=0``.  Candidate actions are the ones reported
    survivable by ``_get_survivable_actions`` plus, when ``obs["can_kick"]``
    is set, every kick after which the position returned by
    ``_board_sequence`` is still survivable.  Each candidate is scored by
    the sum of its per-step survivable counts and one best-scoring candidate
    is returned (the candidate list is shuffled first, so ties are broken
    at random).

    When no candidate exists the following fallbacks are tried in order:
    kick any kickable bomb, step onto the teammate's cell, step onto an
    enemy's cell, step onto any on-board cell that is not rigid, wood or a
    bomb, and finally stop.

    Parameters
    ----------
    obs : dict
        Observation; the keys read here are 'board', 'position',
        'can_kick', 'enemies' and 'teammate'.
    action_space :
        Not used by this method.
    info : dict
        The keys read here are "list_boards_no_move", "curr_bombs",
        "curr_flames" and "moving_direction".

    Return
    ------
    int
        ``.value`` of the chosen ``constants.Action``.
    """

    #
    # Definitions
    #

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_kick = obs["can_kick"]  # whether I can kick
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_teammate = obs["teammate"]

    # board offset (dx, dy) produced by each non-bomb action
    offsets = {
        constants.Action.Up: (-1, 0),
        constants.Action.Down: (1, 0),
        constants.Action.Left: (0, -1),
        constants.Action.Right: (0, 1),
        constants.Action.Stop: (0, 0),
    }
    # order in which the last-resort moves are tried
    fallback_moves = (constants.Action.Right, constants.Action.Left,
                      constants.Action.Down, constants.Action.Up)

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, succ, _ \
        = self._search_time_expanded_network(info["list_boards_no_move"],
                                             my_position)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable,
                                       obs,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       enemy_mobility=0)

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    n_survivable = dict()
    kick_actions = list()
    if my_kick:
        # Positions where we kick a bomb if we move to
        kickable = self._kickable_positions(obs, info["moving_direction"])
        for next_position in kickable:
            # consider what happens if I kick a bomb
            my_action = self._get_direction(my_position, next_position)

            # do not kick into fog: scan from the bomb along the kick
            # direction until the edge of the board
            dx = next_position[0] - my_position[0]
            dy = next_position[1] - my_position[1]
            position = next_position
            is_fog = False
            while self._on_board(position):
                if utility.position_is_fog(board, position):
                    is_fog = True
                    break
                position = (position[0] + dx, position[1] + dy)
            if is_fog:
                continue

            list_boards_with_kick, next_position \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=0)
            survivable_with_kick, prev_kick, succ_kick, _ \
                = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                     next_position)
            if next_position in survivable_with_kick[0]:
                survivable_actions.append(my_action)
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [
                    len(s) for s in survivable_with_kick[1:]
                ]
                kick_actions.append(my_action)
    else:
        kickable = set()

    # NOTE(review): kick actions are part of survivable_actions, so the
    # counts stored for them above are overwritten by the loop below;
    # kick_actions is collected but never consulted -- confirm intent.
    x, y = my_position
    for action in survivable_actions:
        # for each survivable action, check the survivability
        if action == constants.Action.Bomb:
            n_survivable[action] = [
                len(s) for s in survivable_with_bomb[1:]
            ]
            continue

        if action not in offsets:
            raise ValueError()
        dx, dy = offsets[action]
        next_position = (x + dx, y + dy)
        n_survivable[action], _info = self._count_survivable(
            succ, 1, next_position)

    most_survivable_action = None
    if survivable_actions:
        survivable_score = dict()
        for action in n_survivable:
            survivable_score[action] = sum(n_survivable[action])
            if verbose:
                print(action, survivable_score[action], n_survivable[action])
        best_survivable_score = max(survivable_score.values())

        # shuffle so that ties between best actions are broken at random
        random.shuffle(survivable_actions)
        for action in survivable_actions:
            if survivable_score[action] == best_survivable_score:
                most_survivable_action = action
                break

    if most_survivable_action is not None:
        print("Most survivable action", most_survivable_action)
        return most_survivable_action.value

    # kick if possible
    if my_kick:
        kickable = self._kickable_positions(obs, info["moving_direction"])
    else:
        kickable = set()
    print("Kickable", my_kick, kickable)
    while kickable:
        next_position = kickable.pop()
        action = self._get_direction(my_position, next_position)
        print("Must kick to survive", action)
        return action.value

    # move towards a teammate if she is blocking
    for action in fallback_moves:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility._position_is_item(board, next_position, my_teammate):
            print("Must move to teammate to survive", action)
            return action.value

    # move towards an enemy
    for action in fallback_moves:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility.position_is_enemy(board, next_position, my_enemies):
            print("Must move to enemy to survive", action)
            return action.value

    # move towards anywhere besides rigid, wood or a bomb
    for action in fallback_moves:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility.position_is_rigid(board, next_position):
            continue
        if utility.position_is_wood(board, next_position):
            continue
        if utility.position_is_bomb(info["curr_bombs"], next_position):
            continue
        print("Try moving to survive", action)
        return action.value

    action = constants.Action.Stop
    print("Must die", action)
    # Return the value like every other exit of this method does
    # (the enum member itself used to be returned here).
    return action.value
def get_intermediate_rewards(prev_observations, cur_observations,
                             position_queue):
    """Compute shaped, zero-sum per-agent rewards for one step (team env only).

    For each of the four agents the previous and current observations are
    compared: an enemy that is no longer alive earns +0.5, a teammate that
    is no longer alive costs 0.5 and newly gaining the ability to kick
    earns 0.02.  The ammo, blast-strength, new-position and removed-wood
    terms currently carry a weight of zero.  Each agent's reward is then
    blended with its teammate's (agents 0/2 and 1/3 are teammates) and the
    opposing team's mean is subtracted so that the four rewards sum to zero.

    Parameters
    ----------
    prev_observations, cur_observations : sequence of dict
        Indexed 0..3.  The keys read are 'alive', 'enemies', 'teammate',
        'can_kick', 'ammo', 'blast_strength', 'position' and 'board'.
    position_queue :
        Container supporting ``in`` and ``append``; every current position
        that is not already in it is appended.

    Return
    ------
    list of 4 floats
    """
    # Note: only for team env

    def count_alive_enemies(observation):
        # enemies whose value is still listed in 'alive'
        alive = observation['alive']
        return sum(1 for e in observation['enemies'] if e.value in alive)

    def teammate_alive(observation):
        return 1 if observation['teammate'].value in observation['alive'] else 0

    rewards = [0.0, 0.0, 0.0, 0.0]
    for i in range(4):
        before = prev_observations[i]
        now = cur_observations[i]

        enemies_before = count_alive_enemies(before)
        teammate_before = teammate_alive(before)
        wood_before = list(
            zip(*np.where(before['board'] == constants.Item.Wood.value)))

        enemies_now = count_alive_enemies(now)
        teammate_now = teammate_alive(now)
        cur_position = now['position']

        enemies_lost = enemies_before - enemies_now
        if enemies_lost > 0:
            rewards[i] += enemies_lost * 0.5

        teammates_lost = teammate_before - teammate_now
        if teammates_lost > 0:
            rewards[i] -= teammates_lost * 0.5

        if now['can_kick'] and not before['can_kick']:
            rewards[i] += 0.02

        # The remaining terms are kept as tuning knobs with zero weight.
        if now['ammo'] - before['ammo'] > 0:
            rewards[i] += 0.00
        if now['blast_strength'] - before['blast_strength'] > 0:
            rewards[i] += 0.00
        if cur_position not in position_queue:
            rewards[i] += 0.000
            position_queue.append(cur_position)
        for cell in wood_before:
            cur_board = now['board']
            still_covered = (utility.position_is_wall(cur_board, cell)
                             or utility.position_is_fog(cur_board, cell))
            if not still_covered:
                rewards[i] += 0.000

    # 0 2 teammates, 1 3 teammates
    team_spirit = 0.2
    r0, r1, r2, r3 = [
        own * (1 - team_spirit) + team_spirit * rewards[(i + 2) % 4]
        for i, own in enumerate(rewards)
    ]
    mean1 = (r0 + r2) / 2.0
    mean2 = (r1 + r3) / 2.0

    # make sure it is zero-sum
    return [r0 - mean2, r1 - mean1, r2 - mean2, r3 - mean1]
def act(self, obs, action_space, info):
    """Choose an action by trying a fixed sequence of rules.

    The first rule that fires returns:

    1. If no action is survivable return ``None``; if exactly one is,
       take it (stopping on top of a bomb is dropped first whenever
       another survivable action exists).
    2. If placing a bomb keeps me sufficiently survivable: bomb when the
       blast can break fog/an enemy, when it reduces the number of
       survivable positions of a reachable enemy at some step, or when
       the bomb could afterwards be kicked into fog or towards an enemy.
    3. Kick a kickable bomb if I survive the kick (static bombs that
       would break an enemy where they are now are left alone).
    4. If standing on a bomb, move so that stepping back next turn kicks
       it into fog or towards an enemy.
    5. Move towards an enemy.
    6. Otherwise take a random survivable action.

    Parameters
    ----------
    obs : dict
        Observation; the keys read here are 'board', 'position', 'ammo',
        'blast_strength', 'enemies', 'teammate', 'can_kick' and
        'bomb_life'.
    action_space :
        Not used by this method.
    info : dict
        The keys read here are "list_boards_no_move", "curr_bombs",
        "curr_flames" and "moving_direction".

    Return
    ------
    int or None
        ``.value`` of the chosen ``constants.Action``, or ``None`` when
        no action is survivable.

    Notes
    -----
    Sets ``self._search_range`` to 10 and prints debugging output.
    """

    #
    # Definitions
    #

    self._search_range = 10

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_teammate = obs["teammate"]
    my_kick = obs["can_kick"]  # whether I can kick

    # Moving with `action` and then moving back kicks the bomb left behind
    # in the opposite direction; (dx, dy) is that kick direction.
    kick_back = [
        (constants.Action.Up, (1, 0)),  # kick direction is down
        (constants.Action.Down, (-1, 0)),  # kick direction is up
        (constants.Action.Left, (0, 1)),  # kick direction is right
        (constants.Action.Right, (0, -1)),  # kick direction is left
    ]

    print("my position", my_position, end="\t")

    #
    # Understand current situation
    #

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable_no_move, prev_no_move \
        = self._search_time_expanded_network(info["list_boards_no_move"],
                                             my_position)

    # Items that can be reached in a survivable manner
    reachable_items_no_move, reached_no_move, next_to_items_no_move \
        = self._find_reachable_items(info["list_boards_no_move"],
                                     my_position,
                                     survivable_no_move)

    # Simulation assuming enemies move

    for enemy_mobility in range(3, -1, -1):
        # List of boards simulated
        list_boards, _ = self._board_sequence(
            board,
            info["curr_bombs"],
            info["curr_flames"],
            self._search_range,
            my_position,
            enemy_mobility=enemy_mobility)

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev = self._search_time_expanded_network(
            list_boards, my_position)

        if len(survivable[1]) > 0:
            # Gradually reduce the mobility of enemy, so we have at least one survivable action
            break

    # Items that can be reached in a survivable manner
    reachable_items, reached, next_to_items \
        = self._find_reachable_items(list_boards,
                                     my_position,
                                     survivable)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable,
                                       obs,
                                       info["curr_bombs"],
                                       info["curr_flames"])

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    print("survivable actions are", survivable_actions)

    # Positions where we kick a bomb if we move to
    if my_kick:
        kickable = self._kickable_positions(obs, info["moving_direction"])
    else:
        kickable = set()

    # terminate the tab-terminated "my position" debug line
    print()

    #
    # Choose an action
    #

    """
    # This is not effective in the current form
    if len(survivable_actions) > 1:
        # avoid the position if only one position at the following step
        # the number of positions that can be reached from the next position
        next = defaultdict(set)
        next_count = defaultdict(int)
        for position in survivable[1]:
            next[position] = set([p for p in prev[2] if position in prev[2][p]])
            next_count[position] = len(next[position])
        print("next count", next_count)
        if max(next_count.values()) > 1:
            for position in survivable[1]:
                if next_count[position] == 1:
                    risky_action = self._get_direction(my_position, position)
                    is_survivable[risky_action] = False
            survivable_actions = [a for a in is_survivable if is_survivable[a]]
    """

    # Do not stay on a bomb if I can
    if all([
            obs["bomb_life"][my_position] > 0,
            len(survivable_actions) > 1,
            is_survivable[constants.Action.Stop]
    ]):
        is_survivable[constants.Action.Stop] = False
        survivable_actions = [a for a in is_survivable if is_survivable[a]]

    if len(survivable_actions) == 0:
        print("Must die")
        return None
    elif len(survivable_actions) == 1:
        # move to the position if it is the only survivable position
        action = survivable_actions[0]
        print("The only survivable action", action)
        return action.value

    # TODO : should check the survivability of all agents in one method

    # Place a bomb if
    # - it does not significantly reduce my survivability
    # - it can reduce the survivability of enemies

    consider_bomb = True
    if survivable_with_bomb is None:
        consider_bomb = False
    elif any([len(s) <= 2 for s in survivable_with_bomb[1:]]):
        # if not sufficiently survivable all the time after bomb, do not bomb
        consider_bomb = False

    if consider_bomb:
        # place bomb if can reach fog/enemy
        if self._can_break(info["list_boards_no_move"][-1], my_position,
                           my_blast_strength,
                           [constants.Item.Fog] + my_enemies):
            print("Bomb to break fog/enemy", constants.Action.Bomb)
            print(info["list_boards_no_move"][-1])
            return constants.Action.Bomb.value

        for enemy in my_enemies:
            # check if the enemy is reachable
            if len(reachable_items_no_move[enemy]) == 0:
                continue

            # can reach the enemy at enemy_position in enemy_time step
            enemy_time = reachable_items_no_move[enemy][0][0]
            enemy_position = reachable_items_no_move[enemy][0][1:3]

            # check if placing a bomb can reduce the survivability
            # of the enemy
            survivable_before, _ = self._search_time_expanded_network(
                info["list_boards_no_move"], enemy_position)

            board_with_bomb = deepcopy(obs["board"])
            curr_bombs_with_bomb = deepcopy(info["curr_bombs"])
            # lay a bomb
            board_with_bomb[my_position] = constants.Item.Bomb.value
            bomb = characters.Bomb(
                characters.Bomber(),  # dummy owner of the bomb
                my_position,
                constants.DEFAULT_BOMB_LIFE,
                my_blast_strength,
                None)
            curr_bombs_with_bomb.append(bomb)
            list_boards_with_bomb, _ \
                = self._board_sequence(board_with_bomb,
                                       curr_bombs_with_bomb,
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       enemy_mobility=0)
            survivable_after, _ \
                = self._search_time_expanded_network(list_boards_with_bomb,
                                                     enemy_position)

            good_before = np.array([len(s) for s in survivable_before])
            good_after = np.array([len(s) for s in survivable_after])
            # TODO : what are good criteria?
            if any(good_after < good_before):
                # place a bomb if it makes sense
                print("Bomb to kill an enemy", constants.Action.Bomb)
                print("before", good_before)
                print("after ", good_after)
                print([len(s) for s in survivable])
                print([len(s) for s in survivable_with_bomb])
                return constants.Action.Bomb.value

            """
            # find direction towards enemy
            positions = set([x[1:3] for x in next_to_items_no_move[enemy]])
            for t in range(enemy_time, 1, -1):
                _positions = set()
                for position in positions:
                    _positions = _positions.union(prev_no_move[t][position])
                positions = _positions.copy()

            if enemy_time <= my_blast_strength:
                #if True:
                positions.add(my_position)
                positions_after_bomb = set(survivable[1]).difference(positions)
                if positions_after_bomb:
                    print("Bomb to kill an enemy", enemy, constants.Action.Bomb)
                    return constants.Action.Bomb.value
            """

        # if I can kick, consider placing a bomb to kick
        # NOTE(review): survivable_with_bomb may be None when consider_bomb
        # is False, so this section is kept under `if consider_bomb`, after
        # the enemy loop -- confirm this nesting against the original file.
        if my_kick and my_position in survivable_with_bomb[3]:
            # consider a sequence of actions: place bomb -> move (action) -> move back (kick)
            for action, (dx, dy) in kick_back:
                if not is_survivable[action]:
                    continue

                next_position = (my_position[0] + dx, my_position[1] + dy)
                if not self._on_board(next_position):
                    continue

                # Find where the bomb stops if kicked
                # NOTE(review): bomb_life is read before any bomb is laid
                # by this call; if no bomb is at my position the range is
                # presumably empty and the loop never runs -- confirm
                # whether constants.DEFAULT_BOMB_LIFE was intended.
                for t in range(int(obs["bomb_life"][my_position]) - 2):
                    if not utility.position_is_passage(board, next_position):
                        break
                    _next_position = (next_position[0] + dx,
                                      next_position[1] + dy)
                    if not self._on_board(_next_position):
                        break
                    next_position = _next_position
                    if utility.position_is_fog(board, next_position):
                        print("Bomb to kick into fog", action)
                        return constants.Action.Bomb.value
                    elif utility.position_is_enemy(list_boards[t + 2],
                                                   next_position,
                                                   my_enemies):
                        print("Bomb to kick towards enemy", action)
                        return constants.Action.Bomb.value

                """
                x0, y0 = my_position
                positions_against = [(2*x0-x, 2*y0-y) for (x, y) in positions]
                positions_after_bomb = set(survivable[1]).intersection(positions_against)

                if positions_after_bomb:
                    print("Bomb to kick", enemy, constants.Action.Bomb)
                    return constants.Action.Bomb.value
                """

    # kick
    if len(kickable) > 0:
        while kickable:
            # then consider what happens if I kick a bomb
            next_position = kickable.pop()

            # do not kick a bomb if it will break enemies
            if info["moving_direction"][next_position] is None:
                # if it is a static bomb
                if self._can_break(info["list_boards_no_move"][0],
                                   next_position, my_blast_strength,
                                   my_enemies):
                    continue

            my_action = self._get_direction(my_position, next_position)
            list_boards_with_kick, next_position \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=3)
            survivable_with_kick, prev_kick \
                = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                     next_position)
            if next_position in survivable_with_kick[0]:
                print("Kicking", my_action)
                return my_action.value

    # if on a bomb, consider where to kick in the following step
    if obs["bomb_life"][my_position] > 0:
        # For each survivable move in the next step,
        # check what happens if we kick in the following step.
        # If the bomb is kicked into a fog, plan to kick.
        # If the bomb is kicked toward an enemy, plan to kick.
        # Otherwise, do not plan to kick.
        for action, (dx, dy) in kick_back:
            if not is_survivable[action]:
                continue

            next_position = (my_position[0] + dx, my_position[1] + dy)
            if not self._on_board(next_position):
                continue

            # Find where the bomb stops if kicked
            for t in range(int(obs["bomb_life"][my_position]) - 1):
                if not utility.position_is_passage(board, next_position):
                    break
                _next_position = (next_position[0] + dx,
                                  next_position[1] + dy)
                if not self._on_board(_next_position):
                    break
                next_position = _next_position
                if utility.position_is_fog(board, next_position):
                    print("Moving to kick into fog", action)
                    return action.value
                elif utility.position_is_enemy(list_boards[t + 2],
                                               next_position, my_enemies):
                    print("Moving to kick towards enemy", action)
                    # Plan to kick, as stated in the rule above and as
                    # the fog branch does (this branch used to only
                    # print and fall through).
                    return action.value

    # Move towards an enemy
    good_time_positions = set()
    for enemy in my_enemies:
        good_time_positions = good_time_positions.union(
            next_to_items[enemy])
    if len(good_time_positions) > 0:
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions, prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward enemy", action)
            return action.value

    #
    # Random action
    #
    action = random.choice(survivable_actions)
    print("Random action", action)
    return action.value
def _find_reachable_items(self, list_boards, my_position, time_positions, bomb_target=None):
    """
    Collect the items that can be reached along survivable time-positions

    Parameters
    ----------
    list_boards : list
        list of boards, generated by _board_sequence
    my_position : tuple
        my position, where the search starts (not read by this method)
    time_positions : list
        survivable time-positions, generated by _search_time_expanded_network
    bomb_target : array, optional
        boolean mask over the board; positions where it is true are also
        recorded under the key "target" (defaults to an all-False mask)

    Return
    ------
    items : dict
        items[item] : list of (t, x, y) at which item is found, either on a
        survivable position or on a neighbouring agent/fog/wood cell
    reached : array
        minimum time to reach each position on the board (inf if never)
    next_to_items : dict
        next_to_items[item] : list of (t, x, y) of survivable positions that
        are next to item
    """
    shape = self.board_shape
    target_mask = np.full(shape, False) if bomb_target is None else bomb_target

    # items found on time_positions and the boundary (for Wood)
    found = defaultdict(list)
    # positions next to wood or other agents (count twice if next to two woods)
    adjacent_to = defaultdict(list)
    # earliest time each position is reached
    first_reached = np.full(shape, np.inf)
    # positions that have already been examined
    seen = np.full(shape, False)

    for t, positions in enumerate(time_positions):
        # check the positions reached at time t
        board = list_boards[t]
        for position in positions:
            if first_reached[position] < np.inf:
                # already reached at an earlier time
                continue
            first_reached[position] = t
            here = (t, ) + position
            found[constants.Item(board[position])].append(here)
            if target_mask[position]:
                found["target"].append(here)
            seen[position] = True

            x, y = position
            for dx, dy in ((-1, 0), (1, 0), (0, -1), (0, 1)):
                neighbour = (x + dx, y + dy)
                if not self._on_board(neighbour) or seen[neighbour]:
                    continue
                seen[neighbour] = True

                agent_or_fog = (utility.position_is_agent(board, neighbour)
                                or utility.position_is_fog(board, neighbour))
                if agent_or_fog:
                    kind = constants.Item(board[neighbour])
                    found[kind].append((t, ) + neighbour)
                    adjacent_to[kind].append(here)

                # ignoring wall that will not exist when explode:
                # wood is tested on the last simulated board
                if utility.position_is_wood(list_boards[-1], neighbour):
                    kind = constants.Item(board[neighbour])
                    found[kind].append((t, ) + neighbour)
                    adjacent_to[kind].append(here)

    return found, first_reached, adjacent_to
def act(self, obs, action_space, info):
    """
    Choose the action for this step.

    The rules below are tried in order and the first one that applies
    decides the action:

    1. the only survivable action, if there is exactly one
    2. Bomb, if no other action has better survivability
    3. Bomb, if staying here blocks enemies best and bombing is safe enough
    4. a move to the neighboring position that blocks enemies best
    5. a move toward the best reachable bomb target
    6. a kick that reduces the survivability of an enemy
    7. a move toward the fog that has not been seen for the longest time
    8. the action with the largest survivability score

    Parameters
    ----------
    obs : dict
        observation; the keys read here are 'board', 'position', 'ammo',
        'blast_strength', 'enemies', 'can_kick', 'bomb_blast_strength'
        and 'bomb_life' (obs itself is also passed to helper methods)
    action_space :
        not read by this method
    info : dict
        the keys read here are 'list_boards_no_move', 'curr_bombs',
        'curr_flames', 'moving_direction' and 'since_last_seen'

    Return
    ------
    action : int or None
        value of the chosen constants.Action,
        or None if no action is survivable
    """

    #
    # Definitions
    #

    enemy_mobility = 4

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_kick = obs["can_kick"]  # whether I can kick

    # (row, column) displacement of every action other than Bomb
    move_deltas = {
        constants.Action.Up: (-1, 0),
        constants.Action.Down: (1, 0),
        constants.Action.Left: (0, -1),
        constants.Action.Right: (0, 1),
        constants.Action.Stop: (0, 0),
    }

    print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t")

    #
    # Understand current situation
    #

    # fraction of blocked node in the survival trees of enemies;
    # on the copied boards my own cell is replaced by a Bomb if a bomb
    # lies under me and by a Passage otherwise
    _list_boards = deepcopy(info["list_boards_no_move"])
    if obs["bomb_blast_strength"][my_position]:
        replacement = constants.Item.Bomb.value
    else:
        replacement = constants.Item.Passage.value
    for b in _list_boards:
        if utility.position_is_agent(b, my_position):
            b[my_position] = replacement

    total_frac_blocked, n_survivable_nodes \
        = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])
    bomb_target_enemy = (total_frac_blocked > 0)

    # List of boards simulated
    list_boards, _ = self._board_sequence(board,
                                          info["curr_bombs"],
                                          info["curr_flames"],
                                          self._search_range,
                                          my_position,
                                          enemy_mobility=enemy_mobility)

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, succ, _ \
        = self._search_time_expanded_network(list_boards, my_position)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable,
                                       obs,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       enemy_mobility=enemy_mobility)

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    n_survivable = dict()
    kick_actions = list()
    if my_kick:
        # Positions where we kick a bomb if we move to
        kickable = self._kickable_positions(obs, info["moving_direction"])
        for next_position in kickable:
            # consider what happens if I kick a bomb
            my_action = self._get_direction(my_position, next_position)

            # do not kick into fog: scan from the bomb along the kick
            # direction until the edge of the board
            dx = next_position[0] - my_position[0]
            dy = next_position[1] - my_position[1]
            position = next_position
            is_fog = False
            while self._on_board(position):
                if utility.position_is_fog(board, position):
                    is_fog = True
                    break
                position = (position[0] + dx, position[1] + dy)
            if is_fog:
                continue

            list_boards_with_kick, next_position \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=enemy_mobility)
            survivable_with_kick, _, _, _ \
                = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                     next_position)
            if next_position in survivable_with_kick[0]:
                survivable_actions.append(my_action)
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [len(s) for s in survivable_with_kick[1:]]
                kick_actions.append(my_action)

    if not survivable_actions:
        return None

    #
    # bomb target that can be reached in a survivable manner
    #

    reachable_items, reached, next_to_items \
        = self._find_reachable_items(list_boards,
                                     my_position,
                                     survivable,
                                     bomb_target_enemy)

    #
    # Evaluate the survivability of each action
    #

    # NOTE(review): kick actions appended above are processed here too,
    # which replaces the kick-specific counts stored for them -- confirm
    # that this is intended.
    x, y = my_position
    for action in survivable_actions:
        # for each survivable action, check the survivability
        if action == constants.Action.Bomb:
            n_survivable[action] = [len(s) for s in survivable_with_bomb[1:]]
            continue

        if action not in move_deltas:
            raise ValueError()
        dx, dy = move_deltas[action]
        next_position = (x + dx, y + dy)
        n_survivable[action], _ = self._count_survivable(succ, 1, next_position)

    if verbose:
        print("n_survivable")
        for a in n_survivable:
            print(a, n_survivable[a])

    #
    # Choose the survivable action, if it is the only choice
    #

    if len(survivable_actions) == 1:
        # move to the position if it is the only survivable position
        action = survivable_actions[0]
        print("The only survivable action", action)
        return action.value

    #
    # Bomb if it has dominating survivability
    #

    if is_survivable[constants.Action.Bomb]:
        bomb_is_most_survivable = True
        bomb_sorted = np.array(sorted(n_survivable[constants.Action.Bomb]))
        for action in n_survivable:
            if action == constants.Action.Bomb:
                continue
            action_sorted = np.array(sorted(n_survivable[action]))
            # another action is better at some rank of the sorted counts
            if any(action_sorted > bomb_sorted):
                bomb_is_most_survivable = False
                break
        if bomb_is_most_survivable:
            action = constants.Action.Bomb
            print("Bomb to survive", action)
            return action.value

    #
    # Bomb at a target
    #

    consider_bomb = True
    if survivable_with_bomb is None:
        consider_bomb = False
    elif any(len(s) <= 1 for s in survivable_with_bomb[2:]):
        # if not sufficiently survivable all the time after bomb, do not bomb
        consider_bomb = False

    #
    # Place a bomb
    #

    # Compare the positions I can be at after one step.  Bomb is left out
    # of the candidates and Stop stands for "stay here": if Stop is the
    # best candidate, the current position is locally optimal for a bomb.
    # (Skipping Stop instead made the bomb branch below unreachable and
    # let Bomb be chosen without the consider_bomb check.)
    best_action = None
    max_block = 0
    for action in survivable_actions:
        if action == constants.Action.Bomb:
            continue
        next_position = self._get_next_position(my_position, action)
        block = total_frac_blocked[next_position]
        # strict comparison: the first candidate wins a tie
        if block > max_block:
            max_block = block
            best_action = action

    if consider_bomb and best_action == constants.Action.Stop:
        print("Place a bomb at a locally optimal position", constants.Action.Bomb)
        return constants.Action.Bomb.value

    #
    # Move towards where to bomb
    #

    if best_action not in [None, constants.Action.Stop]:
        print("Move towards better place to bomb", best_action)
        return best_action.value

    good_time_positions = reachable_items["target"]
    if good_time_positions:
        # blocked fraction at the target, discounted by the time to get there
        score = np.array([total_frac_blocked[(tx, ty)] / (t + 1)
                          for t, tx, ty in good_time_positions])
        argmax = np.argwhere(score == np.max(score))
        best_time_positions = [good_time_positions[i[0]] for i in argmax]
        action = self._find_distance_minimizer(my_position,
                                               best_time_positions,
                                               prev, is_survivable)
        if action is not None:
            print("Moving toward where to bomb", action)
            return action.value

    #
    # Kick
    #

    for my_action in kick_actions:
        # position of the bomb to be kicked
        dx, dy = move_deltas[my_action]
        next_position = (my_position[0] + dx, my_position[1] + dy)

        # do not kick a bomb if it will break enemies
        if info["moving_direction"][next_position] is None:
            print("checking static bomb")
            # if it is a static bomb
            if self._can_break(info["list_boards_no_move"][0],
                               next_position,
                               my_blast_strength,
                               my_enemies):
                continue

        list_boards_with_kick_no_move, _ \
            = self._board_sequence(obs["board"],
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=my_action,
                                   can_kick=True,
                                   enemy_mobility=0)

        for enemy in my_enemies:
            rows, cols = np.where(board == enemy.value)
            if len(rows) == 0:
                # this enemy is not on the observed board
                continue
            enemy_position = (rows[0], cols[0])
            _survivable, _, _, _ \
                = self._search_time_expanded_network(list_boards_with_kick_no_move,
                                                     enemy_position)

            n_survivable_nodes_with_kick = sum(len(positions) for positions in _survivable)
            if n_survivable_nodes_with_kick < n_survivable_nodes[enemy]:
                print("Kicking to reduce the survivability",
                      n_survivable_nodes[enemy], "->", n_survivable_nodes_with_kick,
                      my_action)
                return my_action.value

    #
    # Move towards a fog where we have not seen longest
    #

    best_time_position = None
    oldest = 0
    for t, fx, fy in next_to_items[constants.Item.Fog]:
        neighbors = [(fx + ddx, fy + ddy)
                     for ddx, ddy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
        age = max(info["since_last_seen"][position]
                  for position in neighbors if self._on_board(position))
        if age > oldest:
            oldest = age
            best_time_position = (t, fx, fy)

    if best_time_position is not None:
        action = self._find_distance_minimizer(my_position,
                                               [best_time_position],
                                               prev, is_survivable)
        if action is not None:
            print("Moving toward oldest fog", action)
            return action.value

    #
    # Choose most survivable action
    #

    survivable_score = dict()
    for action in n_survivable:
        survivable_score[action] = sum(n_survivable[action])
        if verbose:
            print(action, survivable_score[action], n_survivable[action])
    best_survivable_score = max(survivable_score.values())

    # shuffle so that ties are broken at random
    most_survivable_action = None
    random.shuffle(survivable_actions)
    for action in survivable_actions:
        if survivable_score[action] == best_survivable_score:
            most_survivable_action = action
            break

    print("Most survivable action", most_survivable_action)
    return most_survivable_action.value