def _what_to_break(cls, board, my_position, blast_strength):
    """Collect the first breakable thing a blast from ``my_position`` meets.

    Walks outward from ``my_position`` along each of the four axis
    directions, examining the cells at distance ``1 .. blast_strength - 1``.
    A walk ends at the board edge, at a rigid cell, or at the first wood or
    agent cell; in the last case that cell's item is recorded.

    Args:
        board: 2-D board indexed as ``board[(x, y)]`` (``x`` is the first
            axis, ``y`` the second).
        my_position: ``(x, y)`` cell the blast starts from.
        blast_strength: blast strength; ``range(1, blast_strength)`` cells
            are examined per direction.

    Returns:
        list of ``constants.Item``: at most one entry per direction, in the
        order +x, -x, +y, -y.
    """
    x, y = my_position
    # Cells are read as board[(x, y)], so x is bounded by the number of rows
    # and y by the number of columns. The previous code had the two bounds
    # swapped, which only worked on square boards.
    n_rows = len(board)
    n_cols = len(board[0]) if n_rows else 0

    to_break = []
    for step_x, step_y in ((1, 0), (-1, 0), (0, 1), (0, -1)):
        for distance in range(1, blast_strength):
            position = (x + step_x * distance, y + step_y * distance)
            if not (0 <= position[0] < n_rows and 0 <= position[1] < n_cols):
                # Walked off the board.
                break
            if utility.position_is_rigid(board, position):
                # Rigid walls stop the blast; nothing to break this way.
                break
            if (utility.position_is_wood(board, position)
                    or utility.position_is_agent(board, position)):
                # The blast stops at the first wood/agent it reaches.
                to_break.append(constants.Item(board[position]))
                break
    return to_break
def act(self, obs, action_space):
    """Normalise a raw observation and delegate to the wrapped agent.

    ``obs`` is converted in place: the board and bomb maps become ``uint8``
    arrays, the position becomes a tuple, and the teammate/enemy ids become
    ``constants.Item`` members. ``action_space`` is wrapped in
    ``spaces.Discrete`` before both are handed to ``self._agent.act``.
    """
    discrete_space = spaces.Discrete(action_space)

    # Array-valued fields of the observation.
    for field in ("board", "bomb_blast_strength", "bomb_life"):
        obs[field] = np.array(obs[field], dtype="uint8")

    obs["position"] = tuple(obs["position"])
    obs["teammate"] = constants.Item(obs["teammate"])
    obs["enemies"] = list(map(constants.Item, obs["enemies"]))

    return self._agent.act(obs, discrete_space)
def _djikstra(board, my_position, bombs, enemies, depth=None, exclude=None):
    """Shortest-path search from ``my_position`` over nearby board cells.

    Every cell within Manhattan distance ``depth`` of ``my_position`` whose
    item is not in ``exclude`` becomes a node. Edges connect 4-neighbours,
    and a node is only expanded if ``utility.position_is_passable`` accepts
    it for the given ``enemies``.

    Args:
        board: 2-D board indexed as ``board[(row, col)]``.
        my_position: ``(row, col)`` start cell.
        bombs: iterable of dicts with a ``'position'`` key.
        enemies: enemy items, forwarded to ``utility.position_is_passable``.
        depth: maximum Manhattan distance to consider (required).
        exclude: items whose cells are left out of the search; defaults to
            Fog, Rigid and Flames.

    Returns:
        ``(items, dist, prev)`` where ``items`` maps each ``constants.Item``
        to the list of node positions holding it, ``dist`` maps each node to
        its distance from the start (``np.inf`` if unreachable) and ``prev``
        maps each node to its predecessor on a shortest path (``None`` for
        the start and for unreachable nodes).
    """
    assert depth is not None, "depth must be provided"

    if exclude is None:
        exclude = [
            constants.Item.Fog, constants.Item.Rigid, constants.Item.Flames
        ]

    def out_of_range(p_1, p_2):
        '''Determines if two points are out of range of each other'''
        x_1, y_1 = p_1
        x_2, y_2 = p_2
        return abs(y_2 - y_1) + abs(x_2 - x_1) > depth

    items = defaultdict(list)
    dist = {}
    prev = {}
    Q = queue.PriorityQueue()

    my_x, my_y = my_position
    n_rows = len(board)
    n_cols = len(board[0]) if n_rows else 0
    # The "+ 1" makes the window symmetric: cells exactly `depth` away in the
    # positive directions were previously dropped even though out_of_range()
    # accepts them. Columns are bounded by the column count, not the row count.
    for r in range(max(0, my_x - depth), min(n_rows, my_x + depth + 1)):
        for c in range(max(0, my_y - depth), min(n_cols, my_y + depth + 1)):
            position = (r, c)
            if (out_of_range(my_position, position)
                    or utility.position_in_items(board, position, exclude)):
                continue

            dist[position] = 0 if position == my_position else np.inf
            prev[position] = None
            Q.put((dist[position], position))

    for bomb in bombs:
        if bomb['position'] == my_position:
            items[constants.Item.Bomb].append(my_position)

    # Queue entries are never updated in place: when a distance improves, a
    # new entry is pushed and the stale one is skipped when it surfaces.
    # Without the re-push, nodes were expanded in their initial (all-inf)
    # order and could end up with distances larger than the true shortest path.
    settled = set()
    while not Q.empty():
        _, position = Q.get()
        if position in settled:
            continue
        settled.add(position)

        if utility.position_is_passable(board, position, enemies):
            x, y = position
            val = dist[(x, y)] + 1
            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                new_position = (row + x, col + y)
                if new_position not in dist:
                    continue

                if val < dist[new_position]:
                    dist[new_position] = val
                    prev[new_position] = position
                    Q.put((val, new_position))

        # Every node is recorded exactly once, reachable or not; callers
        # consult `dist` to tell the two apart.
        item = constants.Item(board[position])
        items[item].append(position)

    return items, dist, prev
def move2wooden(obs):
    """Vote for the direction that leads towards nearby wood.

    Returns a 6-slot list of zeros with a 1 at the index of the direction
    chosen by ``_near_wood`` (search radius 2), or all zeros when no such
    direction is found.
    """
    votes = [0] * 6

    position = tuple(obs['position'])
    grid = np.array(obs['board'])
    bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    enemies = [consts.Item(e) for e in obs['enemies']]

    items, dist, prev = _djikstra(
        grid, position, bombs, enemies, depth=10)

    direction = _near_wood(position, items, dist, prev, 2)
    if not direction:
        return votes

    votes[direction.value] = 1
    return votes
def _filter_legal_actions(state):
    """List the actions that are legal in ``state``.

    ``Action.Bomb`` is always included, followed by every entry of
    ``directions`` whose target cell is on the board and passable.
    """
    origin = tuple(state['position'])
    grid = np.array(state['board'])
    foes = [constants.Item(e) for e in state['enemies']]

    def can_enter(direction):
        # A move is legal when the cell it leads to exists and can be entered.
        target = utility.get_next_position(origin, direction)
        return (utility.position_on_board(grid, target)
                and utility.position_is_passable(grid, target, foes))

    legal = [constants.Action.Bomb]
    legal.extend(d for d in directions if can_enter(d))
    return legal
def unsafe_directions(obs):
    """Vote against directions that lie within range of a bomb.

    Returns a 6-slot list of zeros with -1 at the index of every direction
    that ``_directions_in_range_of_bomb`` reports with a positive value.
    """
    position = tuple(obs['position'])
    grid = np.array(obs['board'])
    bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    enemies = [consts.Item(e) for e in obs['enemies']]

    items, dist, prev = _djikstra(
        grid, position, bombs, enemies, depth=10)

    danger = _directions_in_range_of_bomb(grid, position, bombs, dist)

    votes = [0] * 6
    for direction in danger:
        threatened = danger[direction] > 0
        if threatened:
            votes[direction.value] = -1
    return votes
def wooden_wall(obs):
    """Vote for placing a bomb when wood is adjacent and bombing is viable.

    Returns a 6-slot list of zeros; the last slot is set to 1 when
    ``_near_wood`` (radius 1) finds wood and ``_maybe_bomb`` approves.
    """
    votes = [0] * 6

    position = tuple(obs['position'])
    grid = np.array(obs['board'])
    bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    enemies = [consts.Item(e) for e in obs['enemies']]
    ammo = int(obs['ammo'])
    blast_strength = int(obs['blast_strength'])

    items, dist, prev = _djikstra(
        grid, position, bombs, enemies, depth=10)

    # `and` short-circuits, so _maybe_bomb is only consulted next to wood.
    should_bomb = (_near_wood(position, items, dist, prev, 1)
                   and _maybe_bomb(ammo, blast_strength, items, dist,
                                   position))
    if should_bomb:
        votes[-1] = 1
    return votes
def action():  #pylint: disable=W0612
    '''Decode an HTTP action request, run the agent, and reply with JSON.'''
    payload = request.get_json()

    observation = json.loads(payload.get("obs"))

    # Restore the typed values that were flattened for JSON transport.
    observation['teammate'] = constants.Item(observation['teammate'])
    enemy_ids = observation['enemies']
    for index, raw_enemy in enumerate(list(enemy_ids)):
        enemy_ids[index] = constants.Item(raw_enemy)
    observation['position'] = tuple(observation['position'])
    observation['board'] = np.array(observation['board'], dtype=np.uint8)
    for float_field in ('bomb_life', 'bomb_blast_strength'):
        observation[float_field] = np.array(
            observation[float_field], dtype=np.float64)

    action_space = json.loads(payload.get("action_space"))

    chosen = self.act(observation, action_space)
    return jsonify({"action": chosen})
def kill_enemy(obs):
    """Vote for placing a bomb when an enemy is adjacent and bombing is viable.

    Returns a 6-slot list of zeros; the last slot is set to 1 when
    ``_is_adjacent_enemy`` and ``_maybe_bomb`` both approve.
    """
    position = tuple(obs['position'])
    grid = np.array(obs['board'])
    bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    enemies = [consts.Item(e) for e in obs['enemies']]
    ammo = int(obs['ammo'])
    blast_strength = int(obs['blast_strength'])

    items, dist, prev = _djikstra(
        grid, position, bombs, enemies, depth=10)

    votes = [0] * 6
    if not _is_adjacent_enemy(items, dist, enemies):
        return votes
    if _maybe_bomb(ammo, blast_strength, items, dist, position):
        votes[-1] = 1
    return votes
def __init__(self, obs, init=False, bombing_agents=None, board_size=11):
    """Build a game state from an observation.

    Args:
        obs: observation dict; the keys ``'position'``, ``'board'``,
            ``'bomb_life'``, ``'bomb_blast_strength'``, ``'teammate'``,
            ``'enemies'`` and ``'ammo'`` are read.
        init: when true, the current flames, bombs, items and agents are
            derived from the observation.
        bombing_agents: mapping describing which agent placed which bomb
            (presumably keyed by bomb position; verify against the caller).
            A deep copy is stored on the instance. ``None`` means "none known".
        board_size: side length of the board.
    """
    # A literal `{}` default would be a single dict shared by every call.
    if bombing_agents is None:
        bombing_agents = {}

    self._game_mode = constants.GameType.FFA
    self.move = None
    self._board_size = board_size
    self._obs = obs
    self._my_position = tuple(obs['position'])
    self._board = np.array(obs['board'])
    self._bomb_life = np.array(self._obs['bomb_life'])
    self._teammate = obs['teammate']
    self._enemies = [constants.Item(e) for e in obs['enemies']]
    self._ammo = int(obs['ammo'])
    self.fm = forward_model.ForwardModel()
    self.self_agent = self.find_self_agent(self._obs)

    agents_id = [
        constants.Item.Agent0, constants.Item.Agent1,
        constants.Item.Agent2, constants.Item.Agent3
    ]

    # remember to modify if it is team or radio mode
    self._agents = [
        characters.Bomber(aid.value, "FFA") for aid in agents_id
    ]

    self.bombing_agents = copy.deepcopy(bombing_agents)

    self.score = 0

    if init:
        # determine by confirming the map
        self.curr_flames = self.convert_flames(self._board)
        self.curr_bombs = self.convert_bombs(
            np.array(obs['bomb_blast_strength']),
            np.array(obs['bomb_life']))
        self.curr_items = self.convert_items(self._board)
        self.curr_agents = self.convert_agents(self._board)
        self.last_items = self.curr_items
        if bombing_agents != {}:
            # With known bombers, rebuild the bombs through
            # convert_bombs_two, which also receives the bomber mapping.
            self.curr_bombs = self.convert_bombs_two(
                np.array(self._obs['bomb_blast_strength']),
                self._bomb_life, bombing_agents)
def find_next_move(self, obs, action_space, win_condition, score_func,
                   bombing_agents):
    """Search the game tree from ``obs`` and return the best first move.

    A ``gn.Tree`` is built from the observation, ``self.bfs`` expands it from
    the root, and the root child with the highest score is selected.

    Args:
        obs: observation dict; ``'board'`` and ``'enemies'`` are read here
            and the whole dict is passed to ``gn.Tree``.
        action_space: stored on the instance as ``self.action_space``.
        win_condition: stored on the instance as ``self.win_condition``.
        score_func: stored on the instance as ``self.score_func``.
        bombing_agents: stored on the instance and passed to ``gn.Tree``.

    Returns:
        The ``move`` of the winning child's state, or
        ``constants.Action.Stop.value`` when no child qualifies.
    """
    self.action_space = action_space
    self.win_condition = win_condition
    self.bombing_agents = bombing_agents
    self.score_func = score_func

    self.board = np.array(obs['board'])
    self._enemies = [constants.Item(e) for e in obs['enemies']]

    tree = gn.Tree(obs, True, self.bombing_agents)
    # get the root node
    self.rootNode = tree.get_root_node()

    # need way to find terminating condition
    self.end_time = 30
    start_time = time.time()

    self.bfs(self.rootNode, start_time)

    # NOTE(review): -1 acts as a score floor, so a child scoring <= -1 is
    # never selected. Confirm that scores are always above -1, or that
    # falling back to Stop is intended in that case.
    max_score = -1
    winner_node = None
    for child in self.rootNode.childArray:
        if child.score > max_score:
            max_score = child.score
            winner_node = child

    if winner_node is None:
        return constants.Action.Stop.value
    return winner_node.state.move
def reward(self, id, current_obs, info):
    """Compute the shaped reward of agent ``id`` for the latest step.

    Small bonuses are given for stepping onto a power-up that was on the
    previous board (the kick bonus only if the agent could not kick before).
    For each agent 10..13 that was alive and no longer is: +0.5 if it was an
    enemy, -1 if it was this agent, -0.5 otherwise. A tie costs another -1.
    The per-agent statistics in ``self.stat[id]`` are updated alongside.
    """
    total = 0
    previous = self.prev_obs[id]
    mine = current_obs[id]
    stats = self.stat[id]

    def picked_up(item):
        # True when the cell the agent now occupies held `item` last step.
        return utility._position_is_item(previous['board'],
                                         mine['position'], item)

    if picked_up(constants.Item.IncrRange):
        total += 0.01
        stats[Metrics.IncrRange.name] += 1

    if picked_up(constants.Item.ExtraBomb):
        total += 0.01
        stats[Metrics.ExtraBomb.name] += 1

    if picked_up(constants.Item.Kick) and not previous['can_kick']:
        total += 0.02
        stats[Metrics.Kick.name] = True

    for agent_value in range(10, 14):
        was_alive = agent_value in self.alive_agents
        if not (was_alive and agent_value not in mine['alive']):
            continue
        if constants.Item(value=agent_value) in mine['enemies']:
            total += 0.5
            stats[Metrics.EnemyDeath.name] += 1
        elif agent_value - 10 == id:
            total += -1
            stats[Metrics.DeadOrSuicide.name] += 1
        else:
            total += -0.5

    if info['result'] == constants.Result.Tie:
        total += -1

    return total
def _find_reachable_items(self, list_boards, my_position, time_positions):
    """
    Scan the survivable time-positions and record what can be reached

    Parameters
    ----------
    list_boards : list
        simulated boards, one per time step
    my_position : tuple
        position where the search starts (not read by this method)
    time_positions : list
        time_positions[t] : positions that can be occupied at time t

    Return
    ------
    items : dict
        items[item] : list of (t, x, y) at which item was found, including
        agents and wood found on the neighboring cells
    reached : array
        earliest time at which each position is reached (inf if never)
    next_to_items : dict
        next_to_items[item] : list of (t, x, y) of reached positions
        that are adjacent to an agent or a wood
    """

    found = defaultdict(list)
    beside = defaultdict(list)
    # earliest arrival time per cell
    reached = np.full(self.board_shape, np.inf)
    # cells that have already been inspected, either directly or as neighbor
    inspected = np.full(self.board_shape, False)

    offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))

    for t, positions in enumerate(time_positions):
        board = list_boards[t]

        for position in positions:
            already_reached = reached[position] < np.inf
            if already_reached:
                continue

            here = (t,) + position
            reached[position] = t
            found[constants.Item(board[position])].append(here)
            inspected[position] = True

            px, py = position
            for d_row, d_col in offsets:
                neighbor = (px + d_row, py + d_col)
                if not self._on_board(neighbor) or inspected[neighbor]:
                    continue
                inspected[neighbor] = True

                def note_neighbor():
                    # register the neighbor's item and where we stand
                    kind = constants.Item(board[neighbor])
                    found[kind].append((t,) + neighbor)
                    beside[kind].append(here)

                if utility.position_is_agent(board, neighbor):
                    note_neighbor()

                # only woods that survive until the last board are counted
                if utility.position_is_wood(list_boards[-1], neighbor):
                    note_neighbor()

    return found, reached, beside
def act(self, obs, action_space, info):
    """Choose an action that blocks enemies while keeping this agent alive.

    The method works through a fixed sequence of stages and returns from the
    first one that produces a decision:

    1. Compute the survivable actions; return ``None`` if there is none, or
       the only one if there is exactly one.
    2. Score each action by how much it blocks the enemies (``block``),
       discounted by the teammate's blocked fraction and perturbed with
       ``-log(-log(u))`` noise drawn from ``self.random``.
    3. Bomb, or move towards a position worth bombing, if the scores pass
       the configured thresholds and the teammate is not endangered.
    4. Drop actions with low survivability or high teammate interference.
    5. Head for a recently seen enemy, away from the teammate, out of the
       collapsing area (only if ``"escape"`` is in ``info``), or into fog.
    6. Fall back to the best remaining action by ``block``, then by
       ``p_survivable``.

    Args:
        obs: observation dict; ``position``, ``ammo``, ``blast_strength``,
            ``enemies``, ``teammate``, ``can_kick``, ``bomb_blast_strength``
            and ``bomb_life`` are read.
        action_space: not used by this method.
        info: dict prepared by the caller. Keys read here:
            ``recently_seen``, ``teammate_position``, ``enemy_positions``,
            ``my_next_position``, ``all_kickable``, ``kickable``,
            ``might_block_actions``, ``might_blocked``, ``prev_action``,
            ``prev_position``, ``list_boards_no_move``, ``flame_life``,
            ``curr_bombs``, ``curr_flames``, ``agent_blast_strength``,
            ``step_to_collapse``, ``collapse_ring`` and, optionally,
            ``escape``.

    Returns:
        The ``.value`` of the chosen action, or ``None`` when no action can
        be chosen.

    Side effects:
        Updates ``self._inv_tmp`` and overwrites this agent's own cell in
        the boards of ``info["list_boards_no_move"]`` in place.
    """

    #
    # Definitions
    #

    #board = obs['board']
    board = info["recently_seen"]
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_enemies = [
        constants.Item(e) for e in obs['enemies']
        if e != constants.Item.AgentDummy
    ]
    if obs["teammate"] != constants.Item.AgentDummy:
        my_teammate = obs["teammate"]
    else:
        my_teammate = None
    my_kick = obs["can_kick"]  # whether I can kick

    #
    # Understand current situation
    #

    # positions that might be blocked
    if info["teammate_position"] is None:
        agent_positions = info["enemy_positions"]
    else:
        agent_positions = info["enemy_positions"] + [
            info["teammate_position"]
        ]

    # survivable actions
    if len(info["enemy_positions"]) > 0:
        mobility = self._enemy_mobility
    else:
        mobility = 0

    n_survivable, is_survivable, list_boards \
        = self._get_survivable(obs, info, my_position, info["my_next_position"], agent_positions,
                               info["all_kickable"], allow_kick_to_fog=False,
                               enemy_mobility=mobility, enemy_bomb=self._enemy_bomb,
                               step_to_collapse=info["step_to_collapse"],
                               collapse_ring=info["collapse_ring"])

    # actions flagged by the caller as possibly blocking are treated as
    # not survivable
    for a in info["might_block_actions"]:
        n_survivable[a] = np.zeros(self._search_range)
        is_survivable[a] = False

    survivable_actions = list()
    for a in is_survivable:
        if not is_survivable[a]:
            continue
        if info["might_blocked"][a] and not is_survivable[
                constants.Action.Stop]:
            continue
        # too few survivable positions at the end of the search horizon
        if n_survivable[a][-1] <= 1:
            is_survivable[a] = False
            continue
        survivable_actions.append(a)

    #
    # Choose action
    #

    if len(survivable_actions) == 0:

        #
        # return None, if no survivable actions
        #

        return None

    elif len(survivable_actions) == 1:

        #
        # Choose the survivable action, if it is the only choice
        #

        action = survivable_actions[0]
        return action.value

    if all([
            info["prev_action"] not in
            [constants.Action.Stop, constants.Action.Bomb],
            info["prev_position"] == my_position
    ]):
        # if previously blocked, do not repeat with some probability
        self._inv_tmp *= self._backoff
    else:
        self._inv_tmp = self._inv_tmp_init

    #
    # Bomb at a target
    #

    # fraction of blocked node in the survival trees of enemies
    _list_boards = info["list_boards_no_move"]
    # replace my own cell by what is left there without me:
    # a bomb if one lies under me, a passage otherwise (done in place)
    if obs["bomb_blast_strength"][my_position]:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Bomb.value
    else:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Passage.value

    total_frac_blocked, n_survivable_nodes, blocked_time_positions \
        = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

    if info["teammate_position"] is not None:
        total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
            = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

        if n_survivable_nodes_teammate[my_teammate] > 0:
            # blocked fractions below this lower bound are treated as
            # harmless for the teammate and zeroed
            LB = self._teammate_survivability_threshold / n_survivable_nodes_teammate[
                my_teammate]
            positions_teammate_safe = np.where(
                total_frac_blocked_teammate < LB)
            total_frac_blocked_teammate[positions_teammate_safe] = 0

    # survivability of each action, normalized by the threshold and capped at 1
    p_survivable = defaultdict(float)
    for action in n_survivable:
        p_survivable[action] = sum(
            n_survivable[action]) / self._my_survivability_threshold
        if p_survivable[action] > 1:
            p_survivable[action] = 1

    # block[action]: noisy score of how much the action blocks the enemies
    block = defaultdict(float)
    for action in [
            constants.Action.Stop, constants.Action.Up,
            constants.Action.Down, constants.Action.Left,
            constants.Action.Right
    ]:
        next_position = info["my_next_position"][action]
        if next_position is None:
            continue
        if next_position in info["all_kickable"]:
            # kick will be considered later
            continue
        if all([
                utility.position_is_flames(board, next_position),
                info["flame_life"][next_position] > 1,
                is_survivable[constants.Action.Stop]
        ]):
            # if the next position is flames,
            # I want to stop to wait, which must be feasible
            block[action] = total_frac_blocked[
                next_position] * p_survivable[constants.Action.Stop]
            if info["teammate_position"] is not None:
                block[action] *= (
                    1 - total_frac_blocked_teammate[next_position])
            if block[action] > 0:
                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))
            continue
        elif not is_survivable[action]:
            continue
        if all([
                info["might_blocked"][action],
                not is_survivable[constants.Action.Stop]
        ]):
            continue

        block[action] = total_frac_blocked[next_position] * p_survivable[
            action]
        if info["teammate_position"] is not None:
            block[action] *= (1 - total_frac_blocked_teammate[next_position])
        if block[action] > 0:
            block[action] *= self._inv_tmp
            block[action] -= np.log(-np.log(self.random.uniform()))

        if info["might_blocked"][action]:
            # the move may fail, so average the scores of staying and moving;
            # this overwrites the score computed just above
            block[action] = (total_frac_blocked[my_position] *
                             p_survivable[constants.Action.Stop] +
                             total_frac_blocked[next_position] *
                             p_survivable[action]) / 2
            if info["teammate_position"] is not None:
                block[action] *= (
                    1 - total_frac_blocked_teammate[next_position])
            if block[action] > 0:
                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

    if is_survivable[constants.Action.Bomb]:
        list_boards_with_bomb, _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_blast_strength=my_blast_strength,
                                   my_action=constants.Action.Bomb,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        n_survivable_nodes_with_bomb = defaultdict(int)
        for enemy_position in info["enemy_positions"]:
            # get survivable tree of the enemy
            _survivable = search_time_expanded_network(
                list_boards_with_bomb, enemy_position)
            n_survivable_nodes_with_bomb[enemy_position] = sum(
                [len(positions) for positions in _survivable])

        n_with_bomb = sum([
            n_survivable_nodes_with_bomb[enemy_position]
            for enemy_position in info["enemy_positions"]
        ])
        n_with_none = sum(
            [n_survivable_nodes[enemy] for enemy in my_enemies])
        if n_with_none == 0:
            total_frac_blocked_with_bomb = 0

            # place more bombs, so the stacked enemy cannot kick
            x, y = my_position
            for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                next_position = (x + dx, y + dy)
                following_position = (x + 2 * dx, y + 2 * dy)
                if not self._on_board(following_position):
                    continue
                if all([
                        obs["bomb_life"][next_position] > 0,
                        board[following_position] >
                        constants.Item.AgentDummy.value
                ]):
                    total_frac_blocked_with_bomb = 1
        else:
            total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none

        action = constants.Action.Bomb
        block[action] = total_frac_blocked_with_bomb
        # block[action] += total_frac_blocked[my_position] * (eisenachAgents - total_frac_blocked_with_bomb)
        block[action] *= p_survivable[action]

        block_teammate_with_bomb = None
        if block[action] > 0:
            if info["teammate_position"] is not None:
                block_teammate_with_bomb \
                    = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                       n_survivable_nodes_teammate,
                                                       board,
                                                       [my_teammate],
                                                       ignore_dying_agent=True)

                block_teammate_with_bomb \
                    += total_frac_blocked_teammate[my_position] * (1 - block_teammate_with_bomb)
                block[action] *= (1 - block_teammate_with_bomb)

            block[action] *= self._inv_tmp
            block[action] -= np.log(-np.log(self.random.uniform()))

    for next_position in info["kickable"]:

        action = self._get_direction(my_position, next_position)
        if not is_survivable[action]:
            continue

        list_boards_with_kick, _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=action,
                                   can_kick=True,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        block[action] \
            = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                               n_survivable_nodes,
                                               board,
                                               my_enemies,
                                               ignore_dying_agent=True)
        block[action] += total_frac_blocked[next_position] * (
            1 - block[action])
        block[action] *= p_survivable[action]

        if block[action] > 0:
            if info["teammate_position"] is not None:
                block_teammate_with_kick \
                    = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                       n_survivable_nodes_teammate,
                                                       board, [my_teammate],
                                                       ignore_dying_agent=True)
                block_teammate_with_kick \
                    += total_frac_blocked_teammate[next_position] * (1 - block_teammate_with_kick)
                block[action] *= (1 - block_teammate_with_kick)

            block[action] *= self._inv_tmp
            block[action] -= np.log(-np.log(self.random.uniform()))

    max_block = 0  # do not choose zero blocking action as the best
    best_action = None
    for action in block:
        if block[action] > max_block:
            max_block = block[action]
            best_action = action

    if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp:

        if info["teammate_position"] is not None:
            if block_teammate_with_bomb is None:
                block_teammate_with_bomb \
                    = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                       n_survivable_nodes_teammate,
                                                       board,
                                                       [my_teammate],
                                                       ignore_dying_agent=True)

            teammate_safety = block_teammate_with_bomb * n_survivable_nodes_teammate[
                my_teammate]
            if any([
                    teammate_safety >
                    self._teammate_survivability_threshold,
                    block_teammate_with_bomb < self._interfere_threshold,
                    block_teammate_with_bomb <
                    total_frac_blocked_teammate[my_position]
            ]):
                teammate_ok = True
            else:
                teammate_ok = False
        else:
            teammate_ok = True

        if teammate_ok:
            # bomb both when bombing is the best action and when stopping is
            if best_action == constants.Action.Bomb:
                return constants.Action.Bomb.value
            if best_action == constants.Action.Stop:
                return constants.Action.Bomb.value

    #
    # Move towards where to bomb
    #

    if best_action not in [None, constants.Action.Bomb]:
        next_position = info["my_next_position"][best_action]

        should_chase = (total_frac_blocked[next_position] >
                        self._chase_threshold)

        if info["teammate_position"] is not None:
            teammate_safety = total_frac_blocked_teammate[
                next_position] * n_survivable_nodes_teammate[my_teammate]
            if any([
                    teammate_safety >
                    self._teammate_survivability_threshold,
                    total_frac_blocked_teammate[next_position] <
                    self._interfere_threshold,
                    total_frac_blocked_teammate[next_position] <
                    total_frac_blocked_teammate[my_position]
            ]):
                teammate_ok = True
            else:
                teammate_ok = False
        else:
            teammate_ok = True

        if should_chase and teammate_ok:
            if all([
                    utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1,
                    is_survivable[constants.Action.Stop]
            ]):
                action = constants.Action.Stop
                return action.value
            else:
                return best_action.value

    # Exclude the action representing stop to wait
    max_block = 0  # do not choose zero blocking action as the best
    best_action = None
    for action in survivable_actions:
        if block[action] > max_block:
            max_block = block[action]
            best_action = action

    #
    # Do not take risky actions when not interacting with enemies
    #

    most_survivable_action = self._action_most_survivable(n_survivable)

    if total_frac_blocked[my_position] > 0:

        # ignore actions with low survivability
        _survivable_actions = list()
        for action in n_survivable:
            n = sum(n_survivable[action])
            if not is_survivable[action]:
                continue
            elif n > self._my_survivability_threshold:
                _survivable_actions.append(action)
            else:
                is_survivable[action] = False

        if len(_survivable_actions) > 1:
            survivable_actions = _survivable_actions
        elif best_action is not None:
            return best_action.value
        else:
            # Take the most survivable action
            return most_survivable_action.value

    #
    # Do not interfere with teammate
    #

    if all([
            info["teammate_position"] is not None,
            len(info["enemy_positions"]) > 0 or len(info["curr_bombs"]) > 0
    ]):
        # ignore actions that interfere with teammate
        min_interfere = np.inf
        least_interfere_action = None
        _survivable_actions = list()
        for action in survivable_actions:
            if action == constants.Action.Bomb:
                # NOTE(review): the string literal below is disabled code
                # kept as a no-op expression; Bomb is simply skipped here
                """ if block_teammate_with_bomb is None: block_teammate_with_bomb \ = self._get_frac_blocked_two_lists(list_boards_with_bomb, n_survivable_nodes_teammate, board, [my_teammate], ignore_dying_agent=True) frac = block_teammate_with_bomb """
                continue
            else:
                next_position = info["my_next_position"][action]
                frac = total_frac_blocked_teammate[next_position]
            if frac < min_interfere:
                min_interfere = frac
                least_interfere_action = action
            if frac < self._interfere_threshold:
                _survivable_actions.append(action)
            else:
                is_survivable[action] = False

        if len(_survivable_actions) > 1:
            survivable_actions = _survivable_actions
        elif best_action is not None:
            # Take the least interfering action
            return best_action.value
        else:
            return least_interfere_action.value

    consider_bomb = True
    if not is_survivable[constants.Action.Bomb]:
        consider_bomb = False

    #
    # Find reachable items
    #

    # List of boards simulated
    list_boards, _ = self._board_sequence(
        board,
        info["curr_bombs"],
        info["curr_flames"],
        self._search_range,
        my_position,
        enemy_mobility=mobility,
        enemy_bomb=self._enemy_bomb,
        enemy_positions=agent_positions,
        agent_blast_strength=info["agent_blast_strength"],
        step_to_collapse=info["step_to_collapse"],
        collapse_ring=info["collapse_ring"])

    # some bombs may explode with extra bombs, leading to under estimation
    for t in range(len(list_boards)):
        flame_positions = np.where(
            info["list_boards_no_move"][t] == constants.Item.Flames.value)
        list_boards[t][flame_positions] = constants.Item.Flames.value

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, _, _ = self._search_time_expanded_network(
        list_boards, my_position)
    if len(survivable[-1]) == 0:
        survivable = [set() for _ in range(len(survivable))]

    # Items and bomb target that can be reached in a survivable manner
    if "escape" in info:
        reachable_items, _, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable,
                                         might_powerup=info["escape"])  # might_powerup is the escape from collapse
    else:
        _, _, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable)

    #
    # If I have seen an enemy recently and cannot see him now, then move to the last seen position
    #

    action = self._action_to_enemy(my_position,
                                   next_to_items[constants.Item.Fog],
                                   prev, is_survivable, info)
    if action is not None:
        return action.value

    #
    # If I have seen a teammate recently, then move away from the last seen position
    #

    action = self._action_away_from_teammate(
        my_position, next_to_items[constants.Item.Fog], prev,
        is_survivable, info)
    if action is not None:
        return action.value

    #
    # Move to the places that will not be collapsed
    #

    if "escape" in info:
        # might_powerup is the escape from collapse
        action = self._action_to_might_powerup(my_position,
                                               reachable_items, prev,
                                               is_survivable)
        if action is not None:
            print("Escape from collapse", action)
            return action.value

    #
    # Move towards a fog where we have not seen longest
    #

    action = self._action_to_fog(my_position,
                                 next_to_items[constants.Item.Fog], prev,
                                 is_survivable, info)
    if action is not None:
        #if True:
        # only follow the fog action with probability 0.8
        if self.random.uniform() < 0.8:
            return action.value

    #
    # Choose most survivable action
    #

    max_block = 0
    best_action = None
    for action in survivable_actions:
        if action == constants.Action.Bomb:
            continue
        score = block[action]
        # small noise to break ties
        if action != constants.Action.Bomb:
            score += np.random.uniform(0, 1e-3)
        if score > max_block:
            max_block = score
            best_action = action

    if best_action is None:
        max_p = 0
        best_action = None
        for action in p_survivable:
            score = p_survivable[action]
            if action != constants.Action.Bomb:
                score += np.random.uniform(0, 1e-3)
            if score > max_p:
                max_p = score
                best_action = action

    if best_action is None:
        # this should not be the case
        return None
    else:
        return best_action.value
def _find_safe_directions(self, board, my_position, unsafe_directions,
                          bombs, enemies, item):
    """Pick the directions the agent may move in without walking into a blast.

    Args:
        board: the board; must support ``.copy()`` and ``board[position]``.
        my_position: ``(x, y)`` position of the agent.
        unsafe_directions: mapping from direction to bomb range for the
            directions that are within range of a bomb.
        bombs: iterable of dicts with ``'position'``, ``'bomb_life'`` and
            ``'blast_strength'`` keys.
        enemies: enemy items, forwarded to ``utility.position_is_passable``.
        item: mapping from ``constants.Item`` to a list; only the entry for
            ``constants.Item(3)`` is read (presumably the positions found by
            the path search; verify against the caller).

    Returns:
        A non-empty list of directions, or ``[constants.Action.Stop]`` when
        nothing better is available.
    """

    def is_stuck_direction(next_position, bomb_range, next_board, enemies):
        '''Helper function to do determine if the agents next move is possible.'''
        Q = queue.PriorityQueue()
        Q.put((0, next_position))
        seen = set()

        next_x, next_y = next_position
        is_stuck = True
        while not Q.empty():
            dist, position = Q.get()
            seen.add(position)
            #FIXME is_stuck=False

            position_x, position_y = position
            # A cell that shares neither coordinate with the start has left
            # the straight line through it, so there is a way out.
            if next_x != position_x and next_y != position_y:
                is_stuck = False
                break

            # Far enough from the start to be beyond the bomb range.
            if dist > bomb_range:
                is_stuck = False
                break

            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                new_position = (row + position_x, col + position_y)
                if new_position in seen:
                    continue

                if not utility.position_on_board(next_board, new_position):
                    continue

                if not utility.position_is_passable(
                        next_board, new_position, enemies):
                    continue

                # Priority is the Manhattan distance from the start cell.
                dist = abs(row + position_x - next_x) + abs(col +
                                                            position_y -
                                                            next_y)
                Q.put((dist, new_position))
        return is_stuck

    # All directions are unsafe. Return a position that won't leave us locked.
    safe = []

    if len(unsafe_directions) == 4:
        # Evaluate escapes on a copy of the board with a bomb on our cell.
        next_board = board.copy()
        next_board[my_position] = constants.Item.Bomb.value
        disallowed = []
        for direction, bomb_range in unsafe_directions.items():
            next_position = utility.get_next_position(
                my_position, direction)
            next_x, next_y = next_position
            if not utility.position_on_board(next_board, next_position) or \
               not utility.position_is_passable(next_board, next_position, enemies):
                disallowed.append(direction)
                continue

            if not is_stuck_direction(next_position, bomb_range,
                                      next_board, enemies):
                # We found a direction that works. The .items provided
                # a small bit of randomness. So let's go with this one.
                return [direction]
        if not safe:
            # Before deciding to stay put, check whether we are standing on
            # a bomb that was just placed here; if so, pick a random one
            # from unsafe_directions.
            # NOTE(review): the code assigns every unsafe direction that is
            # not disallowed; no random choice is made in this method.
            # for i in bombs:
            # if len(bombs) == 1 :
            if len(item[constants.Item(3)]) == 1:
                # if my_position == i['position']:
                for bomb in bombs:
                    if my_position == bomb['position']:
                        safe = [
                            k for k in unsafe_directions
                            if k not in disallowed
                        ]
                        # break
        if not safe:
            safe = [constants.Action.Stop]
        return safe

    x, y = my_position
    disallowed = []  # The directions that will go off the board.

    for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
        position = (x + row, y + col)
        direction = utility.get_direction(my_position, position)

        # Don't include any direction that will go off of the board.
        if not utility.position_on_board(board, position):
            disallowed.append(direction)
            continue

        # Don't include any direction that we know is unsafe.
        if direction in unsafe_directions:
            # If this unsafe position is not passable, disallow it, to
            # avoid kicking a bomb.
            if not utility.position_is_passable(board, position, enemies):
                disallowed.append(direction)
            # If moving in an unsafe direction would get us killed by the
            # blast right then, we must not go. (Whether we would be killed
            # exactly then has to be decided from the bomb's life.)
            # if
            continue

        if utility.position_is_passable(
                board, position, enemies) or utility.position_is_fog(
                    board, position):
            # We may move one cell and end up next to a bomb.
            safe.append(direction)
            # Withdraw the direction again if a bomb with one step of life
            # left shares a row or column with the target within its
            # blast strength.
            for bomb in bombs:
                if bomb['bomb_life'] == 1:
                    bomb_x, bomb_y = bomb['position']
                    if bomb_x == position[0] and abs(
                            bomb_y - position[1]) <= bomb['blast_strength']:
                        #remove the direction
                        safe.pop()
                        break
                    elif bomb_y == position[1] and abs(
                            bomb_x - position[0]) <= bomb['blast_strength']:
                        safe.pop()
                        break

    if not safe:
        # We don't have any safe directions, so return something that is allowed.
        safe = [k for k in unsafe_directions if k not in disallowed]

    if not safe:
        # We don't have ANY directions. So return the stop choice.
        return [constants.Action.Stop]

    return safe
def act(self, obs, action_space):
    """Update the agent's memory from `obs`, then delegate to a slave agent.

    The method refreshes the remembered board (`_last_seen`,
    `_since_last_seen`), summarises bombs and flames, simulates the board
    with enemies standing still, and stores everything in an `info` dict
    that is handed to the chosen slave's ``act``.

    Args:
        obs: observation dict; the keys read here are "board", "bomb_life",
            "bomb_blast_strength", "position", "enemies" and "teammate".
        action_space: forwarded unchanged to the slave agents.

    Returns:
        Whatever the selected slave's ``act`` returns.
    """

    t0 = time.perf_counter()

    # The number of steps
    self._steps += 1

    # TODO: deepcopy are not needed with Docker
    board = deepcopy(obs["board"])
    bomb_life = deepcopy(obs["bomb_life"])
    bomb_blast_strength = deepcopy(obs["bomb_blast_strength"])
    my_position = obs["position"]
    my_enemies = [constants.Item(e) for e in obs['enemies'] if e != constants.Item.AgentDummy]
    if obs["teammate"] != constants.Item.AgentDummy:
        my_teammate = obs["teammate"]
    else:
        my_teammate = None

    # Teammate position, if the teammate is visible on the board
    teammate_position = None
    if my_teammate is not None:
        rows, cols = np.where(board==my_teammate.value)
        if len(rows):
            teammate_position = (rows[0], cols[0])

    info = dict()
    info["prev_action"] = self._prev_action
    info["prev_position"] = self._prev_position

    #
    # Whether each location is Rigid
    #
    # False may be an unknown
    #
    if self._num_rigid_found < constants.NUM_RIGID:
        self._is_rigid += (board == constants.Item.Rigid.value)
        # The transposed board is accumulated too -- presumably the rigid
        # layout is symmetric about the diagonal; TODO confirm.
        self._is_rigid += (board.T == constants.Item.Rigid.value)
        self._num_rigid_found = np.sum(self._is_rigid)
        self._rigid_locations = np.where(self._is_rigid)
        self._unreachable = ~self._get_reachable(self._is_rigid)
        self._unreachable_locations = np.where(self._unreachable)

    #
    # What we have seen last time, and how many steps have past since then
    #
    # Once we see a Rigid, we always see it
    #
    visible_locations = np.where(board != constants.Item.Fog.value)
    self._last_seen[visible_locations] = board[visible_locations]
    #self._last_seen[self._rigid_locations] = constants.Item.Rigid.value
    # unreachable -> rigid
    self._last_seen[self._unreachable_locations] = constants.Item.Rigid.value
    self._since_last_seen += 1
    self._since_last_seen[visible_locations] = 0
    self._since_last_seen[np.where(self._is_rigid)] = 0

    # NOTE(review): `_steps` was incremented above, so this branch only runs
    # on the first call if `_steps` starts at -1 -- confirm in the initialiser.
    if self._steps == 0:
        # We have some knowledge about the initial configuration of the board
        C = constants.BOARD_SIZE - 2
        self._last_seen[(1, 1)] = constants.Item.Agent0.value
        self._last_seen[(C, 1)] = constants.Item.Agent1.value
        self._last_seen[(C, C)] = constants.Item.Agent2.value
        self._last_seen[(1, C)] = constants.Item.Agent3.value
        rows = np.array([1, C, 1, C])
        cols = np.array([1, 1, C, C])
        self._since_last_seen[(rows, cols)] = 0
        # Cells next to the four starting corners are recorded as Passage
        rows = np.array([1, 1, 1, 1, 2, 3, C - 1, C - 2, C, C, C, C, 2, 3, C - 1, C - 2])
        cols = np.array([2, 3, C - 1, C - 2, 1, 1, 1, 1, 2, 3, C - 1, C - 2, C, C, C, C])
        self._last_seen[(rows, cols)] = constants.Item.Passage.value
        self._since_last_seen[(rows, cols)] = 0

    info["since_last_seen"] = self._since_last_seen
    info["last_seen"] = self._last_seen

    #
    # Modify the board
    #

    #fog_positions = np.where(board==constants.Item.Fog.value)
    #board[fog_positions] = self._last_seen[fog_positions]

    # Unreachable cells are treated as Rigid on the working copy of the board
    board[self._unreachable_locations] = constants.Item.Rigid.value

    #
    # Summarize information about bombs
    #
    # curr_bombs : list of current bombs
    # moving_direction : array of moving direction of bombs
    info["curr_bombs"], info["moving_direction"] \
        = self._get_bombs(board,
                          bomb_blast_strength, self._prev_bomb_blast_strength,
                          bomb_life, self._prev_bomb_life)

    self._prev_bomb_life = bomb_life.copy()
    self._prev_bomb_blast_strength = bomb_blast_strength.copy()

    #
    # (position, strength) of every cell with a positive blast strength;
    # kept so the next step can derive flames from it
    #
    curr_bomb_position_strength = list()
    rows, cols = np.where(bomb_blast_strength > 0)
    for position in zip(rows, cols):
        strength = int(bomb_blast_strength[position])
        curr_bomb_position_strength.append((position, strength))

    #
    # Summarize information about flames
    #
    if self._prev_board is not None:
        info["curr_flames"], self._prev_flame_life \
            = self._get_flames(board,
                               self._prev_board[-1],
                               self._prev_flame_life,
                               self._prev_bomb_position_strength,
                               curr_bomb_position_strength,
                               self._prev_moving_direction)
    else:
        # First call: there is no previous board to compare against
        info["curr_flames"] = []
    info["flame_life"] = self._prev_flame_life

    self._prev_moving_direction = deepcopy(info["moving_direction"])
    self._prev_bomb_position_strength = curr_bomb_position_strength

    #
    # List of simulated boards, assuming enemies stay unmoved
    #
    info["list_boards_no_move"], _ \
        = self._board_sequence(board,
                               info["curr_bombs"],
                               info["curr_flames"],
                               self._search_range,
                               my_position,
                               enemy_mobility=0)

    #
    # Might appear item from flames
    #
    info["might_powerup"] = np.full(self.board_shape, False)
    if self._prev_board is None:
        # Flame life is 2
        # flame life is hardcoded in pommmerman/characters.py class Flame
        # The history is seeded with three copies of the current board
        self._prev_board = [deepcopy(board), deepcopy(board), deepcopy(board)]
    else:
        # Slide the board history: drop the oldest, append the current one
        old_board = self._prev_board.pop(0)
        self._prev_board.append(deepcopy(board))
        if self._might_remaining_powerup:
            # was wood and now flames
            was_wood = (old_board == constants.Item.Wood.value)
            now_flames = (board == constants.Item.Flames.value)
            info["might_powerup"] = was_wood * now_flames

            # now wood and will passage
            now_wood = (board == constants.Item.Wood.value)
            become_passage = (info["list_boards_no_move"][-1] ==constants.Item.Passage.value)
            info["might_powerup"] += now_wood * become_passage

            # Stop tracking once no remembered cell can still hold or
            # reveal a power-up
            maybe_powerup = info["might_powerup"] \
                + (self._last_seen == constants.Item.Wood.value) \
                + (self._last_seen == constants.Item.ExtraBomb.value) \
                + (self._last_seen == constants.Item.IncrRange.value) \
                + (self._last_seen == constants.Item.Kick.value)
            if not maybe_powerup.any():
                self._might_remaining_powerup = False

    # update the estimate of enemy blast strength
    # (looks at bombs whose life is DEFAULT_BOMB_LIFE - 1 and attributes each
    # to whatever the board shows at that cell)
    rows, cols = np.where(bomb_life == constants.DEFAULT_BOMB_LIFE - 1)
    for position in zip(rows, cols):
        if position == my_position:
            continue
        enemy = board[position]
        self._agent_blast_strength[enemy] = bomb_blast_strength[position]
    info["agent_blast_strength"] = self._agent_blast_strength

    # update the last seen enemy position
    for agent in self._since_last_seen_agent:
        self._since_last_seen_agent[agent] += 1
    for enemy in my_enemies:
        rows, cols = np.where(board == enemy.value)
        if len(rows):
            self._last_seen_agent_position[enemy] = (rows[0], cols[0])
            self._since_last_seen_agent[enemy] = 0
            continue
    if teammate_position is not None:
        self._last_seen_agent_position[my_teammate] = teammate_position
        self._since_last_seen_agent[my_teammate] = 0
    info["last_seen_agent_position"] = self._last_seen_agent_position
    info["since_last_seen_agent"] = self._since_last_seen_agent

    #
    # Choose a slave to act
    #

    # Isolation is dropped for good once wood is no longer visible or
    # `_is_closed` reports False
    if self._isolated:
        is_wood_visible = (constants.Item.Wood.value in board)
        is_closed = self._is_closed(board, my_position)
        if any([not is_wood_visible, not is_closed]):
            self._isolated = False

    action = None
    if self._isolated:
        # Act with an agent who do not consider other agents
        if verbose:
            print("IsolatedAgent: ", end="")
        action = self.isolated_slave.act(obs, action_space, info)
    # elif not self._might_remaining_powerup:
    #     Act with an agent who do not consider powerups
    #     print("BattleAgent: ", end="")
    #     action = self.battle_slave.act(obs, action_space, info)
    else:
        if verbose:
            print("GenericAgent: ", end="")
        action = self.generic_slave.act(obs, action_space, info)

    if action is None:
        # Act with a special agent, who only seeks to survive
        if verbose:
            print("\nSurvivingAgent: ", end="")
        action = self.surviving_slave.act(obs, action_space, info)

    # Track the longest time a single call has taken
    this_time = time.perf_counter() - t0
    if this_time > self.max_time:
        self.max_time = this_time

    self._prev_action = action
    self._prev_position = my_position

    return action
def act(self, obs, action_space, info):
    """Choose an action that is survivable and hinders the enemies most.

    Outline:
      1. Simulate future boards for every candidate action and keep the
         actions from which a survivable path exists.
      2. Score each candidate by the fraction of enemy survival nodes it
         blocks (discounted by how much it blocks the teammate), perturbed
         by Gumbel noise and scaled by ``self._inv_tmp``.
      3. Bomb, or move toward a better bombing place, when the score passes
         the corresponding threshold.
      4. Otherwise drop risky and teammate-interfering actions, then chase a
         recently seen enemy, move away from the teammate, or explore fog.

    Args:
        obs: observation dict; reads "board", "position", "ammo",
            "blast_strength", "enemies", "teammate", "can_kick",
            "bomb_blast_strength" and "bomb_life".  NOTE: ``obs['board']``
            is modified in place with recently seen cells.
        action_space: unused here.
        info: dict prepared by the caller; reads "since_last_seen",
            "last_seen", "prev_action", "prev_position", "moving_direction",
            "list_boards_no_move", "flame_life", "curr_bombs",
            "curr_flames" and "agent_blast_strength".

    Returns:
        The ``.value`` of the chosen ``constants.Action``, or ``None`` when
        no survivable action was found.
    """

    #
    # Definitions
    #

    board = obs['board']

    # Fill in cells seen within the last 3 steps from memory
    recently_seen_positions = (info["since_last_seen"] < 3)
    board[recently_seen_positions] = info["last_seen"][recently_seen_positions]

    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_enemies = [constants.Item(e) for e in obs['enemies'] if e != constants.Item.AgentDummy]
    if obs["teammate"] != constants.Item.AgentDummy:
        my_teammate = obs["teammate"]
    else:
        my_teammate = None
    my_kick = obs["can_kick"]  # whether I can kick

    if verbose:
        print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t")

    # Where each action would take me; None when off-board or Rigid
    my_next_position = {constants.Action.Stop: my_position,
                        constants.Action.Bomb: my_position}
    for action in [constants.Action.Up, constants.Action.Down,
                   constants.Action.Left, constants.Action.Right]:
        next_position = self._get_next_position(my_position, action)
        if self._on_board(next_position):
            if board[next_position] == constants.Item.Rigid.value:
                my_next_position[action] = None
            else:
                my_next_position[action] = next_position
        else:
            my_next_position[action] = None

    #
    # Understand current situation
    #

    if all([info["prev_action"] not in [constants.Action.Stop, constants.Action.Bomb],
            info["prev_position"] == my_position]):
        # if previously blocked, do not repeat with some probability
        self._inv_tmp *= self._backoff
    else:
        self._inv_tmp = self._inv_tmp_init

    # enemy positions
    enemy_positions = list()
    for enemy in my_enemies:
        rows, cols = np.where(board==enemy.value)
        if len(rows) == 0:
            continue
        enemy_positions.append((rows[0], cols[0]))

    # teammate position
    teammate_position = None
    if my_teammate is not None:
        rows, cols = np.where(board==my_teammate.value)
        if len(rows):
            teammate_position = (rows[0], cols[0])

    # Positions where we kick a bomb if we move to
    if my_kick:
        kickable, might_kickable = self._kickable_positions(obs, info["moving_direction"])
    else:
        kickable = set()
        might_kickable = set()

    # positions that might be blocked
    if teammate_position is None:
        agent_positions = enemy_positions
    else:
        agent_positions = enemy_positions + [teammate_position]
    might_blocked = self._get_might_blocked(board, my_position, agent_positions, might_kickable)

    # enemy positions over time
    # these might be disappeared due to extra flames
    if len(enemy_positions):
        rows = [p[0] for p in enemy_positions]
        cols = [p[1] for p in enemy_positions]
        list_enemy_positions = [(rows, cols)]
        # Kept as a set for the whole loop: the previous code rebound a list
        # to a set and then called .append on it, which raised
        # AttributeError as soon as _enemy_mobility was 2 or more.
        _enemy_positions = set()
        for t in range(self._enemy_mobility):
            rows, cols = list_enemy_positions[-1]
            for x, y in zip(rows, cols):
                for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1)]:
                    next_position = (x + dx, y + dy)
                    if not self._on_board(next_position):
                        continue
                    _board = info["list_boards_no_move"][t]
                    if utility.position_is_passage(_board, next_position):
                        _enemy_positions.add(next_position)
            rows = [p[0] for p in _enemy_positions]
            cols = [p[1] for p in _enemy_positions]
            list_enemy_positions.append((rows, cols))
    else:
        list_enemy_positions = []

    # survivable actions
    is_survivable = dict()
    for a in self._get_all_actions():
        is_survivable[a] = False
    n_survivable = dict()
    list_boards = dict()
    for my_action in self._get_all_actions():

        next_position = my_next_position[my_action]

        if next_position is None:
            continue

        if my_action == constants.Action.Bomb:
            # Cannot bomb without ammo or on top of an existing bomb
            if any([my_ammo == 0,
                    obs["bomb_blast_strength"][next_position] > 0]):
                continue

        if all([utility.position_is_flames(board, next_position),
                info["flame_life"][next_position] > 1]):
            continue

        # Do not walk into a bomb that cannot be kicked
        if all([my_action != constants.Action.Stop,
                obs["bomb_blast_strength"][next_position] > 0,
                next_position not in set.union(kickable, might_kickable)]):
            continue

        if next_position in set.union(kickable, might_kickable):
            # do not kick into fog
            dx = next_position[0] - my_position[0]
            dy = next_position[1] - my_position[1]
            position = next_position
            is_fog = False
            while self._on_board(position):
                if utility.position_is_fog(board, position):
                    is_fog = True
                    break
                position = (position[0] + dx, position[1] + dy)
            if is_fog:
                continue

        # list of boards from next steps
        list_boards[my_action], _ \
            = self._board_sequence(obs["board"],
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_blast_strength=my_blast_strength,
                                   my_action=my_action,
                                   can_kick=my_kick,
                                   enemy_mobility=self._enemy_mobility,
                                   enemy_bomb=self._enemy_bomb,
                                   agent_blast_strength=info["agent_blast_strength"])

        # agents might be disappeared, because of overestimated bombs
        for t, positions in enumerate(list_enemy_positions):
            list_boards[my_action][t][positions] = constants.Item.AgentDummy.value

        # some bombs may explode with extra bombs, leading to under estimation
        for t in range(len(list_boards[my_action])):
            flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value)
            list_boards[my_action][t][flame_positions] = constants.Item.Flames.value

    # For each simulated action, count the survivable positions per time step
    for my_action in list_boards:
        survivable = search_time_expanded_network(list_boards[my_action][1:],
                                                  my_next_position[my_action])
        if my_next_position[my_action] in survivable[0]:
            is_survivable[my_action] = True
            n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]]

    survivable_actions = list()
    for a in is_survivable:
        if not is_survivable[a]:
            continue
        if might_blocked[a] and not is_survivable[constants.Action.Stop]:
            continue
        if n_survivable[a][-1] <= 1:
            # At most one survivable position at the horizon: too fragile
            is_survivable[a] = False
            continue
        survivable_actions.append(a)

    #
    # Choose action
    #

    if len(survivable_actions) == 0:
        #
        # return None, if no survivable actions
        #
        return None

    elif len(survivable_actions) == 1:
        #
        # Choose the survivable action, if it is the only choice
        #
        action = survivable_actions[0]
        if verbose:
            print("The only survivable action", action)
        return action.value

    #
    # Bomb at a target
    #

    # fraction of blocked node in the survival trees of enemies
    _list_boards = deepcopy(info["list_boards_no_move"])
    # Take myself off the simulated boards: a bomb if I stand on one,
    # a passage otherwise
    if obs["bomb_blast_strength"][my_position]:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Bomb.value
    else:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Passage.value

    total_frac_blocked, n_survivable_nodes, _ \
        = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

    if teammate_position is not None:
        total_frac_blocked_teammate, n_survivable_nodes_teammate, _ \
            = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

    # Survivability of each action, clipped to 1
    p_survivable = defaultdict(float)
    for action in n_survivable:
        p_survivable[action] = sum(n_survivable[action]) / self._my_survivability_threshold
        if p_survivable[action] > 1:
            p_survivable[action] = 1

    # block[action]: score of the action; higher blocks enemies more
    block = defaultdict(float)
    for action in [constants.Action.Stop,
                   constants.Action.Up, constants.Action.Down,
                   constants.Action.Left, constants.Action.Right]:
        next_position = my_next_position[action]
        if next_position is None:
            continue
        if next_position in set.union(kickable, might_kickable):
            # kick will be considered later
            continue
        if all([utility.position_is_flames(board, next_position),
                info["flame_life"][next_position] > 1,
                is_survivable[constants.Action.Stop]]):
            # if the next position is flames,
            # I want to stop to wait, which must be feasible
            block[action] = total_frac_blocked[next_position] * p_survivable[constants.Action.Stop]
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_teammate[next_position])
            block[action] *= self._inv_tmp
            # Gumbel noise
            block[action] -= np.log(-np.log(self.random.uniform()))
            continue
        elif not is_survivable[action]:
            continue
        if all([might_blocked[action],
                not is_survivable[constants.Action.Stop]]):
            continue

        block[action] = total_frac_blocked[next_position] * p_survivable[action]
        if teammate_position is not None:
            block[action] *= (1 - total_frac_blocked_teammate[next_position])
        block[action] *= self._inv_tmp
        block[action] -= np.log(-np.log(self.random.uniform()))
        if might_blocked[action]:
            # I may end up staying put, so average the two outcomes
            block[action] = (total_frac_blocked[my_position] * p_survivable[constants.Action.Stop]
                             + total_frac_blocked[next_position] * p_survivable[action]) / 2
            if teammate_position is not None:
                block[action] *= (1 - total_frac_blocked_teammate[next_position])
            block[action] *= self._inv_tmp
            block[action] -= np.log(-np.log(self.random.uniform()))

    if is_survivable[constants.Action.Bomb]:
        list_boards_with_bomb, _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_blast_strength=my_blast_strength,
                                   my_action=constants.Action.Bomb)

        n_survivable_nodes_with_bomb = defaultdict(int)
        for enemy in my_enemies:
            # get survivable tree of the enemy
            rows, cols = np.where(board==enemy.value)
            if len(rows) == 0:
                continue
            enemy_position = (rows[0], cols[0])
            _survivable = search_time_expanded_network(list_boards_with_bomb,
                                                       enemy_position)
            n_survivable_nodes_with_bomb[enemy] = sum([len(positions) for positions in _survivable])

        n_with_bomb = sum([n_survivable_nodes_with_bomb[enemy] for enemy in my_enemies])
        n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies])
        if n_with_none == 0:
            total_frac_blocked_with_bomb = 0

            # place more bombs, so the stacked enemy cannot kick
            x, y = my_position
            for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                next_position = (x + dx, y + dy)
                following_position = (x + 2 * dx, y + 2 * dy)
                if not self._on_board(following_position):
                    continue
                if all([obs["bomb_life"][next_position] > 0,
                        board[following_position] > constants.Item.AgentDummy.value]):
                    total_frac_blocked_with_bomb = 1
        else:
            total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none

        if teammate_position is not None:
            # get survivable tree of the teammate
            _survivable = search_time_expanded_network(list_boards_with_bomb, teammate_position)
            n_survivable_nodes_with_bomb_teammate = sum([len(positions) for positions in _survivable])

            n_with_bomb = n_survivable_nodes_with_bomb_teammate
            n_with_none = n_survivable_nodes_teammate[my_teammate]
            if n_with_none == 0:
                total_frac_blocked_with_bomb_teammate = 0
            else:
                total_frac_blocked_with_bomb_teammate = 1 - n_with_bomb / n_with_none

        action = constants.Action.Bomb
        block[action] = total_frac_blocked_with_bomb * p_survivable[action]
        if teammate_position is not None:
            block[action] *= (1 - total_frac_blocked_with_bomb_teammate)
        block[action] *= self._inv_tmp
        block[action] -= np.log(-np.log(self.random.uniform()))

    for next_position in kickable:

        action = self._get_direction(my_position, next_position)
        if not is_survivable[action]:
            continue

        list_boards_with_kick, _ \
            = self._board_sequence(obs["board"],
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=action,
                                   can_kick=True)

        n_survivable_nodes_with_kick = defaultdict(int)
        for enemy in my_enemies:
            # get survivable tree of the enemy
            rows, cols = np.where(board==enemy.value)
            if len(rows) == 0:
                continue
            enemy_position = (rows[0], cols[0])
            _survivable = search_time_expanded_network(list_boards_with_kick,
                                                       enemy_position)
            n_survivable_nodes_with_kick[enemy] = sum([len(positions) for positions in _survivable])

        n_with_kick = sum([n_survivable_nodes_with_kick[enemy] for enemy in my_enemies])
        n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies])
        if n_with_none == 0:
            total_frac_blocked[next_position] = 0
        else:
            total_frac_blocked[next_position] = 1 - n_with_kick / n_with_none

        if teammate_position is not None:
            # get survivable tree of the teammate
            _survivable = search_time_expanded_network(list_boards_with_kick, teammate_position)
            n_survivable_nodes_with_kick_teammate = sum([len(positions) for positions in _survivable])

            n_with_kick = n_survivable_nodes_with_kick_teammate
            n_with_none = n_survivable_nodes_teammate[my_teammate]
            if n_with_none == 0:
                total_frac_blocked_teammate[next_position] = 0
            else:
                total_frac_blocked_teammate[next_position] = 1 - n_with_kick / n_with_none

        block[action] = total_frac_blocked[next_position] * p_survivable[action]
        if teammate_position is not None:
            block[action] *= (1 - total_frac_blocked_teammate[next_position])
        block[action] *= self._inv_tmp
        block[action] -= np.log(-np.log(self.random.uniform()))

    # Best-scoring action over everything scored so far
    max_block = -np.inf
    best_action = None
    for action in block:
        if block[action] > max_block:
            max_block = block[action]
            best_action = action

    if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp:

        if teammate_position is not None:
            teammate_safety = total_frac_blocked_with_bomb_teammate * n_survivable_nodes_with_bomb_teammate
            if any([teammate_safety > self._teammate_survivability_threshold,
                    total_frac_blocked_with_bomb_teammate < self._interfere_threshold,
                    total_frac_blocked_with_bomb_teammate < total_frac_blocked_teammate[my_position]]):
                teammate_ok = True
            else:
                teammate_ok = False
        else:
            teammate_ok = True

        if teammate_ok:
            if best_action == constants.Action.Bomb:
                if verbose:
                    print("Bomb is best", constants.Action.Bomb)
                return constants.Action.Bomb.value

            if best_action == constants.Action.Stop:
                if verbose:
                    print("Place a bomb at a locally optimal position", constants.Action.Bomb)
                return constants.Action.Bomb.value

    #
    # Move towards where to bomb
    #

    if best_action not in [None, constants.Action.Bomb]:

        next_position = my_next_position[best_action]

        should_chase = (total_frac_blocked[next_position] > self._chase_threshold)

        if teammate_position is not None:
            teammate_safety = total_frac_blocked_teammate[next_position] * n_survivable_nodes_teammate[my_teammate]
            if any([teammate_safety > self._teammate_survivability_threshold,
                    total_frac_blocked_teammate[next_position] < self._interfere_threshold,
                    total_frac_blocked_teammate[next_position] < total_frac_blocked_teammate[my_position]]):
                teammate_ok = True
            else:
                teammate_ok = False
        else:
            teammate_ok = True

        if should_chase and teammate_ok:
            if all([utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1,
                    is_survivable[constants.Action.Stop]]):
                action = constants.Action.Stop
                if verbose:
                    print("Wait flames life", action)
                return action.value
            else:
                if verbose:
                    print("Move towards better place to bomb", best_action)
                return best_action.value

    # Exclude the action representing stop to wait
    max_block = -np.inf
    best_action = None
    for action in survivable_actions:
        if block[action] > max_block:
            max_block = block[action]
            best_action = action

    #
    # Do not take risky actions
    #

    most_survivable_action = self._action_most_survivable(n_survivable)

    # ignore actions with low survivability
    _survivable_actions = list()
    for action in n_survivable:
        n = sum(n_survivable[action])
        if not is_survivable[action]:
            continue
        elif n > self._my_survivability_threshold:
            _survivable_actions.append(action)
        else:
            if verbose:
                print("RISKY", action)
            is_survivable[action] = False

    if len(_survivable_actions) > 1:
        survivable_actions = _survivable_actions
    elif best_action is not None:
        if verbose:
            print("Take the best action in danger", best_action)
        return best_action.value
    else:
        # Take the most survivable action
        if verbose:
            print("Take the most survivable action", most_survivable_action)
        return most_survivable_action.value

    #
    # Do not interfere with teammate
    #

    if all([teammate_position is not None,
            len(enemy_positions) > 0 or len(info["curr_bombs"]) > 0]):
        # ignore actions that interfere with teammate
        min_interfere = np.inf
        least_interfere_action = None
        _survivable_actions = list()
        for action in survivable_actions:
            if action == constants.Action.Bomb:
                frac = total_frac_blocked_with_bomb_teammate
            else:
                next_position = my_next_position[action]
                frac = total_frac_blocked_teammate[next_position]
            if frac < min_interfere:
                min_interfere = frac
                least_interfere_action = action
            if frac < self._interfere_threshold:
                _survivable_actions.append(action)
            else:
                if verbose:
                    print("INTERFERE", action)
                is_survivable[action] = False

        if len(_survivable_actions) > 1:
            survivable_actions = _survivable_actions
        elif best_action is not None:
            # Take the best action found so far
            if verbose:
                print("Take the best action in intereference", best_action)
            return best_action.value
        else:
            # Take the least interfering action
            if verbose:
                print("Take the least interfering action", least_interfere_action)
            return least_interfere_action.value

    #
    # Find reachable items
    #

    # List of boards simulated
    list_boards, _ = self._board_sequence(board,
                                          info["curr_bombs"],
                                          info["curr_flames"],
                                          self._search_range,
                                          my_position,
                                          enemy_mobility=self._enemy_mobility,
                                          enemy_bomb=self._enemy_bomb,
                                          agent_blast_strength=info["agent_blast_strength"])

    # some bombs may explode with extra bombs, leading to under estimation
    for t in range(len(list_boards)):
        flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value)
        list_boards[t][flame_positions] = constants.Item.Flames.value

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, _, _ = self._search_time_expanded_network(list_boards,
                                                                my_position)
    if len(survivable[-1]) == 0:
        survivable = [set() for _ in range(len(survivable))]

    # Items and bomb target that can be reached in a survivable manner
    _, _, next_to_items \
        = self._find_reachable_items(list_boards,
                                     my_position,
                                     survivable)

    #
    # If I have seen an enemy recently and cannot see him now, then move to the last seen position
    #

    action = self._action_to_enemy(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info,
                                   my_enemies)
    if action is not None:
        if verbose:
            print("Moving toward last seen enemy", action)
        return action.value

    #
    # If I have seen a teammate recently, then move away from the last seen position
    #

    action = self._action_away_from_teammate(my_position,
                                             next_to_items[constants.Item.Fog],
                                             prev,
                                             is_survivable,
                                             info,
                                             my_teammate)
    if action is not None:
        if verbose:
            print("Moving away from last seen teammate", action)
        return action.value

    #
    # Move towards a fog where we have not seen longest
    #

    action = self._action_to_fog(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info)

    if action is not None:
        # Explore only with probability 0.8
        if self.random.uniform() < 0.8:
            if verbose:
                print("Moving toward oldest fog", action)
            return action.value

    #
    # Choose most survivable action
    #

    max_block = -np.inf
    best_action = None
    for action in survivable_actions:
        if action == constants.Action.Bomb:
            continue
        if block[action] > max_block:
            max_block = block[action]
            best_action = action

    if verbose:
        print("Take the best action among safe actions (nothing else to do)", best_action)

    if best_action is None:
        # this should not be the case
        return None
    else:
        return best_action.value
def act(self, obs, action_space):
    """Update the remembered game state and delegate to a sub-agent.

    Builds the ``info`` dictionary (remembered board, bombs, flames,
    simulated future boards, enemy/teammate positions, candidate next
    positions, kickable bombs and blocking information) from ``obs`` and
    the state kept on ``self``, then asks one of the sub-agents
    (``isolated_slave``, ``battle_slave`` or ``generic_slave``, with
    ``surviving_slave`` as the fallback) for the action.

    Args:
        obs: Observation dictionary.  The keys read here are "board",
            "bomb_life", "bomb_blast_strength", "position", "enemies",
            "teammate", "ammo", "can_kick" and "game_env".  The
            "bomb_life" and "bomb_blast_strength" arrays are modified in
            place on the step that follows a collapse.
        action_space: Passed through unchanged to the sub-agents.

    Returns:
        Whatever the selected sub-agent's ``act`` returns.
    """
    # The number of steps
    # NOTE(review): the `self._steps == 0` branch below only runs if the
    # counter is initialised to -1 elsewhere -- confirm.
    self._steps += 1

    # Collapse the board if just collapsed in the previous step
    info = dict()
    info["steps"] = self._steps
    info["recently_seen"] = deepcopy(obs["board"])
    if self._just_collapsed is not None:
        # L and U are the lower and upper index of the collapsed ring.
        L = self._just_collapsed
        U = obs["board"].shape[0] - 1 - L

        flames_positions = np.where(
            obs["board"] == constants.Item.Flames.value)
        info["recently_seen"][L, :][L:U + 1] = constants.Item.Rigid.value
        info["recently_seen"][U, :][L:U + 1] = constants.Item.Rigid.value
        info["recently_seen"][:, L][L:U + 1] = constants.Item.Rigid.value
        info["recently_seen"][:, U][L:U + 1] = constants.Item.Rigid.value
        # Flames observed on the board are kept on top of the new ring.
        info["recently_seen"][
            flames_positions] = constants.Item.Flames.value

        obs["bomb_life"][L, :][L:U + 1] = 0
        obs["bomb_life"][U, :][L:U + 1] = 0
        obs["bomb_life"][:, L][L:U + 1] = 0
        obs["bomb_life"][:, U][L:U + 1] = 0

        obs["bomb_blast_strength"][L, :][L:U + 1] = 0
        obs["bomb_blast_strength"][U, :][L:U + 1] = 0
        obs["bomb_blast_strength"][:, L][L:U + 1] = 0
        obs["bomb_blast_strength"][:, U][L:U + 1] = 0

    #
    # Whether each location is Rigid
    #
    #  False may be an unknown
    #
    if self._num_rigid_found < constants.NUM_RIGID:
        self._is_rigid += (obs["board"] == constants.Item.Rigid.value)
        # The transposed observation is also used as evidence of rigid
        # cells (presumably the rigid layout is symmetric -- confirm).
        self._is_rigid += (obs["board"].T == constants.Item.Rigid.value)
        self._num_rigid_found = np.sum(self._is_rigid)
        self._unreachable = ~self._get_reachable(self._is_rigid)
        self._unreachable_locations = np.where(self._unreachable)

    #
    # What we have seen last time, and how many steps have passed since then
    #
    visible_locations = np.where(obs["board"] != constants.Item.Fog.value)
    self._last_seen[visible_locations] = obs["board"][visible_locations]
    self._last_seen[
        self._unreachable_locations] = constants.Item.Rigid.value
    self._since_last_seen += 1
    self._since_last_seen[visible_locations] = 0
    self._since_last_seen[np.where(self._is_rigid)] = 0
    if self._steps == 0:
        # We have some knowledge about the initial configuration of the board
        C = constants.BOARD_SIZE - 2
        self._last_seen[(1, 1)] = constants.Item.Agent0.value
        self._last_seen[(C, 1)] = constants.Item.Agent1.value
        self._last_seen[(C, C)] = constants.Item.Agent2.value
        self._last_seen[(1, C)] = constants.Item.Agent3.value
        rows = np.array([1, C, 1, C])
        cols = np.array([1, 1, C, C])
        self._since_last_seen[(rows, cols)] = 0
        # Cells next to the four starting corners are recorded as passages.
        rows = np.array([
            1, 1, 1, 1, 2, 3, C - 1, C - 2, C, C, C, C, 2, 3, C - 1, C - 2
        ])
        cols = np.array([
            2, 3, C - 1, C - 2, 1, 1, 1, 1, 2, 3, C - 1, C - 2, C, C, C, C
        ])
        self._last_seen[(rows, cols)] = constants.Item.Passage.value
        self._since_last_seen[(rows, cols)] = 0

    #
    # We know exactly how my teammate is digging
    #
    my_position = obs["position"]
    if self._steps == 33:
        # Fogged cells whose transposed counterpart is a known passage are
        # recorded as passages too.
        passage_under_fog \
            = (self._last_seen.T == constants.Item.Passage.value) * (self._last_seen == constants.Item.Fog.value)
        positions = np.where(passage_under_fog)
        self._last_seen[positions] = constants.Item.Passage.value
        self._since_last_seen[positions] = 0

    info["since_last_seen"] = self._since_last_seen
    info["last_seen"] = self._last_seen
    # Right after a collapse we do not see the true board, so the merge
    # with the remembered board is skipped.  The test must be `is None`:
    # a collapse of ring 0 is stored as the falsy value 0, and a plain
    # truthiness check would wrongly treat it as "no collapse".
    if self._just_collapsed is None:
        recently_seen_positions = (info["since_last_seen"] <
                                   self._use_last_seen)
        info["recently_seen"][recently_seen_positions] = info["last_seen"][
            recently_seen_positions]

    # TODO: deepcopy are not needed with Docker
    board = info["recently_seen"]
    bomb_life = obs["bomb_life"]
    bomb_blast_strength = obs["bomb_blast_strength"]
    my_enemies = [
        constants.Item(e) for e in obs['enemies']
        if e != constants.Item.AgentDummy
    ]
    if obs["teammate"] != constants.Item.AgentDummy:
        my_teammate = obs["teammate"]
    else:
        my_teammate = None

    info["prev_action"] = self._prev_action
    info["prev_position"] = self._prev_position

    #
    # Modify the board
    #
    board[self._unreachable_locations] = constants.Item.Rigid.value

    #
    # Summarize information about bombs
    #
    #  curr_bombs : list of current bombs
    #  moving_direction : array of moving direction of bombs
    info["curr_bombs"], info["moving_direction"] \
        = self._get_bombs(obs["board"],  # use observation to keep the bombs under fog
                          bomb_blast_strength,
                          self._prev_bomb_blast_strength,
                          bomb_life,
                          self._prev_bomb_life)

    self._prev_bomb_life = bomb_life.copy()
    self._prev_bomb_blast_strength = bomb_blast_strength.copy()

    #
    # (position, strength) of every bomb currently on the board
    #
    curr_bomb_position_strength = list()
    rows, cols = np.where(bomb_blast_strength > 0)
    for position in zip(rows, cols):
        strength = int(bomb_blast_strength[position])
        curr_bomb_position_strength.append((position, strength))

    #
    # Summarize information about flames
    #
    if self._prev_board is not None:
        info["curr_flames"], self._prev_flame_life \
            = self._get_flames(obs["board"],  # use observation to keep the bombs under fog
                               self._prev_board[-1],
                               self._prev_flame_life,
                               self._prev_bomb_position_strength,
                               curr_bomb_position_strength,
                               self._prev_moving_direction)
    else:
        info["curr_flames"] = []
    info["flame_life"] = self._prev_flame_life

    self._prev_moving_direction = deepcopy(info["moving_direction"])
    self._prev_bomb_position_strength = curr_bomb_position_strength

    #
    # List of simulated boards, assuming enemies stay unmoved
    #
    step_to_collapse = None
    collapse_ring = None
    if obs["game_env"] == 'pommerman.envs.v1:Pomme':
        # Collapse mode

        # cannot trust the board just collapsed, so skip
        if self._just_collapsed is None:
            already_collapsed = (self._when_collapse < self._steps)
            not_rigid = (obs["board"] != constants.Item.Rigid.value) * (
                obs["board"] != constants.Item.Fog.value)
            not_collapsed_positions = np.where(already_collapsed *
                                               not_rigid)
            self._when_collapse[not_collapsed_positions] = np.inf

        collapse_steps = [
            step for step in self._collapse_steps if step >= self._steps
        ]
        if len(collapse_steps):
            step_to_collapse = min(collapse_steps) - self._steps
            collapse_ring = len(self._collapse_steps) - len(collapse_steps)
            if step_to_collapse == 0:
                # Remembered so that the next call can patch the board.
                self._just_collapsed = collapse_ring
            else:
                self._just_collapsed = None
        else:
            self._just_collapsed = None

    info["step_to_collapse"] = step_to_collapse
    info["collapse_ring"] = collapse_ring

    info["list_boards_no_move"], _ \
        = self._board_sequence(board,
                               info["curr_bombs"],
                               info["curr_flames"],
                               self._search_range,
                               my_position,
                               enemy_mobility=0,
                               step_to_collapse=step_to_collapse,
                               collapse_ring=collapse_ring)

    #
    # Might appear item from flames
    #
    info["might_powerup"] = np.full(self.board_shape, False)
    if self._prev_board is None:
        # Flame life is 2
        # flame life is hardcoded in pommerman/characters.py class Flame
        self._prev_board = [
            deepcopy(board), deepcopy(board), deepcopy(board)
        ]
    else:
        old_board = self._prev_board.pop(0)
        self._prev_board.append(deepcopy(board))
        if self._might_remaining_powerup:
            # was wood and now flames
            was_wood = (old_board == constants.Item.Wood.value)
            now_flames = (board == constants.Item.Flames.value)
            info["might_powerup"] = was_wood * now_flames

            # now wood and will passage
            now_wood = (board == constants.Item.Wood.value)
            become_passage = (info["list_boards_no_move"][-1] ==
                              constants.Item.Passage.value)
            info["might_powerup"] += now_wood * become_passage

            maybe_powerup = info["might_powerup"] \
                + (self._last_seen == constants.Item.Wood.value) \
                + (self._last_seen == constants.Item.ExtraBomb.value) \
                + (self._last_seen == constants.Item.IncrRange.value) \
                + (self._last_seen == constants.Item.Kick.value)
            if not maybe_powerup.any():
                self._might_remaining_powerup = False

    # update the estimate of enemy blast strength
    rows, cols = np.where(bomb_life == constants.DEFAULT_BOMB_LIFE - 1)
    for position in zip(rows, cols):
        if position == my_position:
            continue
        enemy = board[position]
        self._agent_blast_strength[enemy] = bomb_blast_strength[position]
    info["agent_blast_strength"] = self._agent_blast_strength

    # enemy positions
    info["enemy_positions"] = list()
    for enemy in my_enemies:
        rows, cols = np.where(board == enemy.value)
        if len(rows) == 1:
            info["enemy_positions"].append((rows[0], cols[0]))
        elif len(rows) > 1:
            # choose the most recently seen enemy of this ID, because only one
            time_passed = info["since_last_seen"][(rows, cols)]
            idx = np.argmin(time_passed)
            enemy_position = (rows[idx], cols[idx])
            # overwrite the stale copies of this enemy by passage
            board[(rows, cols)] = constants.Item.Passage.value
            board[enemy_position] = enemy.value
            info["enemy_positions"].append(enemy_position)

    # teammate position
    info["teammate_position"] = None
    if my_teammate is not None:
        rows, cols = np.where(board == my_teammate.value)
        if len(rows) == 1:
            info["teammate_position"] = (rows[0], cols[0])
        elif len(rows) > 1:
            # choose the most recently seen teammate, because only one
            time_passed = info["since_last_seen"][(rows, cols)]
            idx = np.argmin(time_passed)
            info["teammate_position"] = (rows[idx], cols[idx])
            # overwrite old teammates by passage
            board[(rows, cols)] = constants.Item.Passage.value
            board[info["teammate_position"]] = my_teammate.value

    # next positions
    info["my_next_position"] = {constants.Action.Stop: my_position}
    if all([obs["ammo"] > 0,
            obs["bomb_blast_strength"][my_position] == 0]):
        info["my_next_position"][constants.Action.Bomb] = my_position
    else:
        info["my_next_position"][constants.Action.Bomb] = None
    for action in [
            constants.Action.Up, constants.Action.Down,
            constants.Action.Left, constants.Action.Right
    ]:
        next_position = self._get_next_position(my_position, action)
        if self._on_board(next_position):
            if board[next_position] in [
                    constants.Item.Rigid.value, constants.Item.Wood.value
            ]:
                info["my_next_position"][action] = None
            else:
                info["my_next_position"][action] = next_position
        else:
            info["my_next_position"][action] = None

    # kickable positions
    if obs["can_kick"]:
        is_bomb = np.full(self.board_shape, False)
        is_bomb[np.where(obs["bomb_blast_strength"] > 0)] = True
        info["kickable"], info["might_kickable"] \
            = self._kickable_positions(obs, is_bomb,
                                       info["moving_direction"])
        info["all_kickable"] = set.union(info["kickable"],
                                         info["might_kickable"])
    else:
        info["kickable"] = set()
        info["might_kickable"] = set()
        info["all_kickable"] = set()

    # might block/blocked actions
    # I am the leader if agent0 or agent1
    # I am the follower otherwise
    # If leader, not blocked by teammate
    # If follower, do not block teammate
    info["might_blocked"] = self._get_might_blocked(
        board, my_position, info["enemy_positions"],
        info["might_kickable"])
    if all([
            board[my_position] in
            [constants.Item.Agent2.value, constants.Item.Agent3.value],
            info["teammate_position"] is not None
    ]):
        info["might_block_teammate"] = self._get_might_blocked(
            board, my_position, [info["teammate_position"]],
            info["might_kickable"])
    else:
        info["might_block_teammate"] = defaultdict(bool)
    info["might_block_actions"] = set([
        a for a in info["might_block_teammate"]
        if info["might_block_teammate"][a]
    ])

    #
    # Choose a slave to act
    #
    if self._isolated:
        # Leave isolated mode as soon as no wood is visible or the
        # surrounding area is no longer closed.
        is_wood_visible = (constants.Item.Wood.value in board)
        is_closed = self._is_closed(board, my_position)
        if any([not is_wood_visible, not is_closed]):
            self._isolated = False

    action = None
    if self._isolated:
        # Act with an agent who do not consider other dypmAgents
        action = self.isolated_slave.act(obs, action_space, info)
    elif not self._might_remaining_powerup:
        # Act with an agent who do not consider powerups
        if obs["game_env"] == 'pommerman.envs.v1:Pomme':
            info["escape"] = (self._when_collapse == np.inf) * (
                info["last_seen"] == constants.Item.Passage.value)
        action = self.battle_slave.act(obs, action_space, info)
    else:
        action = self.generic_slave.act(obs, action_space, info)

    if action is None:
        # Act with a special agent, who only seeks to survive
        action = self.surviving_slave.act(obs, action_space, info)

    self._prev_action = action
    self._prev_position = my_position

    return action
def obs_to_net_in(self, obs):
    """Turn an observation dictionary into a stack of feature planes.

    Every key in ``key_list`` becomes one plane of the board's shape,
    except "board", which is expanded into several planes: a one-hot
    plane per non-agent item, an enemy plane, a teammate plane, a plane
    for this agent, the "desire" plane of the previous call and, in the
    last channel, the current "desire" plane (each cell rated through
    ``desire_dict``).

    Side effect: ``self.last_board`` is replaced by a copy of the current
    desire plane.

    Args:
        obs: Observation dictionary.  Besides the keys in ``key_list``
            this reads "enemies", "teammate" and "alive".

    Returns:
        Array of shape
        ``(*board_shape, len(key_list) + len(constants.Item) - 1)``.
    """
    # Handle order of the keys.  "board" must stay last because its
    # planes are written starting at its own index.
    # Observation keys that are deliberately not fed to the network:
    # game_type, game_env, step_count, alive, position, teammate,
    # enemies, message.
    key_list = [
        "bomb_life",
        "bomb_blast_strength",
        "bomb_moving_direction",
        "flame_life",
        "blast_strength",
        "can_kick",
        "ammo",
        "board"
    ]

    type_dict = {
        # Full board to break down
        "board": "full_board",
        # Board type values:
        "bomb_blast_strength": "board",
        "bomb_life": "board",
        "bomb_moving_direction": "board",
        "flame_life": "board",
        # Single value type values:
        "blast_strength": "val",
        "can_kick": "val",
        "ammo": "val",
        "game_type": "val",
        "game_env": "val",
        "step_count": "val",
        # Special type values:
        "alive": None,
        "position": "pos",
        "message": "pos",
        "teammate": "mate",
        "enemies": "enemy"
    }

    # Rating of items (prefer power-ups, avoid flames)
    desire_dict = {
        constants.Item.ExtraBomb.value: 0,
        constants.Item.Kick.value: 0,
        constants.Item.IncrRange.value: 0,
        constants.Item.Wood.value: 1,
        constants.Item.Passage.value: 2,
        constants.Item.Fog.value: 3,
        constants.Item.Rigid.value: 5,
        constants.Item.Agent0.value: 6,
        constants.Item.Agent1.value: 6,
        constants.Item.Agent2.value: 6,
        constants.Item.Agent3.value: 6,
        constants.Item.AgentDummy.value: 6,
        constants.Item.Bomb.value: 7,
        constants.Item.Flames.value: 8
    }

    board_shape = obs['board'].shape
    agents = [
        constants.Item.Agent0, constants.Item.Agent1, constants.Item.Agent2,
        constants.Item.Agent3, constants.Item.AgentDummy
    ]
    enemies = obs['enemies']
    mate = obs['teammate']
    alive = obs['alive']

    # `player` starts as every alive agent; enemies and the teammate are
    # removed so that what is left identifies this agent.
    player = [constants.Item(e) for e in alive]
    for e in enemies:
        desire_dict[e.value] = 4
        try:
            player.remove(e)
        except ValueError:
            # This enemy is not alive any more.
            pass
    try:
        player.remove(mate)
    except ValueError:
        # No alive teammate in the list.
        pass

    net_in = np.zeros(
        (*board_shape, len(key_list) + len(constants.Item) - 1))
    for idx, key in enumerate(key_list):
        # Determine current value_type to handle value accordingly
        value = obs[key]
        value_type = type_dict[key]
        if value_type == 'full_board':
            # Break board down in its different features
            desire_plane = np.zeros(shape=board_shape, dtype=np.float32)
            enemy_plane = np.zeros(shape=board_shape, dtype=np.float32)
            mate_plane = np.zeros(shape=board_shape, dtype=np.float32)
            self_plane = np.zeros(shape=board_shape, dtype=np.float32)
            i = 0
            # One-hot encode every item
            for e in constants.Item:
                feature_plane = np.where(
                    value == e.value,
                    np.ones(shape=board_shape, dtype=np.float32),
                    np.zeros(shape=board_shape, dtype=np.float32))
                if e in agents:
                    # Agents are merged into role planes instead of
                    # getting one plane each.
                    if e in enemies:
                        enemy_plane += feature_plane
                    if e == mate:
                        mate_plane += feature_plane
                    if e in player:
                        self_plane = feature_plane
                else:
                    net_in[:, :, idx + i] = feature_plane
                    i += 1
                # Encode desired positions
                desire_plane += feature_plane * desire_dict[e.value]

            # On the first call there is no previous board yet, so the
            # current desire plane stands in for it.
            if self.last_board is None:
                self.last_board = desire_plane.copy()

            net_in[:, :, idx + i] = enemy_plane
            i += 1
            net_in[:, :, idx + i] = mate_plane
            i += 1
            net_in[:, :, idx + i] = self_plane
            i += 1
            net_in[:, :, idx + i] = self.last_board
            net_in[:, :, -1] = desire_plane
            self.last_board = desire_plane.copy()
        else:
            if value_type == 'board':
                # Value already board_shape
                feature_plane = np.array(value, dtype=np.float32)
            elif value_type == 'val':
                # Fill array of board_shape with value
                feature_plane = np.zeros(shape=board_shape,
                                         dtype=np.float32)
                feature_plane.fill(np.float32(value))
            elif value_type == 'pos':
                # Mark position in zero plane
                feature_plane = np.zeros(shape=board_shape,
                                         dtype=np.float32)
                feature_plane[value[0], value[1]] = 1
            elif value_type == 'mate':
                # Fill array of board_shape with mate value for alive mate
                feature_plane = np.zeros(shape=board_shape,
                                         dtype=np.float32)
                if value.value in obs['alive']:
                    feature_plane.fill(np.float32(value.value))
            elif value_type == 'enemy':
                # Sum up enemy values for alive enemies
                feature_plane = np.zeros(shape=board_shape,
                                         dtype=np.float32)
                for val in value:
                    if val.value in obs['alive']:
                        feature_plane += val.value
            else:
                # Create zero plane if value is not handled
                feature_plane = np.zeros(shape=board_shape,
                                         dtype=np.float32)
            net_in[:, :, idx] = feature_plane

    return net_in
def act(self, obs, action_space, info):
    """Pick an action from survivability estimates under three scenarios.

    Survivability of each action is estimated when (a) every enemy is
    replaced by a bomb, (b) enemies may move by one position and (c)
    there are no other agents at all.  If some action is survivable in
    all three scenarios the most survivable one is returned; otherwise
    the actions that appear in any scenario are ranked; otherwise the
    action that blocks the enemies most (while sparing the teammate) is
    returned, with random fallbacks at the very end.

    Args:
        obs: Observation dictionary ("position", "ammo", "blast_strength",
            "can_kick", "enemies", "teammate", "bomb_blast_strength",
            "bomb_life" and "board" are read).
        action_space: Unused here.
        info: Dictionary prepared by the caller; "last_seen",
            "moving_direction", "curr_bombs", "curr_flames" and
            "list_boards_no_move" are read.

    Returns:
        The ``value`` of the chosen ``constants.Action``.
    """
    #
    # Definitions
    #
    board = info['last_seen']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_kick = obs["can_kick"]  # whether I can kick
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_teammate = obs["teammate"]

    kickable, might_kickable \
        = self._kickable_positions(obs, info["moving_direction"],
                                   consider_agents=True)

    # enemy positions
    enemy_positions = list()
    for enemy in my_enemies:
        rows, cols = np.where(board == enemy.value)
        if len(rows) == 0:
            continue
        enemy_positions.append((rows[0], cols[0]))

    # teammate position
    teammate_position = None
    if my_teammate is not None:
        rows, cols = np.where(board == my_teammate.value)
        if len(rows):
            teammate_position = (rows[0], cols[0])

    # positions that might be blocked
    if teammate_position is None:
        agent_positions = enemy_positions
    else:
        agent_positions = enemy_positions + [teammate_position]
    might_blocked = self._get_might_blocked(board, my_position,
                                            agent_positions,
                                            might_kickable)

    #
    # Survivability, when enemy is replaced by a bomb, and no move afterwards
    #

    # replace enemy with bomb
    _bombs = deepcopy(info["curr_bombs"])
    rows, cols = np.where(board > constants.Item.AgentDummy.value)
    for position in zip(rows, cols):
        # NOTE(review): my_enemies holds constants.Item members while
        # board[position] is a raw board value -- confirm this membership
        # test matches as intended.
        if board[position] not in my_enemies:
            continue
        if obs["bomb_blast_strength"][position]:
            # already a bomb
            continue
        # NOTE(review): enemy_blast_strength_map is not defined in this
        # function; presumably a module-level name -- confirm.
        bomb = characters.Bomb(
            characters.Bomber(),  # dummy owner of the bomb
            position,
            constants.DEFAULT_BOMB_LIFE,
            enemy_blast_strength_map[position],
            None)
        _bombs.append(bomb)

    n_survivable_bomb = self._get_n_survivable(board,
                                               _bombs,
                                               info["curr_flames"],
                                               obs,
                                               my_position,
                                               set.union(
                                                   kickable,
                                                   might_kickable),
                                               enemy_mobility=0)

    #
    # Survivability, when enemy moves one position or stay unmoved
    #
    n_survivable_move = self._get_n_survivable(board,
                                               info["curr_bombs"],
                                               info["curr_flames"],
                                               obs,
                                               my_position,
                                               set.union(
                                                   kickable,
                                                   might_kickable),
                                               enemy_mobility=1)

    #
    # Survivability, when no enemies
    #
    _board = deepcopy(board)
    agent_positions = np.where(_board > constants.Item.AgentDummy.value)
    _board[agent_positions] = constants.Item.Passage.value
    _board[my_position] = board[my_position]

    _obs = {
        "position": obs["position"],
        "blast_strength": obs["blast_strength"],
        "ammo": obs["ammo"],
        "bomb_life": obs["bomb_life"],
        "board": _board
    }

    n_survivable_none = self._get_n_survivable(_board,
                                               info["curr_bombs"],
                                               info["curr_flames"],
                                               _obs,
                                               my_position,
                                               set.union(
                                                   kickable,
                                                   might_kickable),
                                               enemy_mobility=0)

    def expected_survivability(actions):
        """Average the scenario-wise survivability of each action."""
        expected = dict()
        for a in actions:
            if might_blocked[a]:
                # A blocked move ends up as a stop in the "move" scenario.
                expected[a] = (
                    np.array(n_survivable_bomb[a])
                    + np.array(n_survivable_move[constants.Action.Stop])
                    + np.array(n_survivable_none[a])) / 3
            elif a in [constants.Action.Stop, constants.Action.Bomb]:
                expected[a] = (np.array(n_survivable_bomb[a])
                               + np.array(n_survivable_move[a])) / 2
            else:
                expected[a] = (np.array(n_survivable_bomb[a])
                               + np.array(n_survivable_none[a])) / 2
        return expected

    #
    # Survivable actions
    #
    survivable_actions_bomb = set(
        [a for a in n_survivable_bomb if n_survivable_bomb[a][-1] > 0])
    survivable_actions_move = set(
        [a for a in n_survivable_move if n_survivable_move[a][-1] > 0])
    survivable_actions_none = set(
        [a for a in n_survivable_none if n_survivable_none[a][-1] > 0])

    survivable_actions = set.intersection(survivable_actions_bomb,
                                          survivable_actions_move,
                                          survivable_actions_none)

    # if can survive without possibility of being blocked, then do so
    # NOTE(review): the assignments below bind `survivable_action*`
    # (singular), names that are never read afterwards, so this filter
    # currently has no effect on the decision.  Left unchanged on
    # purpose -- confirm whether `survivable_actions*` was intended.
    if not constants.Action.Stop in survivable_actions:
        _survivable_actions = [
            action for action in survivable_actions
            if not might_blocked[action]
        ]
        if len(_survivable_actions):
            survivable_action = _survivable_actions

        _survivable_actions_bomb = [
            action for action in survivable_actions_bomb
            if not might_blocked[action]
        ]
        _survivable_actions_move = [
            action for action in survivable_actions_move
            if not might_blocked[action]
        ]
        _survivable_actions_none = [
            action for action in survivable_actions_none
            if not might_blocked[action]
        ]
        if all([
                len(_survivable_actions_bomb) > 0,
                len(_survivable_actions_move) > 0,
                len(_survivable_actions_none) > 0
        ]):
            survivable_action_bomb = _survivable_actions_bomb
            survivable_action_move = _survivable_actions_move
            survivable_action_none = _survivable_actions_none

    #
    # Choose actions
    #
    if len(survivable_actions) == 1:
        action = survivable_actions.pop()
        if verbose:
            print("Only survivable action", action)
        return action.value

    if len(survivable_actions) > 1:
        n_survivable_expected = expected_survivability(survivable_actions)
        action = self._get_most_survivable_action(n_survivable_expected)
        if verbose:
            print("Most survivable action", action)
        return action.value

    # no survivable actions for all cases
    survivable_actions = set(
        list(n_survivable_bomb.keys()) + list(n_survivable_move.keys()) +
        list(n_survivable_none.keys()))

    if len(survivable_actions) == 1:
        action = survivable_actions.pop()
        if verbose:
            print("Only might survivable action", action)
        return action.value

    if len(survivable_actions) > 1:
        # Scenarios that do not list an action count as zero survivability.
        for a in set.union(survivable_actions, {constants.Action.Stop}):
            if a not in n_survivable_bomb:
                n_survivable_bomb[a] = np.zeros(self._search_range)
            if a not in n_survivable_move:
                n_survivable_move[a] = np.zeros(self._search_range)
            if a not in n_survivable_none:
                n_survivable_none[a] = np.zeros(self._search_range)

        n_survivable_expected = expected_survivability(survivable_actions)
        action = self._get_most_survivable_action(n_survivable_expected)
        if verbose:
            print("Most might survivable action", action)
        return action.value

    # no survivable action found for any cases
    # TODO : Then consider killing enemies or helping teammate

    # fraction of blocked node in the survival trees of enemies
    _list_boards = deepcopy(info["list_boards_no_move"])
    if obs["bomb_blast_strength"][my_position]:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Bomb.value
    else:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Passage.value

    total_frac_blocked, n_survivable_nodes, blocked_time_positions \
        = self._get_frac_blocked(_list_boards, my_enemies, board,
                                 obs["bomb_life"])
    if teammate_position is not None:
        total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
            = self._get_frac_blocked(_list_boards, [my_teammate], board,
                                     obs["bomb_life"])

    block = defaultdict(float)
    for action in [
            constants.Action.Stop, constants.Action.Up,
            constants.Action.Down, constants.Action.Left,
            constants.Action.Right
    ]:
        next_position = self._get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if board[next_position] in [
                constants.Item.Rigid.value, constants.Item.Wood.value
        ]:
            continue
        if next_position in set.union(kickable, might_kickable):
            # kick will be considered later
            continue
        block[action] = total_frac_blocked[next_position]
        if teammate_position is not None:
            block[action] *= (1 -
                              total_frac_blocked_teammate[next_position])
        # Scale by the inverse temperature and subtract
        # log(-log(U)) noise with U drawn from self.random.uniform().
        block[action] *= self._inv_tmp
        block[action] -= np.log(-np.log(self.random.uniform()))

    # A bomb can only be placed with ammo left AND no bomb already under
    # me, hence `all` (the original `any` also scored Bomb when only one
    # of the two conditions held).
    if all([my_ammo > 0, obs["bomb_blast_strength"][my_position] == 0]):
        list_boards_with_bomb, _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_blast_strength=my_blast_strength,
                                   my_action=constants.Action.Bomb)
        block[constants.Action.Bomb] \
            = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                               n_survivable_nodes,
                                               board,
                                               my_enemies)
        if teammate_position is not None:
            # NOTE(review): this passes the enemies' n_survivable_nodes
            # rather than n_survivable_nodes_teammate -- confirm.
            block_teammate = self._get_frac_blocked_two_lists(
                list_boards_with_bomb, n_survivable_nodes, board,
                [my_teammate])
            block[constants.Action.Bomb] *= (1 - block_teammate)
        block[constants.Action.Bomb] *= self._inv_tmp
        block[constants.Action.Bomb] -= np.log(
            -np.log(self.random.uniform()))

    for next_position in set.union(kickable, might_kickable):
        my_action = self._get_direction(my_position, next_position)
        list_boards_with_kick, _ \
            = self._board_sequence(obs["board"],
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=my_action,
                                   can_kick=True)
        block[my_action] \
            = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                               n_survivable_nodes,
                                               board,
                                               my_enemies)
        if teammate_position is not None:
            block_teammate = self._get_frac_blocked_two_lists(
                list_boards_with_kick, n_survivable_nodes, board,
                [my_teammate])
            block[my_action] *= (1 - block_teammate)
        block[my_action] *= self._inv_tmp
        block[my_action] -= np.log(-np.log(self.random.uniform()))

    max_block = -np.inf
    best_action = None
    for action in block:
        if block[action] > max_block:
            max_block = block[action]
            best_action = action

    if best_action is not None:
        if verbose:
            print(
                "Best action to kill enemies or help teammate (cannot survive)"
            )
        return best_action.value

    # The following will not be used

    if obs["ammo"] > 0 and obs["blast_strength"] == 0:
        action = constants.Action.Bomb
        if verbose:
            print("Suicide", action)
        return action.value

    kickable_positions = list(set.union(kickable, might_kickable))
    if kickable_positions:
        self.random.shuffle(kickable_positions)
        action = self._get_direction(my_position, kickable_positions[0])
        if verbose:
            print("Suicide kick", action)
        return action.value

    all_actions = [
        constants.Action.Stop, constants.Action.Up, constants.Action.Down,
        constants.Action.Right, constants.Action.Left
    ]
    self.random.shuffle(all_actions)
    for action in all_actions:
        next_position = self._get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility.position_is_wall(board, next_position):
            continue
        if verbose:
            print("Random action", action)
        return action.value

    action = constants.Action.Stop
    if verbose:
        print("No action found", action)
    return action.value
def act(self, obs, action_space, info):
    """Pick the action that maximises the chance of surviving.

    Survivability is estimated when every enemy is replaced by a bomb and
    when enemies may move by one position.  Actions survivable in both
    scenarios are preferred; otherwise the function falls back to one
    scenario, then to a board without other agents, and finally to a
    random on-board move that does not run into a wall.

    Args:
        obs: Observation dictionary ("board", "position", "can_kick",
            "enemies", "teammate", "bomb_blast_strength", "blast_strength",
            "ammo" and "bomb_life" are read).
        action_space: Unused here.
        info: Dictionary prepared by the caller; "moving_direction",
            "curr_bombs" and "curr_flames" are read.

    Returns:
        The ``value`` of the chosen ``constants.Action``.
    """
    #
    # Definitions
    #
    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_kick = obs["can_kick"]  # whether I can kick
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_teammate = obs["teammate"]

    kickable, might_kickable \
        = self._kickable_positions(obs, info["moving_direction"],
                                   consider_agents=True)

    #
    # Survivability, when enemy is replaced by a bomb, and no move afterwards
    #

    # replace enemy with bomb
    _bombs = deepcopy(info["curr_bombs"])
    rows, cols = np.where(board > constants.Item.AgentDummy.value)
    for position in zip(rows, cols):
        # NOTE(review): my_enemies holds constants.Item members while
        # board[position] is a raw board value -- confirm this membership
        # test matches as intended.
        if board[position] not in my_enemies:
            continue
        if obs["bomb_blast_strength"][position]:
            # already a bomb
            continue
        # NOTE(review): enemy_blast_strength_map is not defined in this
        # function; presumably a module-level name -- confirm.
        bomb = characters.Bomb(
            characters.Bomber(),  # dummy owner of the bomb
            position,
            constants.DEFAULT_BOMB_LIFE,
            enemy_blast_strength_map[position],
            None)
        _bombs.append(bomb)

    n_survivable_bomb = self._get_n_survivable(board,
                                               _bombs,
                                               info["curr_flames"],
                                               obs,
                                               my_position,
                                               set.union(
                                                   kickable,
                                                   might_kickable),
                                               enemy_mobility=0)

    print("survivable bomb")
    for a in n_survivable_bomb:
        print(a, n_survivable_bomb[a])

    survivable_actions_bomb = set(n_survivable_bomb)

    #
    # Survivability, when enemy moves one position or stay unmoved
    #
    n_survivable_move = self._get_n_survivable(board,
                                               info["curr_bombs"],
                                               info["curr_flames"],
                                               obs,
                                               my_position,
                                               set.union(
                                                   kickable,
                                                   might_kickable),
                                               enemy_mobility=1)

    # If my move is survivable with bomb but not with move,
    # then my move must be blocked by an enemy.
    # I might be blocked by an enemy with such my move,
    # it will end up in stop and enemy is also stop,
    # so my survivability with such my move should be the
    # same as my survivability with stop when enemy stops
    if constants.Action.Stop in survivable_actions_bomb:
        for action in survivable_actions_bomb:
            if action in [constants.Action.Stop, constants.Action.Bomb]:
                continue
            if action not in n_survivable_move:
                n_survivable_move[action] = n_survivable_bomb[
                    constants.Action.Stop]

    survivable_actions_move = set(n_survivable_move)

    # if survivable by not stopping when enemy place a bomb,
    # then do not stop
    if survivable_actions_bomb - {constants.Action.Stop}:
        survivable_actions_bomb -= {constants.Action.Stop}
        survivable_actions_move -= {constants.Action.Stop}

    survivable_actions = set.intersection(survivable_actions_bomb,
                                          survivable_actions_move)

    if len(survivable_actions) == 0:
        if survivable_actions_bomb:
            action = self._get_most_survivable_action(n_survivable_bomb)
            print("Most survivable action when enemy place a bomb", action)
            return action.value
        elif survivable_actions_move:
            action = self._get_most_survivable_action(n_survivable_move)
            print("Most survivable action when enemy moves", action)
            return action.value
        else:
            #
            # Survivability with no enemies or teammate
            #
            _board = deepcopy(board)
            agent_positions = np.where(
                _board > constants.Item.AgentDummy.value)
            _board[agent_positions] = constants.Item.Passage.value
            _board[my_position] = board[my_position]

            _obs = {
                "position": obs["position"],
                "blast_strength": obs["blast_strength"],
                "ammo": obs["ammo"],
                "bomb_life": obs["bomb_life"],
                "board": _board
            }

            n_survivable = self._get_n_survivable(_board,
                                                  info["curr_bombs"],
                                                  info["curr_flames"],
                                                  _obs,
                                                  my_position,
                                                  set.union(
                                                      kickable,
                                                      might_kickable),
                                                  enemy_mobility=0)

            survivable_actions = list(n_survivable)

            if survivable_actions:
                action = self._get_most_survivable_action(n_survivable)
                print("Most survivable action when no enemy", action)
                return action.value
            else:
                if obs["ammo"] > 0 and obs["blast_strength"] == 0:
                    action = constants.Action.Bomb
                    print("Suicide", action)
                    return action.value
                else:
                    all_actions = [
                        constants.Action.Stop, constants.Action.Up,
                        constants.Action.Down, constants.Action.Right,
                        constants.Action.Left
                    ]
                    random.shuffle(all_actions)
                    for action in all_actions:
                        next_position = self._get_next_position(
                            my_position, action)
                        if not self._on_board(next_position):
                            continue
                        # Skip moves INTO a wall.  The original test was
                        # negated, which skipped every non-wall target
                        # and could only ever return a move into a wall.
                        if utility.position_is_wall(board, next_position):
                            continue
                        print("Random action", action)
                        return action.value

    elif len(survivable_actions) == 1:
        action = survivable_actions.pop()
        print("Only survivable action", action)
        return action.value

    else:
        # Rank by the worse of the two scenarios for each action.
        n_survivable_min = dict()
        for a in survivable_actions:
            n_survivable_min[a] = min(
                [n_survivable_bomb[a], n_survivable_move[a]])
        action = self._get_most_survivable_action(n_survivable_min)
        print("Most survivable action when no enemy", action)
        return action.value

    # Reached only when the random fallback found no usable action.
    action = constants.Action.Stop
    print("No action found", action)
    return action.value
def _djikstra(board,
              my_position,
              bombs,
              enemies,
              bomb_timer=None,
              depth=None,
              exclude=None):
    """Shortest-path search from ``my_position`` over the board.

    Args:
        board: 2-D board indexable by ``(row, col)`` tuples; ``len(board)``
            is used as the size of both axes.
        my_position: ``(row, col)`` the search starts from.
        bombs: Iterable of dicts with a 'position' entry.
        enemies: Passed to ``utility.position_is_passable``.
        bomb_timer: Optional mapping/array indexed by position.  A cell is
            not entered when its timer ``t`` is positive and the arrival
            step differs from ``t`` by less than 2.
        depth: Maximum Manhattan distance from ``my_position`` that is
            considered.  Defaults to twice the board size.
        exclude: Items whose cells are ignored entirely.  Defaults to
            fog, rigid walls and flames.

    Returns:
        ``(items, dist, prev)``: ``items`` maps each item to the positions
        where it was found, ``dist`` maps positions to their distance
        (``np.inf`` when not reached) and ``prev`` maps positions to
        their predecessor on the shortest path (``None`` if none).
    """
    if depth is None:
        depth = len(board) * 2

    if exclude is None:
        exclude = [
            constants.Item.Fog, constants.Item.Rigid, constants.Item.Flames
        ]

    def out_of_range(p1, p2):
        """Return True if the two points are more than `depth` apart."""
        x1, y1 = p1
        x2, y2 = p2
        return abs(y2 - y1) + abs(x2 - x1) > depth

    items = defaultdict(list)

    # A bomb under the agent is hidden by the agent on the board, so it is
    # recorded from the bomb list instead.
    for bomb in bombs:
        if bomb['position'] == my_position:
            items[constants.Item.Bomb].append(my_position)

    dist = {}
    prev = {}

    mx, my = my_position
    # The window is inclusive on both sides: `+ 1` keeps the cells at
    # offset +depth, which out_of_range() accepts and which the lower
    # bound already includes at offset -depth.
    for r in range(max(0, mx - depth), min(len(board), mx + depth + 1)):
        for c in range(max(0, my - depth), min(len(board), my + depth + 1)):
            position = (r, c)

            if any([
                    out_of_range(my_position, position),
                    utility.position_in_items(board, position, exclude),
            ]):
                continue

            if position == my_position:
                dist[position] = 0
            else:
                dist[position] = np.inf

            prev[position] = None

            item = constants.Item(board[position])
            items[item].append(position)

    # Djikstra
    H = []
    heapq.heappush(H, (0, my_position))
    while H:
        min_dist, position = heapq.heappop(H)

        # Expansion continues from a bomb cell even though it is not
        # passable; any other impassable cell is a dead end.
        if (board[position] != constants.Item.Bomb.value
           ) and not utility.position_is_passable(board, position,
                                                  enemies):
            continue

        x, y = position
        for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            new_position = (row + x, col + y)
            if new_position not in dist:
                continue

            if not utility.position_is_passable(board, new_position,
                                                enemies):
                continue

            if bomb_timer is not None:
                t = bomb_timer[new_position]
                if t > 0 and abs((min_dist + 1) - t) < 2:
                    continue

            if min_dist + 1 < dist[new_position]:
                dist[new_position] = min_dist + 1
                prev[new_position] = position
                heapq.heappush(H, (dist[new_position], new_position))

    return items, dist, prev
def act(self, obs, action_space):
    """Score every board cell and either bomb, wait, or walk to the best one.

    Steps, in order:
      1. run ``self._djikstra`` from the current position (a cell we failed
         to leave on the previous move is first marked Rigid);
      2. refresh ``self.bomb_time`` from the visible bombs;
      3. score cells by breakable wood, distance, power-ups, bomb blasts and
         proximity to enemies;
      4. lay a bomb when already on the best cell and ``self._can_escape``
         agrees, otherwise stop or backtrack towards the best cell.

    Parameters
    ----------
    obs : observation dict; reads ``position``, ``board``,
        ``bomb_blast_strength``, ``enemies``, ``ammo`` and ``blast_strength``.
    action_space : unused.

    Returns
    -------
    The ``.value`` of the chosen ``constants.Action``.  The choice is also
    stored in ``self.prev_action`` and the position in ``self.prev_pos``.

    NOTE(review): the statement nesting was reconstructed from a
    whitespace-mangled source.  The rule-based fallback that used to follow
    the unconditional ``return`` was unreachable and has been removed, as was
    the degree-of-freedom penalty guarded by ``dis_to_ene <= -4`` (a
    non-negative distance can never satisfy it).
    """

    def convert_bombs(bomb_map):
        """Turn the blast-strength map into a list of bomb dicts."""
        rows, cols = np.where(bomb_map > 0)
        return [{
            'position': (r, c),
            'blast_strength': int(bomb_map[(r, c)])
        } for r, c in zip(rows, cols)]

    size = 11  # board edge length hard-wired throughout this agent
    neighbors = [(-1, 0), (1, 0), (0, -1), (0, 1)]

    def off_board(pos):
        """Return True when ``pos`` lies outside the size x size board."""
        return not (0 <= pos[0] < size and 0 <= pos[1] < size)

    depth = 20

    my_position = tuple(obs['position'])
    board = np.array(obs['board'])
    bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    enemies = [constants.Item(e) for e in obs['enemies']]
    ammo = int(obs['ammo'])
    blast_strength = int(obs['blast_strength'])

    # A move action (values 1..4) that left us in place means we are stuck;
    # mark the cell Rigid so the search below treats it as blocked.
    if self.prev_pos is not None:
        if self.prev_pos == my_position:
            if 1 <= self.prev_action.value <= 4:
                if self.logging:
                    print('freeze')
                board[self.prev_pos] = constants.Item.Rigid.value

    items, dist, prev = self._djikstra(
        board,
        my_position,
        bombs,
        enemies,
        bomb_timer=self.bomb_time,
        depth=depth)

    if self.logging:
        print('my_position =', my_position)
        print('board =')
        print(board)
        print('dist =')
        print(dist)
        print('bombs =', bombs)
        print('enemies =', enemies)
        for e in enemies:
            print(e)
            pos = items.get(e, [])
            print('pos =', pos)
            print('pos_len=', len(pos))
            if len(pos) > 0:
                print('xy=', pos[0][0], ',', pos[0][1])
        print('ammo =', ammo)
        print('blast_strength =', blast_strength)

        # Debug-only matrix view of ``dist`` (-1 marks cells not searched).
        test_ary = np.ones((size, size))
        for c in range(size):
            for r in range(size):
                test_ary[r, c] = dist[(r, c)] if (r, c) in dist else -1
        print("dist_mat:")
        print(test_ary)

    # update bomb_time map
    bomb_life = 8
    has_bomb = {}
    already_breakable = np.zeros((size, size))
    for b in bombs:
        r, c = b['position']
        strength = b['blast_strength']
        if self.bomb_time[(r, c)] == 0:
            self.bomb_time[(r, c)] = bomb_life
        else:
            self.bomb_time[(r, c)] -= 1
        # NOTE(review): the bomb's own cell is never added to ``has_bomb``,
        # so its timer is reset by the clean-up loop below every step.
        for row, col in neighbors:
            for d in range(1, strength):
                new_pos = (r + d * row, c + d * col)
                if TestSimpleAgent._out_of_board(new_pos):
                    continue
                if utility.position_is_rigid(board, new_pos):
                    continue
                if utility.position_is_wood(board, new_pos):
                    already_breakable[new_pos] = 1
                if self.bomb_time[new_pos] == 0:
                    self.bomb_time[new_pos] = bomb_life
                else:
                    self.bomb_time[new_pos] -= 1
                has_bomb[new_pos] = 1

    # clear up table: cells no longer covered by any blast get no timer
    for c in range(size):
        for r in range(size):
            if (r, c) not in has_bomb:
                self.bomb_time[(r, c)] = 0

    if self.logging:
        print("bomb_time:")
        print(self.bomb_time)

    # evaluate each position in terms of breakable woods
    num_breakable = np.zeros((size, size))
    num_breakable_inside = np.zeros((size, size))
    for c in range(size):
        for r in range(size):
            if not utility.position_is_wood(board, (r, c)):
                continue
            if already_breakable[(r, c)]:
                continue
            # Credit every free cell from which our blast would reach this
            # wood.
            for row, col in neighbors:
                for d in range(1, blast_strength):
                    new_pos = (r + d * row, c + d * col)
                    if TestSimpleAgent._out_of_board(new_pos):
                        continue
                    if utility.position_is_passable(
                            board, new_pos,
                            enemies) or utility.position_is_flames(
                                board, new_pos):
                        num_breakable[new_pos] += 1
                    else:
                        break
            # Count adjacent woods; one is discounted when no neighbour is
            # passable.
            tmp_num = 0
            has_passable = False
            for row, col in neighbors:
                new_pos = (r + row, c + col)
                if TestSimpleAgent._out_of_board(new_pos):
                    continue
                if utility.position_is_wood(board, new_pos):
                    tmp_num += 1
                elif utility.position_is_passable(board, new_pos, enemies):
                    has_passable = True
            if (not has_passable) and tmp_num > 0:
                tmp_num -= 1
            num_breakable_inside[(r, c)] = tmp_num

    if self.logging:
        print('num_breakable:')
        print(num_breakable)
        print('num_breakable_inside:')
        print(num_breakable_inside)

    num_breakable_total = np.zeros((size, size))
    for c in range(size):
        for r in range(size):
            num_breakable_total[(r, c)] = num_breakable[(r, c)]
            for row, col in neighbors:
                new_pos = (r + row, c + col)
                if off_board(new_pos):
                    continue
                num_breakable_total[(r, c)] += \
                    num_breakable_inside[new_pos] * 0.5

    if self.logging:
        print('num_breakable_total:')
        print(num_breakable_total)

    # evaluate each position in total
    pos_scores = np.zeros((size, size))
    for c in range(size):
        for r in range(size):
            if (r, c) not in dist:
                pos_scores[(r, c)] = -1
                continue
            elif dist[(r, c)] == np.inf:
                pos_scores[(r, c)] = np.inf
                continue

            if num_breakable_total[(r, c)] > 0:
                pos_scores[(r, c)] += num_breakable_total[(r, c)]
                pos_scores[(r, c)] += (depth - dist[(r, c)]) * 0.2

            # consider power-up items
            if board[(r, c)] in {
                    constants.Item.ExtraBomb.value,
                    constants.Item.IncrRange.value
            }:
                pos_scores[(r, c)] += 50

    if self.logging:
        print('pos_score:')
        print(pos_scores)

    # consider bomb blast
    for bomb in bombs:
        r, c = bomb['position']
        strength = bomb['blast_strength']
        pos_scores[(r, c)] = -20
        for row, col in neighbors:
            for d in range(1, strength):
                new_pos = (r + d * row, c + d * col)
                if off_board(new_pos) or new_pos not in dist:
                    continue
                # NOTE(review): the original also had
                # ``elif new_pos == np.inf: continue`` here, which compares a
                # tuple with a float and is always False (probably
                # ``dist[new_pos]`` was meant).  Behaviour is kept:
                # unreachable cells inside a blast are scored -20 as well.
                pos_scores[new_pos] = -20

    if self.logging:
        print('consider blast pos_score:')
        print(pos_scores)

    # consider enemies: slightly prefer open cells in line with an enemy
    for e in enemies:
        pos = items.get(e, [])
        if len(pos) > 0:
            r = pos[0][0]
            c = pos[0][1]
            for row, col in neighbors:
                for d in range(1, blast_strength * 2):
                    new_pos = (r + d * row, c + d * col)
                    if off_board(new_pos):
                        continue
                    if not utility.position_is_passable(
                            board, new_pos, enemies):
                        break
                    pos_scores[new_pos] += 0.3

    if self.logging:
        print('consider enemy:')
        print(pos_scores)

    # Best reachable cell; column-major scan order decides ties.
    h_r, h_c = -1, -1
    h_score = -1
    for c in range(size):
        for r in range(size):
            if (r, c) not in dist:
                continue
            elif dist[(r, c)] == np.inf:
                continue
            if h_score < pos_scores[(r, c)]:
                h_score = pos_scores[(r, c)]
                h_r, h_c = (r, c)

    if self.logging:
        print('h_score and pos:', h_score, '(r, c) =', h_r, ',', h_c)
        print('prev:')
        print(prev)

    # if current position is not the highest score position, move to the
    # highest position.
    if h_r == -1:
        self.prev_action = constants.Action.Stop
    elif pos_scores[my_position] == h_score:
        if self._can_escape(pos_scores, my_position, blast_strength):
            self.prev_action = constants.Action.Bomb
        else:
            self.prev_action = constants.Action.Stop
    else:
        self.prev_action = self._backtrack(my_position, (h_r, h_c), prev)

    self.prev_pos = my_position
    if self.logging:
        print('action: ', self.prev_action)
    return self.prev_action.value
def act(self, obs, action_space):
    """Pick an action that keeps the agent alive in simulated futures.

    The method simulates board sequences (enemies frozen, then enemies with
    decreasing mobility until at least one next step is survivable), derives
    the survivable actions and then applies a priority cascade: forced move,
    good items, bombing (wood / enemy), walking to wood, kicking, walking
    to/away from enemies, and finally the parent agent's action or a random
    survivable one.

    Parameters
    ----------
    obs : observation dict; reads ``board``, ``position``, ``blast_strength``,
        ``enemies``, ``bomb_blast_strength`` and ``bomb_life``.
    action_space : forwarded unchanged to ``super().act``.

    Returns
    -------
    An action value (``constants.Action(...).value``), or whatever
    ``super().act`` returns on the paths that delegate to it.

    Side effects: updates ``self._search_range``, ``self._prev_bomb_life``,
    ``self._prev_flame_life`` and ``self._prev_bomb_position_strength``, and
    prints the reason for the decision.

    NOTE(review): statement nesting was reconstructed from a
    whitespace-mangled source.  Dead code was removed: a ``for t in
    range(0)`` debug loop, two triple-quoted blocks of disabled code, and the
    "bomb to kick" ``else`` branch, read here as belonging to ``if True:``
    and therefore unreachable.
    """
    #
    # Definitions
    #
    self._search_range = 10

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_blast_strength = obs['blast_strength']
    my_enemies = [constants.Item(e) for e in obs['enemies']]

    #
    # Prepare extended observations
    # - bomb moving direction
    # - flame remaining life
    #

    # Summarize information about bombs
    #   curr_bombs : list of current bombs
    #   moving_direction : array of moving direction of bombs
    curr_bombs, moving_direction, self._prev_bomb_life \
        = self._get_bombs(obs, self._prev_bomb_life)

    # Summarize information about flames
    curr_flames, self._prev_flame_life \
        = self._get_flames(board,
                           self._prev_flame_life,
                           self._prev_bomb_position_strength)

    # bombs to be exploded in the next step
    self._prev_bomb_position_strength = list()
    rows, cols = np.where(obs["bomb_blast_strength"] > 0)
    for position in zip(rows, cols):
        strength = int(obs["bomb_blast_strength"][position])
        self._prev_bomb_position_strength.append((position, strength))

    #
    # Understand current situation
    #

    # Simulation assuming enemies stay unmoved
    list_boards_no_move, _ \
        = self._board_sequence(board,
                               curr_bombs,
                               curr_flames,
                               self._search_range,
                               my_position,
                               enemy_mobility=0)

    # Survivable time-positions at each time, and preceding positions
    survivable_no_move, prev_no_move \
        = self._search_time_expanded_network(list_boards_no_move,
                                             my_position)

    # Items that can be reached in a survivable manner
    reachable_items_no_move, _, next_to_items_no_move \
        = self._find_reachable_items(list_boards_no_move,
                                     my_position,
                                     survivable_no_move)

    # Simulation assuming enemies move.  Gradually reduce the mobility of
    # the enemies until we have at least one survivable next step.
    for enemy_mobility in range(3, -1, -1):
        list_boards, _ = self._board_sequence(board,
                                              curr_bombs,
                                              curr_flames,
                                              self._search_range,
                                              my_position,
                                              enemy_mobility=enemy_mobility)
        survivable, prev = self._search_time_expanded_network(list_boards,
                                                              my_position)
        if len(survivable[1]) > 0:
            break

    # Items that can be reached in a survivable manner
    reachable_items, _, next_to_items \
        = self._find_reachable_items(list_boards,
                                     my_position,
                                     survivable)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable,
                                       obs,
                                       curr_bombs,
                                       curr_flames)

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    if verbose:
        print("survivable actions are", survivable_actions)

    # Positions where we kick a bomb if we move to
    kickable = self._kickable_positions(obs, moving_direction)

    print()

    #
    # Choose an action
    #

    # Do not stay on a bomb if I can
    if all([obs["bomb_life"][my_position] > 0,
            len(survivable_actions) > 1,
            is_survivable[constants.Action.Stop]]):
        is_survivable[constants.Action.Stop] = False
        survivable_actions = [a for a in is_survivable if is_survivable[a]]

    if len(survivable_actions) == 0:
        # must die: nothing survivable, let the parent agent decide
        # TODO: might want to do something that can help team mate
        # TODO: kick if possible
        print("Must die", constants.Action.Stop)
        return super().act(obs, action_space)

    if len(survivable_actions) == 1:
        # move to the position if it is the only survivable position
        action = survivable_actions[0]
        print("The only survivable action", action)
        return action.value

    # Move towards good items
    # TODO : kick may be a good item only if I cannot kick yet
    # TODO : might want to destroy
    good_items = [
        constants.Item.ExtraBomb,
        constants.Item.IncrRange,
        constants.Item.Kick,
    ]

    # positions with good items
    good_time_positions = set()
    for item in good_items:
        good_time_positions = good_time_positions.union(
            reachable_items[item])
    if len(good_time_positions) > 0:
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward good item", action)
            return action.value

    # TODO : shoud check the survivability of all agents in one method

    # Place a bomb if
    # - it does not significantly reduce my survivability
    # - it can break wood
    # - it can reduce the survivability of enemies
    if is_survivable[constants.Action.Bomb]:
        # if survivable now after bomb, consider bomb
        if all([len(s) > 0 for s in survivable_with_bomb]):
            # if survivable all the time after bomb, consider bomb
            if all([self._can_break_wood(list_boards_no_move[-1],
                                         my_position,
                                         my_blast_strength)]
                   + [not utility.position_is_flames(b, my_position)
                      for b in list_boards_no_move[:10]]):
                # place bomb if can break wood
                print("Bomb to break wood", constants.Action.Bomb)
                return constants.Action.Bomb.value

            for enemy in my_enemies:
                # check if the enemy is reachable
                if len(reachable_items_no_move[enemy]) == 0:
                    continue

                # can reach the enemy in enemy_time steps
                enemy_time = reachable_items_no_move[enemy][0][0]

                # Walk the predecessor sets back to time 1 to find the
                # first steps that lead towards the enemy.
                positions = set(
                    [x[1:3] for x in next_to_items_no_move[enemy]])
                for t in range(enemy_time, 1, -1):
                    _positions = set()
                    for position in positions:
                        _positions = _positions.union(
                            prev_no_move[t][position])
                    positions = _positions.copy()

                # The original guarded this with
                # ``enemy_time <= my_blast_strength`` (replaced by
                # ``if True``), which left its "bomb to kick" alternative
                # unreachable.
                positions.add(my_position)
                positions_after_bomb = set(survivable[1]).difference(
                    positions)
                if positions_after_bomb:
                    print("Bomb to kill an enemy", enemy,
                          constants.Action.Bomb)
                    return constants.Action.Bomb.value

    # Move towards a wood
    if len(next_to_items_no_move[constants.Item.Wood]) > 0:
        # positions next to wood
        good_time_positions = next_to_items_no_move[constants.Item.Wood]
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward wood", action)
            return action.value

    # kick whatever I can kick
    # -- tentative, this is generally not a good strategy
    while kickable:
        # then consider what happens if I kick a bomb
        next_position = kickable.pop()

        # do not kick a static bomb that is about to break a wall
        if all([moving_direction[next_position] is None,
                self._can_break_wood(board, next_position,
                                     my_blast_strength)]):
            continue

        my_action = self._get_direction(my_position, next_position)
        list_boards_with_kick, next_position \
            = self._board_sequence(obs["board"],
                                   curr_bombs,
                                   curr_flames,
                                   self._search_range,
                                   my_position,
                                   my_action=my_action,
                                   can_kick=True,
                                   enemy_mobility=3)
        survivable_with_kick, _ \
            = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                 next_position)
        if next_position in survivable_with_kick[0]:
            print("Kicking", my_action)
            return my_action.value

    # Move towards an enemy
    good_time_positions = set()
    for enemy in my_enemies:
        good_time_positions = good_time_positions.union(
            next_to_items[enemy])
    if len(good_time_positions) > 0:
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev,
                                               is_survivable)

        if obs["bomb_life"][my_position] > 0:
            # if on a bomb, move away: take the opposite direction when
            # that is survivable, otherwise give up on this rule
            opposite = {
                constants.Action.Down: constants.Action.Up,
                constants.Action.Up: constants.Action.Down,
                constants.Action.Right: constants.Action.Left,
                constants.Action.Left: constants.Action.Right,
            }
            away = opposite.get(action)
            if away is not None and is_survivable[away]:
                action = away
            else:
                action = None

        if action is not None:
            print("Moving toward/against enemy", action)
            return action.value

    #
    # as in the agent from the previous competition
    #
    action = super().act(obs, action_space)
    if is_survivable[constants.Action(action)]:
        print("Action from prev. agent", constants.Action(action))
        return action

    action = random.choice(survivable_actions)
    print("Random action", action)
    return action.value
def _djikstra(board, my_position, bombs, enemies, depth=None, exclude=None):
    """Shortest-path search (unit step cost) outward from ``my_position``.

    Parameters
    ----------
    board : 2-D array of item values, e.g. ``np.array(obs['board'])``.
    my_position : ``(row, col)`` start cell, e.g. ``tuple(obs['position'])``.
    bombs : iterable of dicts carrying at least a ``'position'`` key.
    enemies : enemy items, e.g.
        ``[constants.Item(e) for e in obs['enemies']]``; forwarded to
        ``utility.position_is_passable``.
    depth : largest Manhattan distance from ``my_position`` that is
        considered; defaults to ``2 * len(board)``.
    exclude : items whose cells are ignored entirely; defaults to Fog, Rigid
        and Flames.

    Returns
    -------
    (items, dist, prev)
        ``items`` maps each ``constants.Item`` to the in-range positions
        holding it, ``dist`` maps positions to their step count (``np.inf``
        when never reached) and ``prev`` maps positions to their predecessor
        (``None`` when there is none).  Impassable cells next to a reached
        cell still receive a distance, but are never expanded further.
    """
    if depth is None:
        depth = len(board) * 2

    if exclude is None:
        exclude = [
            constants.Item.Fog, constants.Item.Rigid, constants.Item.Flames
        ]

    def out_of_range(p1, p2):
        """Return True when the Manhattan distance exceeds ``depth``."""
        x1, y1 = p1
        x2, y2 = p2
        return abs(y2 - y1) + abs(x2 - x1) > depth

    items = defaultdict(list)

    # The agent's own cell hides a bomb it is standing on, so record it here.
    for bomb in bombs:
        if bomb['position'] == my_position:
            items[constants.Item.Bomb].append(my_position)

    dist = {}
    prev = {}

    mx, my = my_position
    # range() excludes its stop value, hence "+ 1": without it the cells at
    # exactly ``depth`` rows/columns below/right of the agent were dropped
    # although out_of_range() accepts them (and the up/left side kept them).
    for r in range(max(0, mx - depth), min(len(board), mx + depth + 1)):
        for c in range(max(0, my - depth), min(len(board), my + depth + 1)):
            position = (r, c)
            if out_of_range(my_position, position) or \
                    utility.position_in_items(board, position, exclude):
                continue

            dist[position] = 0 if position == my_position else np.inf
            prev[position] = None

            item = constants.Item(board[position])
            items[item].append(position)

    # Dijkstra with unit edge weights
    H = []
    heapq.heappush(H, (0, my_position))
    while H:
        min_dist, position = heapq.heappop(H)

        # Impassable cells keep the distance they were given but are dead
        # ends for the search.
        if not utility.position_is_passable(board, position, enemies):
            continue

        x, y = position
        for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            new_position = (row + x, col + y)
            if new_position not in dist:
                continue

            if min_dist + 1 < dist[new_position]:
                dist[new_position] = min_dist + 1
                prev[new_position] = position
                heapq.heappush(H, (dist[new_position], new_position))

    return items, dist, prev
def act(self, obs, action_space):
    """Rule-based policy driven by a per-cell safety score.

    Priority order: escape when the current cell's safety score is ``-inf``,
    walk to a power-up, bomb whatever can be broken, chase a nearby enemy,
    walk to wood, and otherwise take a random valid direction weighted by
    ``exp(safety_score)`` of the destination.

    Parameters
    ----------
    obs : observation dict; reads ``position``, ``board``,
        ``bomb_blast_strength``, ``enemies``, ``ammo`` and ``blast_strength``.
    action_space : unused.

    Returns
    -------
    The ``.value`` of the chosen ``constants.Action``.

    Side effects: may update ``self._prev_direction`` and
    ``self._recently_visited_positions``.

    NOTE(review): statement nesting was reconstructed from a
    whitespace-mangled source.
    """

    def convert_bombs(bomb_map):
        """Turn the blast-strength map into a list of bomb dicts."""
        rows, cols = np.where(bomb_map > 0)
        return [{
            'position': (r, c),
            'blast_strength': int(bomb_map[(r, c)])
        } for r, c in zip(rows, cols)]

    my_position = tuple(obs['position'])
    board = np.array(obs['board'])
    bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    enemies = [constants.Item(e) for e in obs['enemies']]
    ammo = int(obs['ammo'])
    blast_strength = int(obs['blast_strength'])
    items, dist, prev = self._djikstra(
        board, my_position, bombs, enemies, depth=20)

    # safety score
    safety_score = self._make_safety_score(board, items, bombs, enemies)

    if safety_score[my_position] == -np.inf:
        # The current cell is as unsafe as it gets: look for a refuge nearby.
        max_distance = 5
        safe_positions = list()
        maybe_positions = list()
        mx, my = my_position
        # "+ 1" because range() excludes its stop value; the window used to
        # reach max_distance up/left but only max_distance - 1 down/right.
        for x in range(max(0, mx - max_distance),
                       min(len(board), mx + max_distance + 1)):
            for y in range(max(0, my - max_distance),
                           min(len(board), my + max_distance + 1)):
                if (x, y) not in dist:
                    # unreachable
                    continue
                if safety_score[(x, y)] > 1:
                    safe_positions.append((x, y))
                elif safety_score[(x, y)] == 1:
                    maybe_positions.append((x, y))

        nearest = None
        dist_to = max(dist.values())
        for position in safe_positions:
            d = dist[position]
            if d <= dist_to:
                nearest = position
                dist_to = d

        # Only fall back to the "maybe safe" cells when no clearly safe cell
        # is within reach.
        if dist_to > max_distance:
            for position in maybe_positions:
                d = dist[position]
                if d <= dist_to:
                    nearest = position
                    dist_to = d

        if nearest is not None:
            # found a way to escape
            if prev[nearest] is not None:
                direction = self._get_direction_towards_position(
                    my_position, nearest, prev)
                if verbose:
                    print("escaping")
                return direction.value

    # Move towards a powerup
    direction = self._near_good_powerup(my_position, items, dist, prev, 20)
    if direction is not None:
        if safety_score[utility.get_next_position(my_position,
                                                  direction)] > 1:
            if verbose:
                print("item")
            return direction.value
        else:
            if verbose:
                print("item but unsafe")

    # Break whatever you can break
    to_break = self._what_to_break(board, my_position, blast_strength)
    maybe = self._maybe_bomb(ammo, blast_strength, items, dist, my_position)
    if len(to_break) > 0 and maybe:
        if verbose:
            print("to break", to_break)
        return constants.Action.Bomb.value

    # Move towards an enemy.  The original picked the chase radius with an
    # if/else on the remaining items, but both branches used 3.
    to_chase = 3
    direction = self._near_enemy(my_position, items, dist, prev, enemies,
                                 to_chase)
    if direction is not None and (self._prev_direction != direction or
                                  random.random() < .5):
        self._prev_direction = direction
        if verbose:
            print("enemy")
        return direction.value

    # Move towards a wooden wall if there is one within two reachable spaces
    # and the next step is safe.
    direction = self._near_wood(my_position, items, dist, prev, 2)
    if direction is not None:
        if safety_score[utility.get_next_position(my_position,
                                                  direction)] > 1:
            if verbose:
                print("wood")
            return direction.value

    # Choose a random but valid direction.
    directions = [
        constants.Action.Stop, constants.Action.Left,
        constants.Action.Right, constants.Action.Up, constants.Action.Down
    ]
    valid_directions = self._filter_invalid_directions(
        board, my_position, directions, enemies)
    directions = self._filter_unsafe_directions(board, my_position,
                                                valid_directions, bombs)
    directions = self._filter_recently_visited(
        directions, my_position, self._recently_visited_positions)
    if len(directions) > 1:
        directions = [k for k in directions if k != constants.Action.Stop]
    if not len(directions):
        directions = [constants.Action.Stop]

    # Add this position to the recently visited uninteresting positions so we
    # don't return immediately.
    self._recently_visited_positions.append(my_position)
    self._recently_visited_positions = self._recently_visited_positions[
        -self._recently_visited_length:]

    p = [
        safety_score[utility.get_next_position(my_position, d)]
        for d in directions
    ]
    if verbose:
        print("random", p, directions)

    # Softmax-style weighting by safety.  random.choices() returns a *list*;
    # the original called ``.value`` on that list, which always raised and
    # was swallowed by a bare ``except``, so the weights were never used.
    weights = np.exp(p)
    total = np.sum(weights)
    if len(directions) > 1 and np.isfinite(total) and total > 0:
        try:
            return random.choices(
                directions, weights=list(weights / total))[0].value
        except (ValueError, IndexError):
            # Degenerate weights: fall through to the uniform choice.
            pass
    return random.choice(directions).value
def act(self, obs, action_space, info):
    """Choose an action from survivability analysis plus bombing heuristics.

    Decision cascade (first rule that fires wins): the only survivable
    action, bomb/move according to ``total_frac_blocked``, bomb at a wood
    target, move to a good item, move to a bomb target, kick a bomb when it
    lowers an enemy's survivable-node count, move toward the fog cell unseen
    for longest, and finally the most survivable action.

    Parameters
    ----------
    obs : observation dict; reads ``board``, ``position``, ``ammo``,
        ``blast_strength``, ``enemies``, ``teammate``, ``can_kick``,
        ``bomb_blast_strength`` and ``bomb_life``.
    action_space : unused in this method.
    info : dict of precomputed data; reads ``list_boards_no_move``,
        ``curr_bombs``, ``curr_flames``, ``enemy_blast_strength``,
        ``moving_direction``, ``might_powerup`` and ``since_last_seen``.

    Returns
    -------
    The chosen action's ``.value``, or ``None`` when no action is survivable.

    NOTE(review): statement nesting was reconstructed from a
    whitespace-mangled source; confirm against the original file.
    """

    #
    # Definitions
    #

    enemy_mobility = 4
    enemy_bomb = 1

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_teammate = obs["teammate"]
    my_kick = obs["can_kick"]  # whether I can kick

    print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t")

    #
    # Understand current situation
    #

    # fraction of blocked node in the survival trees of enemies
    # Work on a copy in which my own cell is replaced by what it holds
    # besides me: a bomb if one is recorded there, a passage otherwise.
    _list_boards = deepcopy(info["list_boards_no_move"])
    if obs["bomb_blast_strength"][my_position]:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Bomb.value
    else:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Passage.value
    total_frac_blocked, n_survivable_nodes \
        = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

    # where to place bombs to break wood
    bomb_target_wood, n_breakable \
        = self._get_bomb_target(info["list_boards_no_move"][-1],
                                my_position,
                                my_blast_strength,
                                constants.Item.Wood,
                                max_breakable=False)

    # Only wood targets are used; an enemy-based target
    # (total_frac_blocked > 0) was tried and disabled.
    bomb_target = bomb_target_wood

    # List of boards simulated
    list_boards, _ = self._board_sequence(
        board,
        info["curr_bombs"],
        info["curr_flames"],
        self._search_range,
        my_position,
        enemy_mobility=enemy_mobility,
        enemy_bomb=enemy_bomb,
        enemy_blast_strength=info["enemy_blast_strength"])

    # some bombs may explode with extra bombs, leading to under estimation
    # so copy every flame of the no-move simulation into this one
    for t in range(len(list_boards)):
        flame_positions = np.where(
            info["list_boards_no_move"][t] == constants.Item.Flames.value)
        list_boards[t][flame_positions] = constants.Item.Flames.value

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, succ, _ \
        = self._search_time_expanded_network(list_boards,
                                             my_position)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable,
                                       obs,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       enemy_mobility=enemy_mobility,
                                       enemy_bomb=enemy_bomb,
                                       enemy_blast_strength=info["enemy_blast_strength"])

    n_survivable = dict()
    kick_actions = list()
    if my_kick:
        # Positions where we kick a bomb if we move to
        kickable, _ = self._kickable_positions(obs, info["moving_direction"])
        for next_position in kickable:
            # consider what happens if I kick a bomb
            my_action = self._get_direction(my_position, next_position)

            # do not kick into fog
            # (scan from the bomb along the kick direction to the board edge)
            dx = next_position[0] - my_position[0]
            dy = next_position[1] - my_position[1]
            position = next_position
            is_fog = False
            while self._on_board(position):
                if utility.position_is_fog(board, position):
                    is_fog = True
                    break
                position = (position[0] + dx, position[1] + dy)
            if is_fog:
                continue

            # NOTE: next_position is rebound here to the second value
            # returned by _board_sequence.
            list_boards_with_kick, next_position \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=enemy_mobility,
                                       enemy_bomb=enemy_bomb,
                                       enemy_blast_strength=info["enemy_blast_strength"])
            survivable_with_kick, prev_kick, succ_kick, _ \
                = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                     next_position)
            if next_position in survivable_with_kick[0]:
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [
                    len(s) for s in survivable_with_kick[1:]
                ]
                kick_actions.append(my_action)
    else:
        kickable = set()

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    if len(survivable_actions) == 0:
        return None

    #
    # Items and bomb target that can be reached in a survivable manner
    #

    reachable_items, reached, next_to_items \
        = self._find_reachable_items(list_boards,
                                     my_position,
                                     survivable,
                                     bomb_target)

    #
    # Evaluate the survivability of each action
    #

    x, y = my_position
    for action in survivable_actions:
        # for each survivable action, check the survivability
        # NOTE(review): kick actions are in survivable_actions too, so the
        # n_survivable entry computed for them above is overwritten here;
        # confirm that this is intended.
        if action == constants.Action.Bomb:
            n_survivable[action] = [
                len(s) for s in survivable_with_bomb[1:]
            ]
            continue

        if action == constants.Action.Up:
            dx = -1
            dy = 0
        elif action == constants.Action.Down:
            dx = 1
            dy = 0
        elif action == constants.Action.Left:
            dx = 0
            dy = -1
        elif action == constants.Action.Right:
            dx = 0
            dy = 1
        elif action == constants.Action.Stop:
            dx = 0
            dy = 0
        else:
            raise ValueError()
        next_position = (x + dx, y + dy)
        n_survivable[action], _info = self._count_survivable(
            succ, 1, next_position)

    if verbose:
        print("n_survivable")
        for a in n_survivable:
            print(a, n_survivable[a])

    #
    # Avoid the action leading to no choice if possible
    #

    updated = False

    # Drop actions whose last count is at most half of the best one.
    max_survivable_positions = max([n[-1] for n in n_survivable.values()])
    if max_survivable_positions > 1:
        for a in n_survivable:
            if n_survivable[a][-1] > max_survivable_positions / 2:
                continue
            is_survivable[a] = False
            updated = True

    # Drop actions that bottleneck to a single position from index
    # ``enemy_mobility`` onwards, provided some other action does not.
    minn = defaultdict(int)
    for a in n_survivable:
        minn[a] = min(n_survivable[a][enemy_mobility:])
    maxmin = max(minn.values())
    if maxmin > 1:
        for a in minn:
            if minn[a] == 1:
                is_survivable[a] = False
                updated = True

    if updated:
        survivable_actions = [a for a in is_survivable if is_survivable[a]]

    #
    # Choose the survivable action, if it is the only choice
    #

    if len(survivable_actions) == 1:
        # move to the position if it is the only survivable position
        action = survivable_actions[0]
        print("The only survivable action", action)
        return action.value

    # The string literal below is disabled code kept by the original author;
    # it is evaluated as a no-op expression statement.
    """
    #
    # Bomb if it has dominating survivability
    #
    if is_survivable[constants.Action.Bomb]:
        bomb_is_most_survivable = True
        bomb_sorted = np.array(sorted(n_survivable[constants.Action.Bomb]))
        for action in n_survivable:
            if action == constants.Action.Bomb:
                continue
            action_sorted = np.array(sorted(n_survivable[action]))
            if any(action_sorted > bomb_sorted):
                bomb_is_most_survivable = False
                break
        if bomb_is_most_survivable:
            action = constants.Action.Bomb
            print("Bomb to survive", action)
            return action.value
    """

    #
    # Bomb at a target
    #

    # Find the survivable action whose destination has the largest
    # total_frac_blocked value.
    best_action = None
    max_block = 0
    for action in survivable_actions:
        next_position = self._get_next_position(my_position, action)
        block = total_frac_blocked[next_position]
        if block > max_block:
            max_block = block
            best_action = action

    if all([
            is_survivable[constants.Action.Bomb],
            best_action in [constants.Action.Stop, constants.Action.Bomb]
    ]):
        print("Place a bomb at a locally optimal position",
              constants.Action.Bomb)
        return constants.Action.Bomb.value

    #
    # Move towards where to bomb
    #

    if best_action not in [None, constants.Action.Bomb]:
        next_position = self._get_next_position(my_position, best_action)
        # TODO : PARAMETER TO OPTIMIZE
        if total_frac_blocked[next_position] > 0.1:
            print("Move towards better place to bomb", best_action)
            return best_action.value

    #
    # Bomb to break wood
    #

    consider_bomb = True
    if survivable_with_bomb is None:
        consider_bomb = False
    elif any([len(s) <= 1 for s in survivable_with_bomb[2:]]):
        # if not sufficiently survivable all the time after bomb, do not bomb
        consider_bomb = False
    elif self._might_break_powerup(info["list_boards_no_move"][-1],
                                   my_position,
                                   my_blast_strength,
                                   info["might_powerup"]):
        # if might break an item, do not bomb
        consider_bomb = False

    if consider_bomb and bomb_target[my_position]:
        # place bomb if I am at a bomb target
        print("Bomb at a bomb target", constants.Action.Bomb)
        return constants.Action.Bomb.value

    #
    # Move towards good items
    #

    good_items = [
        constants.Item.ExtraBomb, constants.Item.IncrRange,
        constants.Item.Kick
    ]
    good_time_positions = set()  # positions with good items
    for item in good_items:
        good_time_positions = good_time_positions.union(
            reachable_items[item])
    if len(good_time_positions) > 0:
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward good item", action)
            return action.value

    #
    # Move towards where to bomb to break wood
    #

    good_time_positions = reachable_items["target"]
    print("good time positions", good_time_positions)
    action = self._find_distance_minimizer(my_position,
                                           good_time_positions,
                                           prev,
                                           is_survivable)
    if action is not None:
        print("Moving toward where to bomb", action)
        return action.value

    #
    # Kick
    #

    for my_action in kick_actions:
        if my_action == constants.Action.Up:
            next_position = (my_position[0] - 1, my_position[1])
        elif my_action == constants.Action.Down:
            next_position = (my_position[0] + 1, my_position[1])
        elif my_action == constants.Action.Right:
            next_position = (my_position[0], my_position[1] + 1)
        elif my_action == constants.Action.Left:
            next_position = (my_position[0], my_position[1] - 1)

        # do not kick a bomb if it will break a wall, enemies
        if info["moving_direction"][next_position] is None:
            print("checking static bomb")
            # if it is a static bomb
            if self._can_break(info["list_boards_no_move"][0],
                               next_position,
                               my_blast_strength,
                               [constants.Item.Wood] + my_enemies):
                continue

        list_boards_with_kick_no_move, _ \
            = self._board_sequence(obs["board"],
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=my_action,
                                   can_kick=True,
                                   enemy_mobility=0)

        # Kick when it shrinks the number of survivable nodes of any visible
        # enemy.
        for enemy in my_enemies:
            rows, cols = np.where(board == enemy.value)
            if len(rows) == 0:
                continue
            enemy_position = (rows[0], cols[0])
            _survivable, _, _, _ \
                = self._search_time_expanded_network(list_boards_with_kick_no_move,
                                                     enemy_position)

            n_survivable_nodes_with_kick = sum(
                [len(positions) for positions in _survivable])
            if n_survivable_nodes_with_kick < n_survivable_nodes[enemy]:
                print("Kicking to reduce the survivability",
                      n_survivable_nodes[enemy], "->",
                      n_survivable_nodes_with_kick, my_action)
                return my_action.value

    #
    # TODO : move toward might powerups
    #

    #
    # Move towards a fog where we have not seen longest
    #

    best_time_position = None
    oldest = 0
    for t, x, y in next_to_items[constants.Item.Fog]:
        neighbors = [(x + dx, y + dy)
                     for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
        age = max([
            info["since_last_seen"][position] for position in neighbors
            if self._on_board(position)
        ])
        if age > oldest:
            oldest = age
            best_time_position = (t, x, y)

    if best_time_position is not None:
        action = self._find_distance_minimizer(my_position,
                                               [best_time_position],
                                               prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward oldest fog", action)
            return action.value

    #
    # Choose most survivable action
    #

    action = self._get_most_survivable_action(n_survivable)
    print("Most survivable action", action)
    return action.value
def act(self, obs, action_space, info):
    """Pick this step's action: survive first, then block enemies, then fall back.

    The method scores every candidate action in two ways and then walks a
    cascade of decision rules, returning as soon as one rule yields an action:

    1. ``block[action]``: how much the action is estimated to block enemies
       (discounted by how much it blocks the teammate), with random noise
       added to positive scores.
    2. ``n_survivable_*[action]``: per-step survivability arrays returned by
       ``self._get_survivable`` under two enemy models (enemies move /
       enemies place bombs).

    Args:
        obs: observation dict. Keys read here: "position", "ammo",
            "blast_strength", "can_kick", "enemies", "teammate",
            "bomb_blast_strength", "bomb_life".
        action_space: not used by this method.
        info: dict of precomputed data. Keys read here: 'recently_seen',
            "my_next_position", "teammate_position", "enemy_positions",
            "list_boards_no_move", "all_kickable", "curr_bombs",
            "curr_flames", "step_to_collapse", "collapse_ring",
            "might_block_actions", "might_blocked", "might_block_teammate".

    Returns:
        The ``.value`` of the chosen ``constants.Action`` member.

    Side effects:
        Overwrites this agent's own cell in every board of
        ``info["list_boards_no_move"]`` in place, temporarily writes a bomb
        into ``board`` while evaluating kicks (restored afterwards), and
        draws from ``self.random``.
    """

    #
    # Definitions
    #

    board = info['recently_seen']
    #board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_kick = obs["can_kick"]  # whether I can kick (not referenced below)
    my_enemies = [
        constants.Item(e) for e in obs['enemies']
        if e != constants.Item.AgentDummy
    ]
    if obs["teammate"] != constants.Item.AgentDummy:
        my_teammate = obs["teammate"]
    else:
        my_teammate = None

    # Actions whose entry in info["my_next_position"] is truthy.
    all_feasible_actions = [
        a for a in info["my_next_position"] if info["my_next_position"][a]
    ]

    # positions that might be blocked (agent_positions is not referenced below)
    if info["teammate_position"] is None:
        agent_positions = info["enemy_positions"]
    else:
        agent_positions = info["enemy_positions"] + [
            info["teammate_position"]
        ]

    #
    # Fraction of blocked node in the survival trees of enemies
    #

    # Erase myself from the "no move" boards: my cell becomes a bomb if one
    # lies under me, otherwise a passage. This mutates the arrays stored in
    # info["list_boards_no_move"] in place.
    _list_boards = info["list_boards_no_move"]
    if obs["bomb_blast_strength"][my_position]:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Bomb.value
    else:
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Passage.value

    # blocked_time_positions is not referenced below.
    total_frac_blocked, n_survivable_nodes, blocked_time_positions \
        = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"],
                                 ignore_dying_agent=False)

    # The *_teammate names are only bound when a teammate position is known;
    # every later use is guarded by the same condition or by a branch that
    # can only run under it.
    if info["teammate_position"] is not None:
        total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
            = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"],
                                     ignore_dying_agent=True)

    # block[action]: blocking score of each action. Being a defaultdict,
    # merely reading block[a] for an unseen action inserts a 0.0 entry.
    block = defaultdict(float)
    for action in [
            constants.Action.Stop, constants.Action.Up,
            constants.Action.Down, constants.Action.Left,
            constants.Action.Right
    ]:
        next_position = info["my_next_position"][action]
        if next_position is None:
            continue
        if next_position in info["all_kickable"]:
            # kick will be considered later
            continue
        block[action] = total_frac_blocked[next_position]
        if info["teammate_position"] is not None and block[action] > 0:
            # Discount by the fraction of the teammate's options blocked.
            block[action] *= (1 - total_frac_blocked_teammate[next_position])
        if block[action] > 0:
            # Scale, then add -log(-log(U)) noise (Gumbel noise when U is
            # uniform on (0, 1)); zero scores are left at exactly 0.
            # presumably self._inv_tmp is an inverse temperature - confirm
            block[action] *= self._inv_tmp
            block[action] -= np.log(-np.log(self.random.uniform()))

    # Score placing a bomb: only when I have ammo and no bomb is under me.
    if all([my_ammo > 0, obs["bomb_blast_strength"][my_position] == 0]):

        list_boards_with_bomb, _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_blast_strength=my_blast_strength,
                                   my_action=constants.Action.Bomb,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        block[constants.Action.Bomb] \
            = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                               n_survivable_nodes,
                                               board,
                                               my_enemies,
                                               ignore_dying_agent=False)

        # Combine with the blocking from standing here: p + q * (1 - p).
        block[constants.Action.Bomb] \
            += total_frac_blocked[my_position] * (1 - block[constants.Action.Bomb])

        if info["teammate_position"] is not None:
            block_teammate_with_bomb = self._get_frac_blocked_two_lists(
                list_boards_with_bomb, n_survivable_nodes_teammate, board,
                [my_teammate], ignore_dying_agent=True)
            # this is an approximation
            block_teammate_with_bomb \
                += total_frac_blocked_teammate[my_position] * (1 - block_teammate_with_bomb)

            block[constants.Action.Bomb] *= (1 - block_teammate_with_bomb)

        if block[constants.Action.Bomb] > 0:
            block[constants.Action.Bomb] *= self._inv_tmp
            block[constants.Action.Bomb] -= np.log(
                -np.log(self.random.uniform()))

    # Score each kick. block_teammate_with_kick is keyed by the kicked
    # position, while block is keyed by the action that performs the kick.
    block_teammate_with_kick = defaultdict(float)
    for next_position in info["all_kickable"]:

        my_action = self._get_direction(my_position, next_position)

        # Temporarily force a bomb into the kicked cell so the simulation
        # sees it; the original cell content is restored right after.
        backedup = False
        if board[next_position] != constants.Item.Bomb.value:
            backup_cell = board[next_position]
            board[next_position] = constants.Item.Bomb.value  # an agent will be overwritten
            backedup = True

        list_boards_with_kick, _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=my_action,
                                   can_kick=True,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        if backedup:
            board[next_position] = backup_cell

        block[my_action] \
            = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                               n_survivable_nodes,
                                               board,
                                               my_enemies)
        block[my_action] \
            += total_frac_blocked[next_position] * (1 - block[my_action])

        if block[my_action] > 0 and info["teammate_position"] is not None:
            block_teammate_with_kick[next_position] \
                = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                   n_survivable_nodes_teammate,
                                                   board,
                                                   [my_teammate],
                                                   ignore_dying_agent=True)

            # this is an approximation
            block_teammate_with_kick[next_position] \
                += total_frac_blocked_teammate[next_position] * (1 - block_teammate_with_kick[next_position])

            block[my_action] *= (1 - block_teammate_with_kick[next_position])

        if block[my_action] > 0:
            block[my_action] *= self._inv_tmp
            block[my_action] -= np.log(-np.log(self.random.uniform()))

    # Survivability when enemies are modelled as moving (enemy_mobility=1)
    # and not placing bombs (enemy_bomb=0). list_boards_move is not
    # referenced below.
    n_survivable_move, is_survivable_move, list_boards_move \
        = self._get_survivable(obs, info, my_position, info["my_next_position"], info["enemy_positions"],
                               info["all_kickable"], allow_kick_to_fog=True,
                               enemy_mobility=1, enemy_bomb=0,
                               ignore_dying_agent=False,
                               step_to_collapse=info["step_to_collapse"],
                               collapse_ring=info["collapse_ring"])

    # Feasible actions missing from the result count as never survivable.
    for a in all_feasible_actions:
        if a not in n_survivable_move:
            n_survivable_move[a] = np.zeros(self._search_range)

    # True if at least one enemy position currently has no bomb on it.
    enemy_can_place_bomb = any([
        obs["bomb_blast_strength"][position] == 0
        for position in info["enemy_positions"]
    ])

    if enemy_can_place_bomb:

        # Second model: enemies stay put (enemy_mobility=0) and place a bomb
        # (enemy_bomb=1). is_survivable_bomb and list_boards_bomb are not
        # referenced below.
        n_survivable_bomb, is_survivable_bomb, list_boards_bomb \
            = self._get_survivable(obs, info, my_position, info["my_next_position"], info["enemy_positions"],
                                   info["all_kickable"], allow_kick_to_fog=True,
                                   enemy_mobility=0, enemy_bomb=1,
                                   ignore_dying_agent=False,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        for a in all_feasible_actions:
            if a not in n_survivable_bomb:
                n_survivable_bomb[a] = np.zeros(self._search_range)

        # Survivable at the last step under at least one of the two models.
        might_survivable_actions = set(
            [a for a in n_survivable_bomb if n_survivable_bomb[a][-1] > 0] +
            [a for a in n_survivable_move if n_survivable_move[a][-1] > 0])

        # Actions listed in info["might_block_actions"] are excluded and
        # their survivability is zeroed under both models.
        might_survivable_actions -= info["might_block_actions"]
        for a in info["might_block_actions"]:
            n_survivable_bomb[a] = np.zeros(self._search_range)
            n_survivable_move[a] = np.zeros(self._search_range)

        for a in might_survivable_actions:
            if a not in n_survivable_bomb:
                n_survivable_bomb[a] = np.zeros(self._search_range)
            if a not in n_survivable_move:
                n_survivable_move[a] = np.zeros(self._search_range)

        # Survivable under both models; if the move might be blocked,
        # staying in place (Stop) must be survivable as well.
        survivable_actions = list()
        for action in might_survivable_actions:
            if n_survivable_move[action][-1] > 0 and n_survivable_bomb[
                    action][-1] > 0:
                if not info["might_blocked"][action] or n_survivable_move[
                        constants.Action.Stop][-1] > 0:
                    survivable_actions.append(action)

        # Mix of the scenarios with equal weight 1/3 each: when the move
        # might be blocked, one "move" share is replaced by Stop.
        n_survivable_expected = dict()
        for a in survivable_actions:
            if info["might_blocked"][a]:
                n_survivable_expected[a] \
                    = np.array(n_survivable_bomb[a]) \
                    + np.array(n_survivable_move[constants.Action.Stop]) \
                    + np.array(n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 3
            else:
                n_survivable_expected[a] = np.array(
                    n_survivable_bomb[a]) + 2 * np.array(
                        n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 3
            # Self-assignment: has no effect.
            n_survivable_expected[a] = n_survivable_expected[a]

    else:

        # Only the "enemies move" model applies.
        might_survivable_actions = set(
            [a for a in n_survivable_move if n_survivable_move[a][-1] > 0])

        might_survivable_actions -= info["might_block_actions"]
        for a in info["might_block_actions"]:
            n_survivable_move[a] = np.zeros(self._search_range)

        survivable_actions = list()
        for action in might_survivable_actions:
            if n_survivable_move[action][-1] > 0:
                if not info["might_blocked"][action] or n_survivable_move[
                        constants.Action.Stop][-1] > 0:
                    survivable_actions.append(action)

        for a in might_survivable_actions:
            if a not in n_survivable_move:
                n_survivable_move[a] = np.zeros(self._search_range)

        n_survivable_expected = dict()
        for a in survivable_actions:
            if info["might_blocked"][a]:
                n_survivable_expected[a] \
                    = np.array(n_survivable_move[constants.Action.Stop]) \
                    + np.array(n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 2
            else:
                n_survivable_expected[a] = np.array(n_survivable_move[a])

    #
    # Choose actions
    #

    if len(survivable_actions) == 1:
        action = survivable_actions.pop()
        return action.value

    if len(survivable_actions) > 1:
        most_survivable_actions = self._get_most_survivable_actions(
            n_survivable_expected)

        if len(most_survivable_actions) == 1:
            return most_survivable_actions[0].value

        elif len(most_survivable_actions) > 1:

            # tie break by block score
            max_block = 0  # do not choose 0
            best_action = None
            # Iterate the list (not the candidate collection) so the scan
            # order is fixed.
            for action in all_feasible_actions:
                if action not in most_survivable_actions:
                    # for deterministic behavior
                    continue
                if info["might_block_teammate"][action]:
                    continue
                if block[action] > max_block:
                    max_block = block[action]
                    best_action = action
            if best_action is not None:
                return best_action.value

    #
    # no survivable actions for all cases
    #

    # Recompute the expected survivability over all feasible actions (not
    # only the survivable ones); Stop contributes only if it is survivable.
    if enemy_can_place_bomb:

        n_survivable_expected = dict()
        for a in all_feasible_actions:
            if info["might_blocked"][a]:
                if is_survivable_move[constants.Action.Stop]:
                    n_survivable_expected[a] \
                        = np.array(n_survivable_bomb[a]) \
                        + np.array(n_survivable_move[constants.Action.Stop]) \
                        + np.array(n_survivable_move[a])
                else:
                    n_survivable_expected[a] \
                        = np.array(n_survivable_bomb[a]) \
                        + np.array(n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 3
            else:
                n_survivable_expected[a] = np.array(
                    n_survivable_bomb[a]) + 2 * np.array(
                        n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 3

    else:

        n_survivable_expected = dict()
        for a in all_feasible_actions:
            if info["my_next_position"][a] is None:
                continue
            if info["might_blocked"][a]:
                if is_survivable_move[constants.Action.Stop]:
                    n_survivable_expected[a] \
                        = np.array(n_survivable_move[constants.Action.Stop]) \
                        + np.array(n_survivable_move[a])
                else:
                    n_survivable_expected[a] = np.array(
                        n_survivable_move[a])
                n_survivable_expected[a] = n_survivable_expected[a] / 2
            else:
                n_survivable_expected[a] = np.array(n_survivable_move[a])

    if len(might_survivable_actions) == 1:
        action = might_survivable_actions.pop()
        return action.value

    if len(might_survivable_actions) > 1:
        most_survivable_actions = self._get_most_survivable_actions(
            n_survivable_expected)

        if len(most_survivable_actions) == 1:
            return most_survivable_actions[0].value

        elif len(most_survivable_actions) > 1:

            # tie break by block score
            max_block = 0  # do not choose 0
            best_action = None
            for action in all_feasible_actions:
                if action not in most_survivable_actions:
                    # for deterministic behavior
                    continue
                if info["might_block_teammate"][action]:
                    continue
                if block[action] > max_block:
                    max_block = block[action]
                    best_action = action
            if best_action is not None:
                return best_action.value

    # no survivable action found for any cases
    # TODO : Then consider killing enemies or helping teammate

    # Pick the best blocking action that does not hurt the teammate.
    max_block = 0  # do not choose 0
    best_action = None
    for action in all_feasible_actions:
        if action not in block:
            # for deterministic behavior
            continue
        if info["might_block_teammate"][action]:
            continue
        if all([
                action == constants.Action.Bomb,
                info["teammate_position"] is not None
        ]):
            # NOTE(review): block_teammate_with_bomb is only bound inside
            # the bomb-scoring branch above; presumably Bomb is never
            # feasible when that branch was skipped - confirm.
            if block_teammate_with_bomb > 0:
                continue
        next_position = info["my_next_position"][action]
        # all([...]) evaluates both items eagerly, so the defaultdict lookup
        # happens (and may insert 0.0) even for non-kick positions.
        if all([
                next_position in info["all_kickable"],
                block_teammate_with_kick[next_position] > 0
        ]):
            continue
        if block[action] > max_block:
            max_block = block[action]
            best_action = action
    if best_action is not None:
        return best_action.value

    # longest survivable action

    longest_survivable_actions = self._get_longest_survivable_actions(
        n_survivable_expected)

    if len(longest_survivable_actions) == 1:
        return longest_survivable_actions[0].value

    elif len(longest_survivable_actions) > 1:

        # break tie by most survivable actions
        # (zero out every action that is not among the longest survivors)
        for a in n_survivable_expected:
            if a not in longest_survivable_actions:
                n_survivable_expected[a] = np.zeros(self._search_range)
        most_survivable_actions = self._get_most_survivable_actions(
            n_survivable_expected)

        if len(most_survivable_actions) == 1:
            return most_survivable_actions[0].value

        elif len(most_survivable_actions) > 1:

            if info["teammate_position"] is not None:
                # Choose the action that blocks the teammate the least.
                min_block = np.inf
                best_action = None
                for a in all_feasible_actions:
                    if a not in most_survivable_actions:
                        # for deterministic behavior
                        continue
                    if a == constants.Action.Bomb:
                        # NOTE(review): same binding caveat as above for
                        # block_teammate_with_bomb - confirm.
                        score = block_teammate_with_bomb  # do not choose Bomb unless it is strictly better than others
                    else:
                        next_position = info["my_next_position"][a]
                        # Non-bomb scores get a tiny random reduction
                        # (up to 1e-6), which also breaks exact ties.
                        if next_position in info["all_kickable"]:
                            score = block_teammate_with_kick[
                                next_position] - self.random.uniform(
                                    0, 1e-6)
                        else:
                            score = total_frac_blocked_teammate[
                                next_position] - self.random.uniform(
                                    0, 1e-6)
                    if score < min_block:
                        min_block = score
                        best_action = a
                if best_action is not None:
                    return best_action.value

            else:
                # remove Bomb (as it is most affected by bugs)
                #most_survivable_actions = list(set(most_survivable_actions) - {constants.Action.Bomb})
                most_survivable_actions = [
                    a for a in all_feasible_actions
                    if a in most_survivable_actions
                    and a != constants.Action.Bomb
                ]

                # Uniform random pick among what is left.
                index = self.random.randint(len(most_survivable_actions))
                random_action = most_survivable_actions[index]
                return random_action.value

    # The following will not be used

    self.random.shuffle(all_feasible_actions)
    if len(all_feasible_actions):
        action = all_feasible_actions[0]
        return action.value

    action = constants.Action.Stop
    return action.value
def act(self, obs, action_space):
    """Store the latest transition, then choose an action by a fixed rule cascade.

    Bookkeeping: on the first call the ``prev_*`` trackers are seeded from
    the current attributes; every call then increments ``self.steps``,
    builds a state with ``self.process_observation``, stores
    ``(prev_state, state, 0, prev_action)`` in ``self.memory`` and refreshes
    the ``prev_*`` trackers.

    Rules, first match wins: escape bomb range, bomb an adjacent enemy,
    approach a nearby enemy, approach a nearby power-up, bomb (or wait) next
    to wood, approach wood, otherwise take a random valid and safe move.

    Args:
        obs: observation dict; keys read are 'board', 'position',
            'bomb_life', 'bomb_blast_strength', 'enemies', 'ammo' and
            'blast_strength'.
        action_space: not used by this method.

    Returns:
        The chosen action's ``.value``; it is also saved to
        ``self.prev_action``.
    """
    if self.prev_ammo is None:
        # First call: nothing was recorded yet, seed the trackers.
        self.prev_ammo = self.ammo
        self.prev_blast_strength = self.blast_strength
        self.prev_can_kick = self.can_kick
        self.prev_wood_wall = np.sum(obs['board'] == 2)

    self.steps += 1
    state = self.process_observation(obs['board'], obs['position'],
                                     obs['bomb_life'],
                                     obs['bomb_blast_strength'],
                                     obs['enemies'])
    self.memory.store(self.prev_state, state, 0, self.prev_action)

    # Refresh the trackers only after the observation has been processed.
    self.prev_state = state
    self.prev_ammo = self.ammo
    self.prev_blast_strength = self.blast_strength
    self.prev_can_kick = self.can_kick
    self.prev_wood_wall = np.sum(obs['board'] == 2)

    def convert_bombs(bomb_map):
        '''Turn the blast-strength map into a list of bomb records.'''
        hits = np.where(bomb_map > 0)
        return [{
            'position': (r, c),
            'blast_strength': int(bomb_map[(r, c)])
        } for r, c in zip(hits[0], hits[1])]

    def _commit(chosen):
        # Remember the action so the next call can store it with its transition.
        self.prev_action = chosen
        return chosen

    my_position = tuple(obs['position'])
    board = np.array(obs['board'])
    bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    enemies = [constants.Item(e) for e in obs['enemies']]
    ammo = int(obs['ammo'])
    blast_strength = int(obs['blast_strength'])
    items, dist, prev = self._djikstra(board,
                                       my_position,
                                       bombs,
                                       enemies,
                                       depth=10)

    # Rule 1: leave the blast range of any bomb.
    unsafe_directions = self._directions_in_range_of_bomb(
        board, my_position, bombs, dist)
    if unsafe_directions:
        escape_routes = self._find_safe_directions(board, my_position,
                                                   unsafe_directions, bombs,
                                                   enemies)
        return _commit(random.choice(escape_routes).value)

    # Rule 2: bomb when an enemy is adjacent and bombing is acceptable.
    if self._is_adjacent_enemy(items, dist, enemies) and self._maybe_bomb(
            ammo, blast_strength, items, dist, my_position):
        return _commit(constants.Action.Bomb.value)

    # Rule 3: head for an enemy within three reachable spaces. When the
    # direction repeats the previous one it is only taken half of the time.
    toward_enemy = self._near_enemy(my_position, items, dist, prev, enemies,
                                    3)
    if toward_enemy is not None and (self._prev_direction != toward_enemy or
                                     random.random() < .5):
        self._prev_direction = toward_enemy
        return _commit(toward_enemy.value)

    # Rule 4: head for a good power-up within two reachable spaces.
    toward_powerup = self._near_good_powerup(my_position, items, dist, prev,
                                             2)
    if toward_powerup is not None:
        return _commit(toward_powerup.value)

    # Rule 5: next to a wooden wall -> bomb if acceptable, otherwise wait.
    if self._near_wood(my_position, items, dist, prev, 1):
        if self._maybe_bomb(ammo, blast_strength, items, dist, my_position):
            return _commit(constants.Action.Bomb.value)
        return _commit(constants.Action.Stop.value)

    # Rule 6: head for a wooden wall within two spaces if that step is safe.
    toward_wood = self._near_wood(my_position, items, dist, prev, 2)
    if toward_wood is not None:
        safe_steps = self._filter_unsafe_directions(board, my_position,
                                                    [toward_wood], bombs)
        if safe_steps:
            return _commit(safe_steps[0].value)

    # Rule 7: random move among valid, safe, not recently visited options.
    candidates = [
        constants.Action.Stop, constants.Action.Left,
        constants.Action.Right, constants.Action.Up, constants.Action.Down
    ]
    candidates = self._filter_invalid_directions(board, my_position,
                                                 candidates, enemies)
    candidates = self._filter_unsafe_directions(board, my_position,
                                                candidates, bombs)
    candidates = self._filter_recently_visited(
        candidates, my_position, self._recently_visited_positions)
    if len(candidates) > 1:
        # Prefer moving over standing still when there is a choice.
        candidates = [k for k in candidates if k != constants.Action.Stop]
    if not len(candidates):
        candidates = [constants.Action.Stop]

    # Mark this spot as recently visited so we do not come straight back,
    # keeping only the newest entries.
    self._recently_visited_positions.append(my_position)
    self._recently_visited_positions = self._recently_visited_positions[
        -self._recently_visited_length:]

    return _commit(random.choice(candidates).value)
def act(self, obs, action_space):
    """Pick a move: tree search when the enemy check passes, otherwise a helper agent.

    Every call refreshes ``self.action_space``, ``self.board`` and
    ``self._enemies``, updates the visit board ``self.copy_board`` and calls
    ``self.find_bombing_agents`` on the current bomb-life map.

    If ``self.enemy_in_my_sights_and_ammo(obs, 5)`` is truthy, a search tree
    is built from ``obs`` and expanded until the root has 25 visits; the
    root child with the highest ``self.UCB`` value supplies the move and the
    new ``self.bombing_agents``. Otherwise a fresh ``agents.YichenAgent`` is
    configured with the visit board and asked for the action.

    Args:
        obs: observation dict; keys read here are 'position', 'board',
            'enemies' and 'bomb_life' (the dict is also passed on whole).
        action_space: stored on ``self`` and forwarded to the helper agent.

    Returns:
        ``winner.state.move`` of the selected child, or whatever the helper
        agent's ``act`` returns.
    """
    self.action_space = action_space
    my_pos = tuple(obs['position'])
    board = np.array(obs['board'])
    self.board = np.array(obs['board'])
    self._enemies = [constants.Item(e) for e in obs['enemies']]

    if self.copy_walls:
        # Cells holding 1 on the board (presumably rigid walls) get a huge
        # visit count on the visit board.
        for i, visit_row in enumerate(self.copy_board):
            for j in range(len(visit_row)):
                if board[i][j] == 1:
                    visit_row[j] = 9999
    # Count this step's visit of my own cell.
    row, col = my_pos[0], my_pos[1]
    self.copy_board[row][col] += 1

    # Check for new bombs on the field first.
    bomb_life_map = np.array(obs['bomb_life'])
    self.find_bombing_agents(bomb_life_map, board)

    # Tree search is performed only when the enemy/ammo check passes;
    # otherwise delegate to the helper agent.
    if not self.enemy_in_my_sights_and_ammo(obs, 5):
        self.agt = agents.YichenAgent()
        helper = self.agt
        helper.make_a_visit_board(self.copy_board)
        aid = board[my_pos[0]][my_pos[1]]
        game_mode = constants.GameType.FFA
        helper.init_agent(aid, game_mode)
        helper.set_start_position(my_pos)
        helper.reset(is_alive=True)
        return helper.act(obs, action_space)

    # Build the tree and take its root.
    search_tree = gn.Tree(obs, True, self.bombing_agents)
    self.rootNode = search_tree.get_root_node()

    # A wall-clock budget is recorded and elapsed time is tracked, but the
    # loop below is bounded by the root's visit count only.
    self.end_time = 30
    last_tick = time.time()
    elapsed = 0
    while self.rootNode.visit_count < 25:
        # select -> expand -> simulate a random child -> back-propagate
        promising = self.select_promising_node(self.rootNode)
        self.expand_node(promising)
        to_explore = promising.get_random_child_node()
        outcome = self.simulate_random_play(to_explore)
        self.back_propogation(to_explore, outcome)

        tick = time.time()
        elapsed += (tick - last_tick)
        last_tick = tick

    # The winner is the first root child with the strictly highest UCB.
    winner = None
    best_ucb = float('-inf')
    for candidate in self.rootNode.childArray:
        ucb = self.UCB(candidate, candidate.get_win_score(),
                       candidate.get_visit_count(),
                       self.rootNode.get_visit_count())
        if ucb > best_ucb:
            best_ucb = ucb
            winner = candidate

    self.bombing_agents = winner.state.bombing_agents
    return winner.state.move