def _check_if_flame_will_gone(obs, prev_two_obs, flame_pos):
    """Tell whether the flame group around ``flame_pos`` is about to vanish.

    The connected group of flame cells containing ``flame_pos`` is explored
    breadth-first on the current board. The function returns True only when
    every cell of that group was already flames in both previous
    observations. If any cell of the group is newer, the flame at
    ``flame_pos`` may be an old flame overlapped by a new one, so it is not
    assumed to disappear.

    Parameters
    ----------
    obs : dict
        Current observation; only ``obs['board']`` is read.
    prev_two_obs : sequence
        The two previous observations (both must be non-None); only their
        ``'board'`` entries are read.
    flame_pos : tuple
        Board position of the flame being examined.

    Returns
    -------
    bool
        True if the whole flame group was present in both previous boards.
    """
    assert (prev_two_obs[0] is not None)
    assert (prev_two_obs[1] is not None)

    first_board = prev_two_obs[0]['board']
    second_board = prev_two_obs[1]['board']

    def _was_flames_in_both(pos):
        # A cell counts as "old" only if it burned in both earlier boards.
        return (utility.position_is_flames(first_board, pos)
                and utility.position_is_flames(second_board, pos))

    # Cheap early exit before building the search structures.
    if not _was_flames_in_both(flame_pos):
        return False

    board = obs['board']
    queue = deque(maxlen=121)
    queue.append(flame_pos)
    visited = {flame_pos}
    dirs = _all_directions(exclude_stop=True)
    while queue:
        pos = queue.popleft()
        if not _was_flames_in_both(pos):
            return False
        for d in dirs:
            next_pos = utility.get_next_position(pos, d)
            if not utility.position_on_board(board, next_pos):
                continue
            # Grow the group through neighbouring *flame* cells. The previous
            # code tested for agents here, so the flame group was never
            # actually traversed.
            if utility.position_is_flames(board, next_pos) \
                    and next_pos not in visited:
                visited.add(next_pos)
                queue.append(next_pos)
    return True
def position_can_be_bomb_through(board, position):
    """Return True when ``position`` holds flames, a passage or a power-up.

    The three checks are delegated to ``utility`` and evaluated in that
    order; any other cell content yields False.
    """
    return bool(
        utility.position_is_flames(board, position)
        or utility.position_is_passage(board, position)
        or utility.position_is_powerup(board, position)
    )
def _filter_direction_toward_flames(board, my_position, directions, enemies):
    """Drop every direction that leads off the board or into flames.

    ``enemies`` is accepted for signature compatibility but is not used.
    The relative order of the surviving directions is preserved.
    """

    def _leads_to_safe_cell(direction):
        target = utility.get_next_position(my_position, direction)
        return (utility.position_on_board(board, target)
                and not utility.position_is_flames(board, target))

    return [direction for direction in directions
            if _leads_to_safe_cell(direction)]
def no_flames(obs):
    """Score each action by whether it steps into flames.

    Returns a list of six integers indexed by action value: -1 for every
    direction in the module-level ``dirs`` whose target cell is on the board
    and currently flames, 0 everywhere else.
    """
    origin = obs['position']
    grid = obs['board']
    # Unpacking is kept from the original: it still rejects a position that
    # is not a pair, even though the components are not used afterwards.
    _row, _col = origin

    penalties = [0 for _ in range(6)]
    for move in dirs:
        target = util.get_next_position(origin, move)
        if not util.position_on_board(grid, target):
            continue
        if util.position_is_flames(grid, target):
            penalties[move.value] = -1
    return penalties
def act(self, obs, action_space):
    """Choose the action for the current step from the raw observation.

    The method simulates future boards, computes which actions are
    survivable, and then walks a fixed priority list, returning as soon as
    one rule fires:

    1. no survivable action -> delegate to ``super().act``
    2. exactly one survivable action -> take it
    3. move toward a reachable good item (ExtraBomb, IncrRange, Kick)
    4. place a bomb (to break wood, or against a reachable enemy)
    5. move toward wood
    6. kick a kickable bomb
    7. move toward an enemy (or away from it when standing on a bomb)
    8. fall back to ``super().act`` if survivable, else a random survivable
       action

    Parameters
    ----------
    obs : dict
        Observation; the keys read here are 'board', 'position', 'ammo',
        'blast_strength', 'enemies', 'bomb_blast_strength' and 'bomb_life'.
    action_space
        Only forwarded to ``super().act``.

    Returns
    -------
    The ``.value`` of the chosen ``constants.Action``, or whatever
    ``super().act`` returns when that fallback is used.

    Side effects: overwrites ``self._search_range``, ``self._prev_bomb_life``,
    ``self._prev_flame_life`` and ``self._prev_bomb_position_strength``, and
    prints the decision to stdout.
    """

    #
    # Definitions
    #

    self._search_range = 10

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have (not used below)
    my_blast_strength = obs['blast_strength']
    my_enemies = [constants.Item(e) for e in obs['enemies']]

    #
    # Prepare extended observations
    # - bomb moving direction
    # - flame remaining life
    #

    # Summarize information about bombs
    #     curr_bombs : list of current bombs
    #     moving_direction : array of moving direction of bombs
    curr_bombs, moving_direction, self._prev_bomb_life \
        = self._get_bombs(obs, self._prev_bomb_life)

    # Summarize information about flames
    curr_flames, self._prev_flame_life \
        = self._get_flames(board,
                           self._prev_flame_life,
                           self._prev_bomb_position_strength)

    # Remember position and strength of every bomb currently on the board,
    # so the next call can tell which of them have turned into flames
    self._prev_bomb_position_strength = list()
    rows, cols = np.where(obs["bomb_blast_strength"] > 0)
    for position in zip(rows, cols):
        strength = int(obs["bomb_blast_strength"][position])
        self._prev_bomb_position_strength.append((position, strength))

    #
    # Understand current situation
    #

    # Simulation assuming enemies stay unmoved

    # List of simulated boards
    list_boards_no_move, _ \
        = self._board_sequence(board,
                               curr_bombs,
                               curr_flames,
                               self._search_range,
                               my_position,
                               enemy_mobility=0)

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable_no_move, prev_no_move \
        = self._search_time_expanded_network(list_boards_no_move,
                                             my_position)

    # Items that can be reached in a survivable manner
    reachable_items_no_move, reached_no_move, next_to_items_no_move \
        = self._find_reachable_items(list_boards_no_move,
                                     my_position,
                                     survivable_no_move)

    # Simulation assuming enemies move

    for enemy_mobility in range(3, -1, -1):
        # List of boards simulated
        list_boards, _ = self._board_sequence(board,
                                              curr_bombs,
                                              curr_flames,
                                              self._search_range,
                                              my_position,
                                              enemy_mobility=enemy_mobility)

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev = self._search_time_expanded_network(list_boards,
                                                              my_position)

        if len(survivable[1]) > 0:
            # Gradually reduce the mobility of enemy, so we have at least one survivable action
            break

    # Items that can be reached in a survivable manner
    reachable_items, reached, next_to_items \
        = self._find_reachable_items(list_boards,
                                     my_position,
                                     survivable)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable,
                                       obs,
                                       curr_bombs,
                                       curr_flames)

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    # NOTE(review): `verbose` is not defined in this method; presumably a
    # module-level flag -- confirm.
    if verbose:
        print("survivable actions are", survivable_actions)

    # Positions where we kick a bomb if we move to
    kickable = self._kickable_positions(obs, moving_direction)

    print()
    # Debug dump; range(0) is empty, so this loop body never runs.
    for t in range(0):
        print(list_boards[t])
        print(survivable[t])
        for key in prev[t]:
            print(key, prev[t][key])

    #
    # Choose an action
    #

    """
    # This is not effective in the current form
    if len(survivable_actions) > 1:
        # avoid the position if only one position at the following step
        # the number of positions that can be reached from the next position
        next = defaultdict(set)
        next_count = defaultdict(int)
        for position in survivable[1]:
            next[position] = set([p for p in prev[2] if position in prev[2][p]])
            next_count[position] = len(next[position])
        print("next count", next_count)
        if max(next_count.values()) > 1:
            for position in survivable[1]:
                if next_count[position] == 1:
                    risky_action = self._get_direction(my_position, position)
                    is_survivable[risky_action] = False
            survivable_actions = [a for a in is_survivable if is_survivable[a]]
    """

    # Do not stay on a bomb if I can
    if all([obs["bomb_life"][my_position] > 0,
            len(survivable_actions) > 1,
            is_survivable[constants.Action.Stop]]):
        is_survivable[constants.Action.Stop] = False
        survivable_actions = [a for a in is_survivable if is_survivable[a]]

    if len(survivable_actions) == 0:
        # must die
        # TODO: might want to do something that can help team mate
        # TODO: kick if possible
        print("Must die", constants.Action.Stop)
        return super().act(obs, action_space)
        # return constants.Action.Stop.value

    elif len(survivable_actions) == 1:
        # move to the position if it is the only survivable position
        action = survivable_actions[0]
        print("The only survivable action", action)
        return action.value

    # Move towards good items
    good_items = [constants.Item.ExtraBomb, constants.Item.IncrRange]
    # TODO : kick may be a good item only if I cannot kick yet
    # TODO : might want to destroy
    good_items.append(constants.Item.Kick)

    # positions with good items
    good_time_positions = set()
    for item in good_items:
        good_time_positions = good_time_positions.union(reachable_items[item])
    if len(good_time_positions) > 0:
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward good item", action)
            return action.value

    # TODO : should check the survivability of all agents in one method

    # Place a bomb if
    # - it does not significantly reduce my survivability
    # - it can break wood
    # - it can reduce the survivability of enemies
    if is_survivable[constants.Action.Bomb]:
        # if survivable now after bomb, consider bomb
        if all([len(s) > 0 for s in survivable_with_bomb]):
            # if survivable all the time after bomb, consider bomb
            # (the second operand also requires that my cell is never flames
            # in the first ten no-move boards)
            if all([self._can_break_wood(list_boards_no_move[-1],
                                         my_position,
                                         my_blast_strength)]
                   + [not utility.position_is_flames(board, my_position)
                      for board in list_boards_no_move[:10]]):
                # place bomb if can break wood
                print("Bomb to break wood", constants.Action.Bomb)
                return constants.Action.Bomb.value

            for enemy in my_enemies:
                # check if the enemy is reachable
                if len(reachable_items_no_move[enemy]) == 0:
                    continue

                # can reach the enemy at enemy_position in enemy_time step
                enemy_time = reachable_items_no_move[enemy][0][0]
                enemy_position = reachable_items_no_move[enemy][0][1:3]

                # find direction towards enemy:
                # walk the predecessor map back from the cells next to the
                # enemy down to time step 1
                positions = set([x[1:3] for x in next_to_items_no_move[enemy]])
                for t in range(enemy_time, 1, -1):
                    _positions = set()
                    for position in positions:
                        _positions = _positions.union(prev_no_move[t][position])
                    positions = _positions.copy()

                #if enemy_time <= my_blast_strength:
                # NOTE(review): the condition is hard-wired to True, so the
                # "bomb to kick" else-branch below is unreachable.
                if True:
                    positions.add(my_position)
                    # bomb only if I can step somewhere that is not on the
                    # way to the enemy
                    positions_after_bomb = set(survivable[1]).difference(positions)
                    if positions_after_bomb:
                        print("Bomb to kill an enemy", enemy, constants.Action.Bomb)
                        return constants.Action.Bomb.value
                else:
                    # bomb to kick
                    x0, y0 = my_position
                    positions_against = [(2*x0-x, 2*y0-y) for (x, y) in positions]
                    positions_after_bomb = set(survivable[1]).intersection(positions_against)
                    if positions_after_bomb:
                        print("Bomb to kick", enemy, constants.Action.Bomb)
                        return constants.Action.Bomb.value

                """
                # check if placing a bomb can reduce the survivability
                # of the enemy
                survivable_before, _ = self._search_time_expanded_network(list_boards_no_move,
                                                                          enemy_position)

                board_with_bomb = deepcopy(obs["board"])
                curr_bombs_with_bomb = deepcopy(curr_bombs)
                # lay a bomb
                board_with_bomb[my_position] = constants.Item.Bomb.value
                bomb = characters.Bomb(characters.Bomber(),  # dummy owner of the bomb
                                       my_position,
                                       constants.DEFAULT_BOMB_LIFE,
                                       my_blast_strength,
                                       None)
                curr_bombs_with_bomb.append(bomb)
                list_boards_with_bomb, _ \
                    = self._board_sequence(board_with_bomb,
                                           curr_bombs_with_bomb,
                                           curr_flames,
                                           self._search_range,
                                           my_position,
                                           enemy_mobility=0)
                survivable_after, _ \
                    = self._search_time_expanded_network(list_boards_with_bomb,
                                                         enemy_position)

                good_before = np.array([len(s) for s in survivable_before])
                good_after = np.array([len(s) for s in survivable_after])
                # TODO : what are good criteria?
                if any(good_after < good_before):
                    # place a bomb if it makes sense
                    print("Bomb to kill an enemy", constants.Action.Bomb)
                    return constants.Action.Bomb.value
                """

    # Move towards a wood
    if len(next_to_items_no_move[constants.Item.Wood]) > 0:
        # positions next to wood
        good_time_positions = next_to_items_no_move[constants.Item.Wood]
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward wood", action)
            return action.value

    # kick whatever I can kick
    # -- tentative, this is generally not a good strategy
    if len(kickable) > 0:
        while kickable:
            # then consider what happens if I kick a bomb
            next_position = kickable.pop()

            # do not kick a bomb if it will break a wall
            if all([moving_direction[next_position] is None,
                    self._can_break_wood(board, next_position, my_blast_strength)]):
                # if it is a static bomb
                # do not kick if it is breaking a wall
                continue

            my_action = self._get_direction(my_position, next_position)
            # next_position is rebound here to the second value returned by
            # the simulation
            list_boards_with_kick, next_position \
                = self._board_sequence(obs["board"],
                                       curr_bombs,
                                       curr_flames,
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=3)
            survivable_with_kick, prev_kick \
                = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                     next_position)
            if next_position in survivable_with_kick[0]:
                print("Kicking", my_action)
                return my_action.value

    # Move towards an enemy
    good_time_positions = set()
    for enemy in my_enemies:
        good_time_positions = good_time_positions.union(next_to_items[enemy])
    if len(good_time_positions) > 0:
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev,
                                               is_survivable)

        if obs["bomb_life"][my_position] > 0:
            # if on a bomb, move away: take the opposite direction when it
            # is survivable, otherwise give up on this rule
            if action == constants.Action.Down and is_survivable[constants.Action.Up]:
                action = constants.Action.Up
            elif action == constants.Action.Up and is_survivable[constants.Action.Down]:
                action = constants.Action.Down
            elif action == constants.Action.Right and is_survivable[constants.Action.Left]:
                action = constants.Action.Left
            elif action == constants.Action.Left and is_survivable[constants.Action.Right]:
                action = constants.Action.Right
            else:
                action = None

        if action is not None:
            print("Moving toward/against enemy", action)
            return action.value

    #
    # as in the agent from the previous competition
    #
    action = super().act(obs, action_space)
    if is_survivable[constants.Action(action)]:
        print("Action from prev. agent", constants.Action(action))
        return action
    else:
        action = random.choice(survivable_actions)
        print("Random action", action)
        return action.value
def _get_flames(self, board, prev_flame_life, bomb_position_strength):
    """
    Update the remaining life of every flame and list the live flames

    Parameters
    ----------
    board : array
        pommerman board
    prev_flame_life : array
        remaining life of flames in the previous step
    bomb_position_strength : list
        list of pairs of position and strength of the bombs that were on
        the board in the previous step

    Return
    ------
    curr_flames : list
        list of Flames
    flame_life : array
        remaining life of flames
    """

    # (x step, y step, first offset): only the first sweep starts at offset
    # 0, so the bomb's own cell is handled exactly once
    sweeps = ((1, 0, 0), (-1, 0, 1), (0, 1, 1), (0, -1, 1))

    # every positive life is decremented by 1
    remaining = prev_flame_life - (prev_flame_life > 0)

    for (x, y), strength in bomb_position_strength:
        if not utility.position_is_flames(board, (x, y)):
            # this bomb has not exploded yet
            continue

        for step_x, step_y, first in sweeps:
            for offset in range(first, strength):
                cell = (x + step_x * offset, y + step_y * offset)
                if not self._on_board(cell):
                    break
                if utility.position_is_flames(board, cell):
                    remaining[cell] = 3

    live_rows, live_cols = np.where(remaining > 0)
    flames = [characters.Flame(cell, remaining[cell] - 1)
              for cell in zip(live_rows, live_cols)]

    return flames, remaining
def act(self, obs, action_space, info):
    """Choose an action from the observation and the precomputed ``info``.

    Every candidate action gets a "block" score built from the fraction of
    enemy survival nodes it blocks, the agent's own survivability and, when
    a teammate is present, how much the teammate would be blocked. Positive
    scores are scaled by ``self._inv_tmp`` and perturbed with random noise.
    The method then walks a priority list and returns at the first rule
    that fires: bomb, chase the best blocking position, avoid
    low-survivability actions, avoid interfering with the teammate, go
    to/away from last seen agents, escape collapse, explore fog, and finally
    the best-scoring remaining action.

    Parameters
    ----------
    obs : dict
        Observation; keys read here: 'position', 'ammo', 'blast_strength',
        'enemies', 'teammate', 'can_kick', 'bomb_blast_strength',
        'bomb_life'.
    action_space
        Not used in this method.
    info : dict
        Precomputed data; keys read here: 'recently_seen',
        'teammate_position', 'enemy_positions', 'my_next_position',
        'all_kickable', 'kickable', 'step_to_collapse', 'collapse_ring',
        'might_block_actions', 'might_blocked', 'prev_action',
        'prev_position', 'list_boards_no_move', 'flame_life', 'curr_bombs',
        'curr_flames', 'agent_blast_strength' and, optionally, 'escape'.

    Returns
    -------
    The ``.value`` of the chosen ``constants.Action``, or None when no
    action is considered survivable or no candidate is found at the end.

    Side effects: updates ``self._inv_tmp``, and overwrites my own cell in
    the boards of ``info["list_boards_no_move"]`` in place (they are not
    copied).
    """

    #
    # Definitions
    #

    #board = obs['board']
    board = info["recently_seen"]
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have (not used below)
    my_blast_strength = obs['blast_strength']
    my_enemies = [
        constants.Item(e) for e in obs['enemies']
        if e != constants.Item.AgentDummy
    ]
    if obs["teammate"] != constants.Item.AgentDummy:
        my_teammate = obs["teammate"]
    else:
        my_teammate = None
    my_kick = obs["can_kick"]  # whether I can kick (not used below)

    #
    # Understand current situation
    #

    # positions that might be blocked
    if info["teammate_position"] is None:
        agent_positions = info["enemy_positions"]
    else:
        agent_positions = info["enemy_positions"] + [
            info["teammate_position"]
        ]

    # survivable actions
    if len(info["enemy_positions"]) > 0:
        mobility = self._enemy_mobility
    else:
        mobility = 0

    n_survivable, is_survivable, list_boards \
        = self._get_survivable(obs, info, my_position, info["my_next_position"],
                               agent_positions, info["all_kickable"],
                               allow_kick_to_fog=False,
                               enemy_mobility=mobility,
                               enemy_bomb=self._enemy_bomb,
                               step_to_collapse=info["step_to_collapse"],
                               collapse_ring=info["collapse_ring"])

    # actions flagged in info["might_block_actions"] are treated as not survivable
    for a in info["might_block_actions"]:
        n_survivable[a] = np.zeros(self._search_range)
        is_survivable[a] = False

    survivable_actions = list()
    for a in is_survivable:
        if not is_survivable[a]:
            continue
        # if the move might be blocked I would effectively stop,
        # so stopping must be survivable as well
        if info["might_blocked"][a] and not is_survivable[
                constants.Action.Stop]:
            continue
        # require more than one survivable node at the last time step
        if n_survivable[a][-1] <= 1:
            is_survivable[a] = False
            continue
        survivable_actions.append(a)

    #
    # Choose action
    #

    if len(survivable_actions) == 0:
        #
        # return None, if no survivable actions
        #
        return None

    elif len(survivable_actions) == 1:
        #
        # Choose the survivable action, if it is the only choice
        #
        action = survivable_actions[0]
        return action.value

    if all([
            info["prev_action"] not in
            [constants.Action.Stop, constants.Action.Bomb],
            info["prev_position"] == my_position
    ]):
        # if previously blocked, do not repeat with some probability
        self._inv_tmp *= self._backoff
    else:
        self._inv_tmp = self._inv_tmp_init

    #
    # Bomb at a target
    #

    # fraction of blocked node in the survival trees of enemies
    # NOTE(review): _list_boards aliases info["list_boards_no_move"], so the
    # writes below modify the boards stored in info.
    _list_boards = info["list_boards_no_move"]
    if obs["bomb_blast_strength"][my_position]:
        # I am standing on a bomb: replace myself by the bomb
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Bomb.value
    else:
        # otherwise replace myself by a passage
        for b in _list_boards:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Passage.value

    total_frac_blocked, n_survivable_nodes, blocked_time_positions \
        = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"])

    if info["teammate_position"] is not None:
        total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \
            = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"])

        if n_survivable_nodes_teammate[my_teammate] > 0:
            # blocking fractions below this bound are treated as harmless
            # for the teammate and zeroed out
            LB = self._teammate_survivability_threshold / n_survivable_nodes_teammate[
                my_teammate]
            positions_teammate_safe = np.where(
                total_frac_blocked_teammate < LB)
            total_frac_blocked_teammate[positions_teammate_safe] = 0

    # own survivability per action, normalised by the threshold and capped at 1
    p_survivable = defaultdict(float)
    for action in n_survivable:
        p_survivable[action] = sum(
            n_survivable[action]) / self._my_survivability_threshold
        if p_survivable[action] > 1:
            p_survivable[action] = 1

    # block[action]: score of each action (missing keys read as 0.0)
    block = defaultdict(float)
    for action in [
            constants.Action.Stop, constants.Action.Up,
            constants.Action.Down, constants.Action.Left,
            constants.Action.Right
    ]:
        next_position = info["my_next_position"][action]
        if next_position is None:
            continue
        if next_position in info["all_kickable"]:
            # kick will be considered later
            continue
        if all([
                utility.position_is_flames(board, next_position),
                info["flame_life"][next_position] > 1,
                is_survivable[constants.Action.Stop]
        ]):
            # if the next position is flames,
            # I want to stop to wait, which must be feasible
            block[action] = total_frac_blocked[
                next_position] * p_survivable[constants.Action.Stop]
            if info["teammate_position"] is not None:
                block[action] *= (
                    1 - total_frac_blocked_teammate[next_position])
            if block[action] > 0:
                block[action] *= self._inv_tmp
                # random perturbation; this is Gumbel noise assuming
                # self.random.uniform() draws from (0, 1) -- TODO confirm
                block[action] -= np.log(-np.log(self.random.uniform()))
            continue
        elif not is_survivable[action]:
            continue
        if all([
                info["might_blocked"][action],
                not is_survivable[constants.Action.Stop]
        ]):
            continue

        block[action] = total_frac_blocked[next_position] * p_survivable[
            action]
        if info["teammate_position"] is not None:
            block[action] *= (1 -
                              total_frac_blocked_teammate[next_position])
        if block[action] > 0:
            block[action] *= self._inv_tmp
            block[action] -= np.log(-np.log(self.random.uniform()))

        if info["might_blocked"][action]:
            # the move may fail: recompute the score as the average of
            # staying here and reaching next_position (this overwrites the
            # value computed just above)
            block[action] = (total_frac_blocked[my_position] *
                             p_survivable[constants.Action.Stop] +
                             total_frac_blocked[next_position] *
                             p_survivable[action]) / 2
            if info["teammate_position"] is not None:
                block[action] *= (
                    1 - total_frac_blocked_teammate[next_position])
            if block[action] > 0:
                block[action] *= self._inv_tmp
                block[action] -= np.log(-np.log(self.random.uniform()))

    if is_survivable[constants.Action.Bomb]:
        list_boards_with_bomb, _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_blast_strength=my_blast_strength,
                                   my_action=constants.Action.Bomb,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])

        n_survivable_nodes_with_bomb = defaultdict(int)
        for enemy_position in info["enemy_positions"]:
            # get survivable tree of the enemy
            _survivable = search_time_expanded_network(
                list_boards_with_bomb, enemy_position)
            n_survivable_nodes_with_bomb[enemy_position] = sum(
                [len(positions) for positions in _survivable])

        n_with_bomb = sum([
            n_survivable_nodes_with_bomb[enemy_position]
            for enemy_position in info["enemy_positions"]
        ])
        n_with_none = sum(
            [n_survivable_nodes[enemy] for enemy in my_enemies])
        if n_with_none == 0:
            total_frac_blocked_with_bomb = 0

            # place more bombs, so the stacked enemy cannot kick
            x, y = my_position
            for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                next_position = (x + dx, y + dy)
                following_position = (x + 2 * dx, y + 2 * dy)
                if not self._on_board(following_position):
                    continue
                # a bomb next to me with a board value above AgentDummy
                # right behind it (presumably an agent -- confirm against
                # constants.Item)
                if all([
                        obs["bomb_life"][next_position] > 0,
                        board[following_position] >
                        constants.Item.AgentDummy.value
                ]):
                    total_frac_blocked_with_bomb = 1
        else:
            total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none

        action = constants.Action.Bomb
        block[action] = total_frac_blocked_with_bomb
        # (disabled; the constant was garbled in the source, presumably 1)
        # block[action] += total_frac_blocked[my_position] * (1 - total_frac_blocked_with_bomb)
        block[action] *= p_survivable[action]

        block_teammate_with_bomb = None
        if block[action] > 0:
            if info["teammate_position"] is not None:
                block_teammate_with_bomb \
                    = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                       n_survivable_nodes_teammate,
                                                       board,
                                                       [my_teammate],
                                                       ignore_dying_agent=True)

                block_teammate_with_bomb \
                    += total_frac_blocked_teammate[my_position] * (1 - block_teammate_with_bomb)
                block[action] *= (1 - block_teammate_with_bomb)

            block[action] *= self._inv_tmp
            block[action] -= np.log(-np.log(self.random.uniform()))

    for next_position in info["kickable"]:
        action = self._get_direction(my_position, next_position)
        if not is_survivable[action]:
            continue

        list_boards_with_kick, _ \
            = self._board_sequence(board,
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=action,
                                   can_kick=True,
                                   step_to_collapse=info["step_to_collapse"],
                                   collapse_ring=info["collapse_ring"])
        block[action] \
            = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                               n_survivable_nodes,
                                               board,
                                               my_enemies,
                                               ignore_dying_agent=True)
        block[action] += total_frac_blocked[next_position] * (
            1 - block[action])
        block[action] *= p_survivable[action]

        if block[action] > 0:
            if info["teammate_position"] is not None:
                block_teammate_with_kick \
                    = self._get_frac_blocked_two_lists(list_boards_with_kick,
                                                       n_survivable_nodes_teammate,
                                                       board,
                                                       [my_teammate],
                                                       ignore_dying_agent=True)
                block_teammate_with_kick \
                    += total_frac_blocked_teammate[next_position] * (1 - block_teammate_with_kick)
                block[action] *= (1 - block_teammate_with_kick)

            block[action] *= self._inv_tmp
            block[action] -= np.log(-np.log(self.random.uniform()))

    max_block = 0  # do not choose zero blocking action as the best
    best_action = None
    for action in block:
        if block[action] > max_block:
            max_block = block[action]
            best_action = action

    # NOTE(review): block_teammate_with_bomb and list_boards_with_bomb exist
    # only if the Bomb branch above ran; otherwise block[Bomb] reads as 0.0,
    # so this presumably relies on the right-hand side being non-negative.
    if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp:
        if info["teammate_position"] is not None:
            if block_teammate_with_bomb is None:
                block_teammate_with_bomb \
                    = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                       n_survivable_nodes_teammate,
                                                       board,
                                                       [my_teammate],
                                                       ignore_dying_agent=True)

            teammate_safety = block_teammate_with_bomb * n_survivable_nodes_teammate[
                my_teammate]
            if any([
                    teammate_safety >
                    self._teammate_survivability_threshold,
                    block_teammate_with_bomb < self._interfere_threshold,
                    block_teammate_with_bomb <
                    total_frac_blocked_teammate[my_position]
            ]):
                teammate_ok = True
            else:
                teammate_ok = False
        else:
            teammate_ok = True

        if teammate_ok:
            if best_action == constants.Action.Bomb:
                return constants.Action.Bomb.value
            # staying put being the best move also results in a bomb
            if best_action == constants.Action.Stop:
                return constants.Action.Bomb.value

    #
    # Move towards where to bomb
    #

    if best_action not in [None, constants.Action.Bomb]:
        next_position = info["my_next_position"][best_action]

        should_chase = (total_frac_blocked[next_position] >
                        self._chase_threshold)

        if info["teammate_position"] is not None:
            teammate_safety = total_frac_blocked_teammate[
                next_position] * n_survivable_nodes_teammate[my_teammate]
            if any([
                    teammate_safety >
                    self._teammate_survivability_threshold,
                    total_frac_blocked_teammate[next_position] <
                    self._interfere_threshold,
                    total_frac_blocked_teammate[next_position] <
                    total_frac_blocked_teammate[my_position]
            ]):
                teammate_ok = True
            else:
                teammate_ok = False
        else:
            teammate_ok = True

        if should_chase and teammate_ok:
            if all([
                    utility.position_is_flames(board, next_position),
                    info["flame_life"][next_position] > 1,
                    is_survivable[constants.Action.Stop]
            ]):
                # target cell is still burning: wait in place
                action = constants.Action.Stop
                return action.value
            else:
                return best_action.value

    # Exclude the action representing stop to wait
    max_block = 0  # do not choose zero blocking action as the best
    best_action = None
    for action in survivable_actions:
        if block[action] > max_block:
            max_block = block[action]
            best_action = action

    #
    # Do not take risky actions when not interacting with enemies
    #

    most_survivable_action = self._action_most_survivable(n_survivable)

    if total_frac_blocked[my_position] > 0:
        # ignore actions with low survivability
        _survivable_actions = list()
        for action in n_survivable:
            n = sum(n_survivable[action])
            if not is_survivable[action]:
                continue
            elif n > self._my_survivability_threshold:
                _survivable_actions.append(action)
            else:
                is_survivable[action] = False

        if len(_survivable_actions) > 1:
            survivable_actions = _survivable_actions
        elif best_action is not None:
            return best_action.value
        else:
            # Take the most survivable action
            return most_survivable_action.value

    #
    # Do not interfere with teammate
    #

    if all([
            info["teammate_position"] is not None,
            len(info["enemy_positions"]) > 0
            or len(info["curr_bombs"]) > 0
    ]):
        # ignore actions that interfere with teammate
        min_interfere = np.inf
        least_interfere_action = None
        _survivable_actions = list()
        for action in survivable_actions:
            if action == constants.Action.Bomb:
                """
                if block_teammate_with_bomb is None:
                    block_teammate_with_bomb \
                        = self._get_frac_blocked_two_lists(list_boards_with_bomb,
                                                           n_survivable_nodes_teammate,
                                                           board,
                                                           [my_teammate],
                                                           ignore_dying_agent=True)
                frac = block_teammate_with_bomb
                """
                continue
            else:
                next_position = info["my_next_position"][action]
                frac = total_frac_blocked_teammate[next_position]
            if frac < min_interfere:
                min_interfere = frac
                least_interfere_action = action
            if frac < self._interfere_threshold:
                _survivable_actions.append(action)
            else:
                is_survivable[action] = False

        if len(_survivable_actions) > 1:
            survivable_actions = _survivable_actions
        elif best_action is not None:
            # prefer the best blocking action found above
            return best_action.value
        else:
            # Take the least interfering action
            return least_interfere_action.value

    # consider_bomb is computed but not read again in this method
    consider_bomb = True
    if not is_survivable[constants.Action.Bomb]:
        consider_bomb = False

    #
    # Find reachable items
    #

    # List of boards simulated
    list_boards, _ = self._board_sequence(
        board,
        info["curr_bombs"],
        info["curr_flames"],
        self._search_range,
        my_position,
        enemy_mobility=mobility,
        enemy_bomb=self._enemy_bomb,
        enemy_positions=agent_positions,
        agent_blast_strength=info["agent_blast_strength"],
        step_to_collapse=info["step_to_collapse"],
        collapse_ring=info["collapse_ring"])

    # some bombs may explode with extra bombs, leading to under estimation
    # -> copy every flame of the no-move simulation into the new boards
    for t in range(len(list_boards)):
        flame_positions = np.where(
            info["list_boards_no_move"][t] == constants.Item.Flames.value)
        list_boards[t][flame_positions] = constants.Item.Flames.value

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, _, _ = self._search_time_expanded_network(
        list_boards, my_position)
    if len(survivable[-1]) == 0:
        # nothing survives until the end of the horizon: treat every time
        # step as having no survivable position
        survivable = [set() for _ in range(len(survivable))]

    # Items and bomb target that can be reached in a survivable manner
    if "escape" in info:
        reachable_items, _, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable,
                                         might_powerup=info["escape"])  # might_powerup is the escape from collapse
    else:
        _, _, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable)

    #
    # If I have seen an enemy recently and cannot see him now, then move to the last seen position
    #

    action = self._action_to_enemy(my_position,
                                   next_to_items[constants.Item.Fog], prev,
                                   is_survivable, info)
    if action is not None:
        return action.value

    #
    # If I have seen a teammate recently, then move away from the last seen position
    #

    action = self._action_away_from_teammate(
        my_position, next_to_items[constants.Item.Fog], prev,
        is_survivable, info)
    if action is not None:
        return action.value

    #
    # Move to the places that will not be collapsed
    #

    if "escape" in info:
        # might_powerup is the escape from collapse
        action = self._action_to_might_powerup(my_position,
                                               reachable_items, prev,
                                               is_survivable)
        if action is not None:
            print("Escape from collapse", action)
            return action.value

    #
    # Move towards a fog where we have not seen longest
    #

    action = self._action_to_fog(my_position,
                                 next_to_items[constants.Item.Fog], prev,
                                 is_survivable, info)
    if action is not None:
        #if True:
        # follow the fog-exploring move only when the random draw is below 0.8
        if self.random.uniform() < 0.8:
            return action.value

    #
    # Choose most survivable action
    #

    max_block = 0
    best_action = None
    for action in survivable_actions:
        if action == constants.Action.Bomb:
            continue
        score = block[action]
        # tiny random term to break ties
        # (note: np.random here, not self.random)
        if action != constants.Action.Bomb:
            score += np.random.uniform(0, 1e-3)
        if score > max_block:
            max_block = score
            best_action = action

    if best_action is None:
        # no action has a positive block score: fall back to survivability
        max_p = 0
        best_action = None
        for action in p_survivable:
            score = p_survivable[action]
            if action != constants.Action.Bomb:
                score += np.random.uniform(0, 1e-3)
            if score > max_p:
                max_p = score
                best_action = action

    if best_action is None:
        # this should not be the case
        return None
    else:
        return best_action.value
def act(self, obs, action_space, info):
    """Choose the next action from the observation and the shared analysis.

    Decision order (the first rule that fires wins):
      1. the only survivable action, if exactly one remains;
      2. place a bomb when the current cell is the locally best bombing spot;
      3. step towards a neighbouring cell that is a better bombing spot
         (or wait if that cell is currently in flames);
      4. kick a bomb when that reduces an enemy's survivable nodes;
      5. bomb at / move towards a reachable bomb target;
      6. move towards the fog cell whose neighbourhood was seen longest ago;
      7. fall back to the most survivable action.

    Args:
        obs: observation mapping; this method reads 'board', 'position',
            'ammo', 'blast_strength', 'enemies', 'can_kick',
            'bomb_blast_strength' and 'bomb_life'.
        action_space: unused here.
        info: precomputed analysis; this method reads
            'list_boards_no_move', 'curr_bombs', 'curr_flames',
            'enemy_blast_strength', 'moving_direction' and
            'since_last_seen'.

    Returns:
        The ``.value`` of the chosen action, or None when no action is
        survivable.
    """

    #
    # Definitions
    #

    enemy_mobility = 4
    enemy_bomb = 1

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_kick = obs["can_kick"]  # whether I can kick

    # (row, col) offset of each moving action
    move_offsets = {
        constants.Action.Up: (-1, 0),
        constants.Action.Down: (1, 0),
        constants.Action.Left: (0, -1),
        constants.Action.Right: (0, 1),
    }

    print("my position", my_position,
          "ammo", my_ammo,
          "blast", my_blast_strength,
          "kick", my_kick,
          end="\t")

    #
    # Understand current situation
    #

    # fraction of blocked node in the survival trees of enemies.
    # Work on a copy in which my own cell is replaced by what I leave behind:
    # a bomb if I am standing on one, otherwise a passage.
    _list_boards = deepcopy(info["list_boards_no_move"])
    if obs["bomb_blast_strength"][my_position]:
        replacement = constants.Item.Bomb.value
    else:
        replacement = constants.Item.Passage.value
    for b in _list_boards:
        if utility.position_is_agent(b, my_position):
            b[my_position] = replacement

    total_frac_blocked, n_survivable_nodes \
        = self._get_frac_blocked(_list_boards, my_enemies, board,
                                 obs["bomb_life"])

    # TODO : PARAMETER TO OPTIMIZE
    bomb_target_enemy = (total_frac_blocked > 0.1)

    # List of boards simulated
    list_boards, _ = self._board_sequence(
        board,
        info["curr_bombs"],
        info["curr_flames"],
        self._search_range,
        my_position,
        enemy_mobility=enemy_mobility,
        enemy_bomb=enemy_bomb,
        enemy_blast_strength=info["enemy_blast_strength"])

    # some bombs may explode with extra bombs, leading to under estimation;
    # so overlay the flames of the no-move simulation on every board
    for t in range(len(list_boards)):
        flame_positions = np.where(
            info["list_boards_no_move"][t] == constants.Item.Flames.value)
        list_boards[t][flame_positions] = constants.Item.Flames.value

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, succ, _ \
        = self._search_time_expanded_network(list_boards, my_position)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(
            survivable,
            obs,
            info["curr_bombs"],
            info["curr_flames"],
            enemy_mobility=enemy_mobility,
            enemy_bomb=enemy_bomb,
            enemy_blast_strength=info["enemy_blast_strength"])

    n_survivable = dict()
    kick_actions = list()
    if my_kick:
        # Positions where we kick a bomb if we move to
        kickable, _ = self._kickable_positions(obs, info["moving_direction"])
        for next_position in kickable:
            # consider what happens if I kick a bomb
            my_action = self._get_direction(my_position, next_position)

            # do not kick into fog: scan along the kick direction
            dx = next_position[0] - my_position[0]
            dy = next_position[1] - my_position[1]
            position = next_position
            is_fog = False
            while self._on_board(position):
                if utility.position_is_fog(board, position):
                    is_fog = True
                    break
                position = (position[0] + dx, position[1] + dy)
            if is_fog:
                continue

            # NOTE: next_position is rebound to the position returned by
            # the simulation
            list_boards_with_kick, next_position \
                = self._board_sequence(
                    obs["board"],
                    info["curr_bombs"],
                    info["curr_flames"],
                    self._search_range,
                    my_position,
                    my_action=my_action,
                    can_kick=True,
                    enemy_mobility=enemy_mobility,
                    enemy_bomb=enemy_bomb,
                    enemy_blast_strength=info["enemy_blast_strength"])
            survivable_with_kick, _, _, _ \
                = self._search_time_expanded_network(
                    list_boards_with_kick[1:], next_position)
            if next_position in survivable_with_kick[0]:
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [
                    len(s) for s in survivable_with_kick[1:]
                ]
                kick_actions.append(my_action)

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    if len(survivable_actions) == 0:
        return None

    #
    # bomb target that can be reached in a survivable manner
    #

    reachable_items, _, next_to_items \
        = self._find_reachable_items(list_boards, my_position, survivable,
                                     bomb_target_enemy)

    #
    # Evaluate the survivability of each action
    #

    x, y = my_position
    for action in survivable_actions:
        # for each survivable action, check the survivability
        if action == constants.Action.Bomb:
            n_survivable[action] = [
                len(s) for s in survivable_with_bomb[1:]
            ]
            continue

        if action == constants.Action.Stop:
            dx, dy = 0, 0
        elif action in move_offsets:
            dx, dy = move_offsets[action]
        else:
            raise ValueError()
        next_position = (x + dx, y + dy)
        n_survivable[action], _ = self._count_survivable(
            succ, 1, next_position)

    if verbose:
        print("n_survivable")
        for a in n_survivable:
            print(a, n_survivable[a])

    #
    # Avoid the action leading to no choice if possible
    #

    updated = False

    # drop actions whose final count is at most half of the best final count
    max_survivable_positions = max(n[-1] for n in n_survivable.values())
    if max_survivable_positions > 1:
        for a in n_survivable:
            if n_survivable[a][-1] > max_survivable_positions / 2:
                continue
            is_survivable[a] = False
            updated = True

    # drop actions that at some step (from index enemy_mobility on) leave
    # a single survivable position, if another action avoids that
    minn = defaultdict(int)
    for a in n_survivable:
        minn[a] = min(n_survivable[a][enemy_mobility:])
    maxmin = max(minn.values())
    if maxmin > 1:
        for a in minn:
            if minn[a] == 1:
                is_survivable[a] = False
                updated = True

    if updated:
        survivable_actions = [a for a in is_survivable if is_survivable[a]]

    #
    # Choose the survivable action, if it is the only choice
    #

    if len(survivable_actions) == 1:
        # move to the position if it is the only survivable position
        action = survivable_actions[0]
        print("The only survivable action", action)
        return action.value

    #
    # Bomb at a target
    #

    best_action = None
    max_block = 0
    for action in [
            constants.Action.Stop, constants.Action.Bomb,
            constants.Action.Up, constants.Action.Down,
            constants.Action.Right, constants.Action.Left
    ]:
        next_position = self._get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility.position_is_flames(board, next_position):
            if not is_survivable[constants.Action.Stop]:
                # if the next position is flames,
                # I want to stop to wait, which must be feasible
                continue
        else:
            if not is_survivable[action]:
                continue

        block = total_frac_blocked[next_position]
        if block > max_block:
            max_block = block
            best_action = action

    if all([
            is_survivable[constants.Action.Bomb],
            best_action in [constants.Action.Stop, constants.Action.Bomb]
    ]):
        print("Place a bomb at a locally optimal position",
              constants.Action.Bomb)
        return constants.Action.Bomb.value

    #
    # Move towards where to bomb
    #

    if best_action not in [None, constants.Action.Bomb]:
        next_position = self._get_next_position(my_position, best_action)
        # TODO : PARAMETER TO OPTIMIZE
        if total_frac_blocked[next_position] > 0.01:
            if utility.position_is_flames(board, next_position):
                action = constants.Action.Stop
                print("Wait flames life", action)
                return action.value
            else:
                print("Move towards better place to bomb", best_action)
                return best_action.value

    #
    # Kick
    #

    for my_action in kick_actions:
        # cell of the bomb to be kicked; left unchanged when my_action is
        # not one of the four moves (same as the if/elif ladder it replaces)
        if my_action in move_offsets:
            dx, dy = move_offsets[my_action]
            next_position = (my_position[0] + dx, my_position[1] + dy)

        # do not kick a bomb if it will break enemies
        if info["moving_direction"][next_position] is None:
            print("checking static bomb")
            # if it is a static bomb
            if self._can_break(info["list_boards_no_move"][0],
                               next_position, my_blast_strength,
                               my_enemies):
                continue

        list_boards_with_kick_no_move, _ \
            = self._board_sequence(obs["board"],
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=my_action,
                                   can_kick=True,
                                   enemy_mobility=0)

        for enemy in my_enemies:
            rows, cols = np.where(board == enemy.value)
            if len(rows) == 0:
                continue
            enemy_position = (rows[0], cols[0])
            _survivable, _, _, _ \
                = self._search_time_expanded_network(
                    list_boards_with_kick_no_move, enemy_position)

            n_survivable_nodes_with_kick = sum(
                len(positions) for positions in _survivable)
            if n_survivable_nodes_with_kick < n_survivable_nodes[enemy]:
                print("Kicking to reduce the survivability",
                      n_survivable_nodes[enemy], "->",
                      n_survivable_nodes_with_kick, my_action)
                return my_action.value

    #
    # Bomb at / move towards a reachable bomb target
    #

    good_time_positions = reachable_items["target"]
    if all([(0, ) + my_position in good_time_positions,
            is_survivable[constants.Action.Bomb]]):
        print("place a bomb at a bomb target", constants.Action.Bomb)
        return constants.Action.Bomb.value

    if good_time_positions:
        # prefer targets that block much and can be reached early
        score = np.asarray([
            total_frac_blocked[(x, y)] / (t + 1)
            for t, x, y in good_time_positions
        ])
        argmax = np.argwhere(score == np.max(score))
        best_time_positions = [good_time_positions[i[0]] for i in argmax]
        action = self._find_distance_minimizer(my_position,
                                               best_time_positions, prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward where to bomb", action)
            return action.value

    #
    # Move towards a fog where we have not seen longest
    #

    best_time_position = None
    oldest = 0
    for t, x, y in next_to_items[constants.Item.Fog]:
        neighbors = [(x + dx, y + dy)
                     for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
        age = max([
            info["since_last_seen"][position] for position in neighbors
            if self._on_board(position)
        ])
        if age > oldest:
            oldest = age
            best_time_position = (t, x, y)

    if best_time_position is not None:
        action = self._find_distance_minimizer(my_position,
                                               [best_time_position], prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward oldest fog", action)
            return action.value

    #
    # Choose most survivable action
    #

    action = self._get_most_survivable_action(n_survivable)
    print("Most survivable action", action)
    return action.value
def act(self, obs, action_space):
    """Score every board cell and act towards the best-scoring one.

    The method builds an 11x11 score map from breakable woods, distance,
    power-ups, bomb blast zones and enemy proximity, then:
      * stops if no reachable cell was found,
      * bombs (if ``self._can_escape`` allows) or stops when already
        standing on a best-scoring cell,
      * otherwise backtracks one step towards the best-scoring cell.

    Reads and updates ``self.prev_pos``, ``self.prev_action`` and
    ``self.bomb_time``; prints diagnostics when ``self.logging`` is set.

    Args:
        obs: observation mapping; this method reads 'position', 'board',
            'bomb_blast_strength', 'enemies', 'ammo' and 'blast_strength'.
        action_space: unused here.

    Returns:
        ``self.prev_action.value`` for the action chosen in this call.
    """

    def convert_bombs(bomb_map):
        """List every cell with positive blast strength as a bomb dict."""
        locations = np.where(bomb_map > 0)
        return [{
            'position': (r, c),
            'blast_strength': int(bomb_map[(r, c)])
        } for r, c in zip(locations[0], locations[1])]

    depth = 20
    neighbor_offsets = [(-1, 0), (1, 0), (0, -1), (0, 1)]

    my_position = tuple(obs['position'])
    board = np.array(obs['board'])
    bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    enemies = [constants.Item(e) for e in obs['enemies']]
    ammo = int(obs['ammo'])
    blast_strength = int(obs['blast_strength'])

    # The previous action had a value in 1..4 but the position did not
    # change: mark the cell as rigid on the local board copy.
    if (self.prev_pos is not None and self.prev_pos == my_position
            and 1 <= self.prev_action.value <= 4):
        if self.logging:
            print('freeze')
        board[self.prev_pos] = constants.Item.Rigid.value

    items, dist, prev = self._djikstra(board,
                                       my_position,
                                       bombs,
                                       enemies,
                                       bomb_timer=self.bomb_time,
                                       depth=depth)

    if self.logging:
        print('my_position =', my_position)
        print('board =')
        print(board)
        print('dist =')
        print(dist)
        print('bombs =', bombs)
        print('enemies =', enemies)
        for e in enemies:
            print(e)
            pos = items.get(e, [])
            print('pos =', pos)
            print('pos_len=', len(pos))
            if len(pos) > 0:
                print('xy=', pos[0][0], ',', pos[0][1])
        print('ammo =', ammo)
        print('blast_strength =', blast_strength)

        # distance matrix, built for display only (-1 = not in dist)
        test_ary = np.ones((11, 11))
        for c in range(11):
            for r in range(11):
                if (r, c) in dist:
                    test_ary[r, c] = dist[(r, c)]
                else:
                    test_ary[r, c] = -1
        print("dist_mat:")
        print(test_ary)

    # update bomb_time map
    bomb_life = 8
    has_bomb = {}
    already_breakable = np.zeros((11, 11))
    for b in bombs:
        r, c = b['position']
        strength = b['blast_strength']
        if self.bomb_time[(r, c)] == 0:
            self.bomb_time[(r, c)] = bomb_life
        else:
            self.bomb_time[(r, c)] -= 1
        for row, col in neighbor_offsets:
            for d in range(1, strength):
                new_pos = (r + d * row, c + d * col)
                if TestSimpleAgent._out_of_board(new_pos):
                    continue
                if utility.position_is_rigid(board, new_pos):
                    continue
                if utility.position_is_wood(board, new_pos):
                    already_breakable[new_pos] = 1
                if self.bomb_time[new_pos] == 0:
                    self.bomb_time[new_pos] = bomb_life
                else:
                    self.bomb_time[new_pos] -= 1
                has_bomb[new_pos] = 1

    # clear up table: cells not hit by any blast arm this step are reset
    for c in range(11):
        for r in range(11):
            if (r, c) not in has_bomb:
                self.bomb_time[(r, c)] = 0
    if self.logging:
        print("bomb_time:")
        print(self.bomb_time)

    # evaluate each position in terms of breakable woods
    num_breakable = np.zeros((11, 11))
    num_breakable_inside = np.zeros((11, 11))
    for c in range(11):
        for r in range(11):
            if not utility.position_is_wood(board, (r, c)):
                continue
            if already_breakable[(r, c)]:
                continue
            # every free cell within blast range of this wood gains a point
            for row, col in neighbor_offsets:
                for d in range(1, blast_strength):
                    new_pos = (r + d * row, c + d * col)
                    if TestSimpleAgent._out_of_board(new_pos):
                        continue
                    if utility.position_is_passable(
                            board, new_pos,
                            enemies) or utility.position_is_flames(
                                board, new_pos):
                        num_breakable[new_pos] += 1
                    else:
                        break
            # NOTE(review): assumed to apply to wood cells only - confirm.
            # Count the woods adjacent to this wood.
            tmp_num = 0
            has_passable = False
            for row, col in neighbor_offsets:
                new_pos = (r + row, c + col)
                if TestSimpleAgent._out_of_board(new_pos):
                    continue
                if utility.position_is_wood(board, new_pos):
                    tmp_num += 1
                elif utility.position_is_passable(board, new_pos, enemies):
                    has_passable = True
            if (not has_passable) and tmp_num > 0:
                tmp_num -= 1
            num_breakable_inside[(r, c)] = tmp_num

    if self.logging:
        print('num_breakable:')
        print(num_breakable)
        print('num_breakable_inside:')
        print(num_breakable_inside)

    # own breakable count plus half of the neighbours' "inside" counts
    num_breakable_total = np.zeros((11, 11))
    for c in range(11):
        for r in range(11):
            num_breakable_total[(r, c)] = num_breakable[(r, c)]
            if num_breakable_total[(r, c)] == -1 or num_breakable_total[(
                    r, c)] == np.inf:
                continue
            for row, col in neighbor_offsets:
                new_pos = (r + row, c + col)
                if new_pos[0] < 0 or new_pos[0] > 10:
                    continue
                if new_pos[1] < 0 or new_pos[1] > 10:
                    continue
                num_breakable_total[(
                    r, c)] += num_breakable_inside[new_pos] * 0.5

    if self.logging:
        print('num_breakable_total:')
        print(num_breakable_total)

    # evaluate each position in total
    pos_scores = np.zeros((11, 11))
    for c in range(11):
        for r in range(11):
            if (r, c) not in dist:
                pos_scores[(r, c)] = -1
                continue
            if dist[(r, c)] == np.inf:
                pos_scores[(r, c)] = np.inf
                continue
            if num_breakable_total[(r, c)] > 0:
                pos_scores[(r, c)] += num_breakable_total[(r, c)]
                # NOTE(review): distance bonus assumed to apply only to
                # cells with something to break - confirm.
                pos_scores[(r, c)] += (depth - dist[(r, c)]) * 0.2
            # consider power-up items
            if board[(r, c)] in {
                    constants.Item.ExtraBomb.value,
                    constants.Item.IncrRange.value
            }:
                pos_scores[(r, c)] += 50

    if self.logging:
        print('pos_score:')
        print(pos_scores)

    # consider degree of freedom
    dis_to_ene = 100
    for e in enemies:
        pos = items.get(e, [])
        if len(pos) > 0:
            d = abs(pos[0][0] - my_position[0]) + abs(pos[0][1] -
                                                      my_position[1])
            if dis_to_ene > d:
                dis_to_ene = d
    # NOTE(review): dis_to_ene is a Manhattan distance (or 100), so this
    # condition never holds and the penalty below is effectively disabled.
    # Kept as is; confirm whether `<= 4` was intended.
    if dis_to_ene <= -4:
        deg_frees = np.zeros((11, 11))
        for c in range(11):
            for r in range(11):
                if not utility.position_is_passable(board, (r, c), enemies):
                    continue
                deg_free = 0
                for row, col in neighbor_offsets:
                    new_pos = (r + row, c + col)
                    if new_pos[0] < 0 or new_pos[0] > 10:
                        continue
                    if new_pos[1] < 0 or new_pos[1] > 10:
                        continue
                    if utility.position_is_passable(
                            board, new_pos,
                            enemies) or utility.position_is_flames(
                                board, new_pos):
                        deg_free += 1
                deg_frees[(r, c)] = deg_free
                if deg_free <= 1:
                    pos_scores[(r, c)] -= 5
        if self.logging:
            print('deg_free')
            print(deg_frees)

    # consider bomb blast
    for bomb in bombs:
        r, c = bomb['position']
        strength = bomb['blast_strength']
        pos_scores[(r, c)] = -20
        for row, col in neighbor_offsets:
            for d in range(1, strength):
                new_pos = (r + d * row, c + d * col)
                if new_pos[0] < 0 or new_pos[0] > 10:
                    continue
                if new_pos[1] < 0 or new_pos[1] > 10:
                    continue
                if new_pos not in dist:
                    continue
                # NOTE(review): compares the position tuple itself with
                # inf, which is never equal; `dist[new_pos] == np.inf` may
                # have been intended. Behaviour kept unchanged.
                elif new_pos == np.inf:
                    continue
                pos_scores[new_pos] = -20
    if self.logging:
        print('consider blast pos_score:')
        print(pos_scores)

    # consider enemies
    for e in enemies:
        pos = items.get(e, [])
        if len(pos) > 0:
            r = pos[0][0]
            c = pos[0][1]
            for row, col in neighbor_offsets:
                for d in range(1, blast_strength * 2):
                    new_pos = (r + d * row, c + d * col)
                    if new_pos[0] < 0 or new_pos[0] > 10:
                        continue
                    if new_pos[1] < 0 or new_pos[1] > 10:
                        continue
                    if not utility.position_is_passable(
                            board, new_pos, enemies):
                        break
                    pos_scores[new_pos] += 0.3
    if self.logging:
        print('consider enemy:')
        print(pos_scores)

    # pick the reachable cell with the highest score (first one wins ties,
    # scanning column by column)
    h_r, h_c = -1, -1
    h_score = -1
    for c in range(11):
        for r in range(11):
            if (r, c) not in dist:
                continue
            if dist[(r, c)] == np.inf:
                continue
            if h_score < pos_scores[(r, c)]:
                h_score = pos_scores[(r, c)]
                h_r, h_c = (r, c)
    if self.logging:
        print('h_score and pos:', h_score, '(r, c) =', h_r, ',', h_c)
        print('prev:')
        print(prev)

    # if current position is not the highest score position, move to the
    # highest position.
    if h_r == -1:
        self.prev_action = constants.Action.Stop
    elif pos_scores[my_position] == h_score:
        if self._can_escape(pos_scores, my_position, blast_strength):
            self.prev_action = constants.Action.Bomb
        else:
            self.prev_action = constants.Action.Stop
    else:
        self.prev_action = self._backtrack(my_position, (h_r, h_c), prev)

    self.prev_pos = my_position
    if self.logging:
        print('action: ', self.prev_action)
    return self.prev_action.value
def act(self, obs, action_space, info): # # Definitions # board = obs['board'] recently_seen_positions = (info["since_last_seen"] < 3) board[recently_seen_positions] = info["last_seen"][recently_seen_positions] my_position = obs["position"] # tuple([x,y]): my position my_ammo = obs['ammo'] # int: the number of bombs I have my_blast_strength = obs['blast_strength'] my_enemies = [constants.Item(e) for e in obs['enemies'] if e != constants.Item.AgentDummy] if obs["teammate"] != constants.Item.AgentDummy: my_teammate = obs["teammate"] else: my_teammate = None my_kick = obs["can_kick"] # whether I can kick if verbose: print("my position", my_position, "ammo", my_ammo, "blast", my_blast_strength, "kick", my_kick, end="\t") my_next_position = {constants.Action.Stop: my_position, constants.Action.Bomb: my_position} for action in [constants.Action.Up, constants.Action.Down, constants.Action.Left, constants.Action.Right]: next_position = self._get_next_position(my_position, action) if self._on_board(next_position): if board[next_position] == constants.Item.Rigid.value: my_next_position[action] = None else: my_next_position[action] = next_position else: my_next_position[action] = None # # Understand current situation # if all([info["prev_action"] not in [constants.Action.Stop, constants.Action.Bomb], info["prev_position"] == my_position]): # if previously blocked, do not reapeat with some probability self._inv_tmp *= self._backoff else: self._inv_tmp = self._inv_tmp_init # enemy positions enemy_positions = list() for enemy in my_enemies: rows, cols = np.where(board==enemy.value) if len(rows) == 0: continue enemy_positions.append((rows[0], cols[0])) # teammate position teammate_position = None if my_teammate is not None: rows, cols = np.where(board==my_teammate.value) if len(rows): teammate_position = (rows[0], cols[0]) # Positions where we kick a bomb if we move to if my_kick: kickable, might_kickable = self._kickable_positions(obs, info["moving_direction"]) else: kickable = 
set() might_kickable = set() # positions that might be blocked if teammate_position is None: agent_positions = enemy_positions else: agent_positions = enemy_positions + [teammate_position] might_blocked = self._get_might_blocked(board, my_position, agent_positions, might_kickable) # enemy positions over time # these might be dissappeared due to extra flames if len(enemy_positions): rows = [p[0] for p in enemy_positions] cols = [p[1] for p in enemy_positions] list_enemy_positions = [(rows, cols)] _enemy_positions = list() for t in range(self._enemy_mobility): rows, cols = list_enemy_positions[-1] for x, y in zip(rows, cols): for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1)]: next_position = (x + dx, y + dy) if not self._on_board(next_position): continue _board = info["list_boards_no_move"][t] if utility.position_is_passage(_board, next_position): _enemy_positions.append(next_position) _enemy_positions = set(_enemy_positions) rows = [p[0] for p in _enemy_positions] cols = [p[1] for p in _enemy_positions] list_enemy_positions.append((rows, cols)) else: list_enemy_positions = [] # survivable actions is_survivable = dict() for a in self._get_all_actions(): is_survivable[a] = False n_survivable = dict() list_boards = dict() for my_action in self._get_all_actions(): next_position = my_next_position[my_action] if next_position is None: continue if my_action == constants.Action.Bomb: if any([my_ammo == 0, obs["bomb_blast_strength"][next_position] > 0]): continue if all([utility.position_is_flames(board, next_position), info["flame_life"][next_position] > 1]): continue if all([my_action != constants.Action.Stop, obs["bomb_blast_strength"][next_position] > 0, next_position not in set.union(kickable, might_kickable)]): continue if next_position in set.union(kickable, might_kickable): # do not kick into fog dx = next_position[0] - my_position[0] dy = next_position[1] - my_position[1] position = next_position is_fog = False while self._on_board(position): if 
utility.position_is_fog(board, position): is_fog = True break position = (position[0] + dx, position[1] + dy) if is_fog: continue # list of boards from next steps list_boards[my_action], _ \ = self._board_sequence(obs["board"], info["curr_bombs"], info["curr_flames"], self._search_range, my_position, my_blast_strength=my_blast_strength, my_action=my_action, can_kick=my_kick, enemy_mobility=self._enemy_mobility, enemy_bomb=self._enemy_bomb, agent_blast_strength=info["agent_blast_strength"]) # agents might be disappeared, because of overestimated bombs for t, positions in enumerate(list_enemy_positions): list_boards[my_action][t][positions] = constants.Item.AgentDummy.value # some bombs may explode with extra bombs, leading to under estimation for t in range(len(list_boards[my_action])): flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value) list_boards[my_action][t][flame_positions] = constants.Item.Flames.value """ processed = Parallel(n_jobs=-1, verbose=0)( [delayed(search_time_expanded_network)(list_boards[action][1:], my_next_position[action], action) for action in list_boards] ) for survivable, my_action in processed: if my_next_position[my_action] in survivable[0]: is_survivable[my_action] = True n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]] """ for my_action in list_boards: survivable = search_time_expanded_network(list_boards[my_action][1:], my_next_position[my_action]) if my_next_position[my_action] in survivable[0]: is_survivable[my_action] = True n_survivable[my_action] = [1] + [len(s) for s in survivable[1:]] survivable_actions = list() for a in is_survivable: if not is_survivable[a]: continue if might_blocked[a] and not is_survivable[constants.Action.Stop]: continue if n_survivable[a][-1] <= 1: is_survivable[a] = False continue survivable_actions.append(a) # # Choose action # if len(survivable_actions) == 0: # # return None, if no survivable actions # return None elif len(survivable_actions) == 1: # 
# Choose the survivable action, if it is the only choice # action = survivable_actions[0] if verbose: print("The only survivable action", action) return action.value # # Bomb at a target # # fraction of blocked node in the survival trees of enemies _list_boards = deepcopy(info["list_boards_no_move"]) if obs["bomb_blast_strength"][my_position]: for b in _list_boards: if utility.position_is_agent(b, my_position): b[my_position] = constants.Item.Bomb.value else: for b in _list_boards: if utility.position_is_agent(b, my_position): b[my_position] = constants.Item.Passage.value total_frac_blocked, n_survivable_nodes, blocked_time_positions \ = self._get_frac_blocked(_list_boards, my_enemies, board, obs["bomb_life"]) if teammate_position is not None: total_frac_blocked_teammate, n_survivable_nodes_teammate, blocked_time_positions_teammate \ = self._get_frac_blocked(_list_boards, [my_teammate], board, obs["bomb_life"]) """ np.set_printoptions(precision=3) print("enemy") print(total_frac_blocked) print("teammate") print(total_frac_blocked_teammate) print("product") prod = total_frac_blocked * (1 - total_frac_blocked_teammate) print(prod[:5,:5]) """ p_survivable = defaultdict(float) for action in n_survivable: p_survivable[action] = sum(n_survivable[action]) / self._my_survivability_threshold if p_survivable[action] > 1: p_survivable[action] = 1 block = defaultdict(float) for action in [constants.Action.Stop, constants.Action.Up, constants.Action.Down, constants.Action.Left, constants.Action.Right]: next_position = my_next_position[action] if next_position is None: continue if next_position in set.union(kickable, might_kickable): # kick will be considered later continue if all([utility.position_is_flames(board, next_position), info["flame_life"][next_position] > 1, is_survivable[constants.Action.Stop]]): # if the next position is flames, # I want to stop to wait, which must be feasible block[action] = total_frac_blocked[next_position] * p_survivable[constants.Action.Stop] if 
teammate_position is not None: block[action] *= (1 - total_frac_blocked_teammate[next_position]) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) continue elif not is_survivable[action]: continue if all([might_blocked[action], not is_survivable[constants.Action.Stop]]): continue block[action] = total_frac_blocked[next_position] * p_survivable[action] if teammate_position is not None: block[action] *= (1 - total_frac_blocked_teammate[next_position]) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) if might_blocked[action]: block[action] = (total_frac_blocked[my_position] * p_survivable[constants.Action.Stop] + total_frac_blocked[next_position] * p_survivable[action]) / 2 if teammate_position is not None: block[action] *= (1 - total_frac_blocked_teammate[next_position]) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) if is_survivable[constants.Action.Bomb]: list_boards_with_bomb, _ \ = self._board_sequence(board, info["curr_bombs"], info["curr_flames"], self._search_range, my_position, my_blast_strength=my_blast_strength, my_action=constants.Action.Bomb) n_survivable_nodes_with_bomb = defaultdict(int) for enemy in my_enemies: # get survivable tree of the enemy rows, cols = np.where(board==enemy.value) if len(rows) == 0: continue enemy_position = (rows[0], cols[0]) _survivable = search_time_expanded_network(list_boards_with_bomb, enemy_position) n_survivable_nodes_with_bomb[enemy] = sum([len(positions) for positions in _survivable]) n_with_bomb = sum([n_survivable_nodes_with_bomb[enemy] for enemy in my_enemies]) n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies]) if n_with_none == 0: total_frac_blocked_with_bomb = 0 # place more bombs, so the stacked enemy cannot kick x, y = my_position for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]: next_position = (x + dx, y + dy) following_position = (x + 2 * dx, y + 2 * dy) if not 
self._on_board(following_position): continue if all([obs["bomb_life"][next_position] > 0, board[following_position] > constants.Item.AgentDummy.value]): total_frac_blocked_with_bomb = 1 else: total_frac_blocked_with_bomb = 1 - n_with_bomb / n_with_none if teammate_position is not None: # get survivable tree of the teammate _survivable = search_time_expanded_network(list_boards_with_bomb, teammate_position) n_survivable_nodes_with_bomb_teammate = sum([len(positions) for positions in _survivable]) n_with_bomb = n_survivable_nodes_with_bomb_teammate n_with_none = n_survivable_nodes_teammate[my_teammate] if n_with_none == 0: total_frac_blocked_with_bomb_teammate = 0 else: total_frac_blocked_with_bomb_teammate = 1 - n_with_bomb / n_with_none action = constants.Action.Bomb block[action] = total_frac_blocked_with_bomb * p_survivable[action] if teammate_position is not None: block[action] *= (1 - total_frac_blocked_with_bomb_teammate) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) for next_position in kickable: action = self._get_direction(my_position, next_position) if not is_survivable[action]: continue list_boards_with_kick, _ \ = self._board_sequence(obs["board"], info["curr_bombs"], info["curr_flames"], self._search_range, my_position, my_action=action, can_kick=True) n_survivable_nodes_with_kick = defaultdict(int) for enemy in my_enemies: # get survivable tree of the enemy rows, cols = np.where(board==enemy.value) if len(rows) == 0: continue enemy_position = (rows[0], cols[0]) _survivable = search_time_expanded_network(list_boards_with_kick, enemy_position) n_survivable_nodes_with_kick[enemy] = sum([len(positions) for positions in _survivable]) n_with_kick = sum([n_survivable_nodes_with_kick[enemy] for enemy in my_enemies]) n_with_none = sum([n_survivable_nodes[enemy] for enemy in my_enemies]) if n_with_none == 0: total_frac_blocked[next_position] = 0 else: total_frac_blocked[next_position] = 1 - n_with_kick / n_with_none if 
teammate_position is not None: # get survivable tree of the teammate _survivable = search_time_expanded_network(list_boards_with_kick, teammate_position) n_survivable_nodes_with_kick_teammate = sum([len(positions) for positions in _survivable]) n_with_kick = n_survivable_nodes_with_kick_teammate n_with_none = n_survivable_nodes_teammate[my_teammate] if n_with_none == 0: total_frac_blocked_teammate[next_position] = 0 else: total_frac_blocked_teammate[next_position] = 1 - n_with_kick / n_with_none block[action] = total_frac_blocked[next_position] * p_survivable[action] if teammate_position is not None: block[action] *= (1 - total_frac_blocked_teammate[next_position]) block[action] *= self._inv_tmp block[action] -= np.log(-np.log(self.random.uniform())) max_block = -np.inf best_action = None for action in block: if block[action] > max_block: max_block = block[action] best_action = action if block[constants.Action.Bomb] > self._bomb_threshold * self._inv_tmp: if teammate_position is not None: teammate_safety = total_frac_blocked_with_bomb_teammate * n_survivable_nodes_with_bomb_teammate if any([teammate_safety > self._teammate_survivability_threshold, total_frac_blocked_with_bomb_teammate < self._interfere_threshold, total_frac_blocked_with_bomb_teammate < total_frac_blocked_teammate[my_position]]): teammate_ok = True else: teammate_ok = False else: teammate_ok = True if teammate_ok: if best_action == constants.Action.Bomb: if verbose: print("Bomb is best", constants.Action.Bomb) return constants.Action.Bomb.value if best_action == constants.Action.Stop: if verbose: print("Place a bomb at a locally optimal position", constants.Action.Bomb) return constants.Action.Bomb.value # # Move towards where to bomb # if best_action not in [None, constants.Action.Bomb]: next_position = my_next_position[best_action] should_chase = (total_frac_blocked[next_position] > self._chase_threshold) if teammate_position is not None: teammate_safety = 
total_frac_blocked_teammate[next_position] * n_survivable_nodes_teammate[my_teammate] if any([teammate_safety > self._teammate_survivability_threshold, total_frac_blocked_teammate[next_position] < self._interfere_threshold, total_frac_blocked_teammate[next_position] < total_frac_blocked_teammate[my_position]]): teammate_ok = True else: teammate_ok = False else: teammate_ok = True if should_chase and teammate_ok: if all([utility.position_is_flames(board, next_position), info["flame_life"][next_position] > 1, is_survivable[constants.Action.Stop]]): action = constants.Action.Stop if verbose: print("Wait flames life", action) return action.value else: if verbose: print("Move towards better place to bomb", best_action) return best_action.value # Exclude the action representing stop to wait max_block = -np.inf best_action = None for action in survivable_actions: if block[action] > max_block: max_block = block[action] best_action = action # # Do not take risky actions # most_survivable_action = self._action_most_survivable(n_survivable) # ignore actions with low survivability _survivable_actions = list() for action in n_survivable: n = sum(n_survivable[action]) if not is_survivable[action]: continue elif n > self._my_survivability_threshold: _survivable_actions.append(action) else: print("RISKY", action) is_survivable[action] = False if len(_survivable_actions) > 1: survivable_actions = _survivable_actions elif best_action is not None: if verbose: print("Take the best action in danger", best_action) return best_action.value else: # Take the most survivable action if verbose: print("Take the most survivable action", most_survivable_action) return most_survivable_action.value # # Do not interfere with teammate # if all([teammate_position is not None, len(enemy_positions) > 0 or len(info["curr_bombs"]) > 0]): # ignore actions that interfere with teammate min_interfere = np.inf least_interfere_action = None _survivable_actions = list() for action in survivable_actions: if 
action == constants.Action.Bomb: frac = total_frac_blocked_with_bomb_teammate else: next_position = my_next_position[action] frac = total_frac_blocked_teammate[next_position] if frac < min_interfere: min_interfere = frac least_interfere_action = action if frac < self._interfere_threshold: _survivable_actions.append(action) else: print("INTERFERE", action) is_survivable[action] = False if len(_survivable_actions) > 1: survivable_actions = _survivable_actions elif best_action is not None: # Take the least interfering action if verbose: print("Take the best action in intereference", best_action) return best_action.value else: if verbose: print("Take the least interfering action", least_interfere_action) return least_interfere_action.value consider_bomb = True if not is_survivable[constants.Action.Bomb]: consider_bomb = False # # Find reachable items # # List of boards simulated list_boards, _ = self._board_sequence(board, info["curr_bombs"], info["curr_flames"], self._search_range, my_position, enemy_mobility=self._enemy_mobility, enemy_bomb=self._enemy_bomb, agent_blast_strength=info["agent_blast_strength"]) # some bombs may explode with extra bombs, leading to under estimation for t in range(len(list_boards)): flame_positions = np.where(info["list_boards_no_move"][t] == constants.Item.Flames.value) list_boards[t][flame_positions] = constants.Item.Flames.value # List of the set of survivable time-positions at each time # and preceding positions survivable, prev, _, _ = self._search_time_expanded_network(list_boards, my_position) if len(survivable[-1]) == 0: survivable = [set() for _ in range(len(survivable))] # Items and bomb target that can be reached in a survivable manner _, _, next_to_items \ = self._find_reachable_items(list_boards, my_position, survivable) # # If I have seen an enemy recently and cannot see him now, them move to the last seen position # action = self._action_to_enemy(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info, 
my_enemies) if action is not None: if verbose: print("Moving toward last seen enemy", action) return action.value # # If I have seen a teammate recently, them move away from the last seen position # action = self._action_away_from_teammate(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info, my_teammate) if action is not None: if verbose: print("Moving away from last seen teammate", action) return action.value # # Move towards a fog where we have not seen longest # action = self._action_to_fog(my_position, next_to_items[constants.Item.Fog], prev, is_survivable, info) if action is not None: #if True: if self.random.uniform() < 0.8: if verbose: print("Moving toward oldest fog", action) return action.value # # Choose most survivable action # max_block = -np.inf best_action = None for action in survivable_actions: if action == constants.Action.Bomb: continue if block[action] > max_block: max_block = block[action] best_action = action if verbose: print("Take the best action among safe actions (nothing else to do)", best_action) if best_action is None: # this should not be the case return None else: return best_action.value
def act(self, obs, action_space, info):
    """
    Choose the action for this step, giving priority to breaking wood.

    The candidates below are tried in order and the first one that yields
    an action is returned:

    1. if exactly one action is survivable, take it
    2. place a bomb when standing on a bomb target and bombing is acceptable
    3. move to the digging position
    4. move to a reachable ExtraBomb / IncrRange / Kick item
    5. move to a bomb target
    6. move to a position listed under ``"might_powerup"``
    7. move next to the fog that has not been seen for the longest time
    8. a random survivable action other than placing a bomb

    Parameters
    ----------
    obs : dict
        observation; the keys ``position``, ``ammo``, ``blast_strength``
        and ``bomb_life`` are read here
    action_space
        not used by this method
    info : dict
        extended observation; the keys ``recently_seen``, ``last_seen``,
        ``list_boards_no_move``, ``might_powerup``, ``step_to_collapse``,
        ``collapse_ring``, ``flame_life`` and ``since_last_seen`` are read here

    Return
    ------
    int or None
        value of the chosen action, or None when no action is survivable
    """

    #
    # Definitions
    #

    # the board comes from info rather than from obs['board']
    board = info['recently_seen']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']

    # fill the cells that are still fog with what was last seen there
    # NOTE(review): this writes through into info['recently_seen'] itself;
    # confirm that no caller relies on the fog markers afterwards
    foggy = np.where(board == constants.Item.Fog.value)
    board[foggy] = info["last_seen"][foggy]

    boards_no_move = info["list_boards_no_move"]

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, succ, _ \
        = self._search_time_expanded_network(boards_no_move, my_position)
    if len(survivable[-1]) == 0:
        # nothing is survivable at the last time step: treat every time
        # step as having no survivable position
        survivable = [set() for _ in survivable]

    # where to place bombs to break wood
    digging, bomb_target = self._get_digging_positions(board, my_position, info)
    if digging is None:
        bomb_target, n_breakable \
            = self._get_bomb_target(boards_no_move[-1],
                                    my_position,
                                    my_blast_strength,
                                    constants.Item.Wood)

    # Items that can be reached in a survivable manner
    reachable_items, _, next_to_items \
        = self._find_reachable_items(boards_no_move,
                                     my_position,
                                     survivable,
                                     bomb_target,
                                     info["might_powerup"])

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable,
                                       obs,
                                       info,
                                       step_to_collapse=info["step_to_collapse"],
                                       collapse_ring=info["collapse_ring"])
    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    def step_toward(time_positions):
        # first action on a survivable path to one of the (t, x, y) targets
        return self._find_distance_minimizer(my_position,
                                             time_positions,
                                             prev,
                                             is_survivable)

    #
    # Choose an action
    #

    if not survivable_actions:
        # This should not happen
        return None
    if len(survivable_actions) == 1:
        # move to the position if it is the only survivable position
        return survivable_actions[0].value

    #
    # Place a bomb
    #

    # bomb only if: bombing was simulated, I am at a bomb target, I stay
    # survivable at every time step after bombing, and the blast might not
    # break an item
    consider_bomb = (
        survivable_with_bomb is not None
        and bomb_target[my_position]
        and not any(len(s) <= 0 for s in survivable_with_bomb)
        and not self._might_break_powerup(boards_no_move[-1],
                                          my_position,
                                          my_blast_strength,
                                          info["might_powerup"])
    )
    if consider_bomb:
        # place bomb if I am at a bomb target
        return constants.Action.Bomb.value

    #
    # Move to the digging position
    #

    good_time_positions = reachable_items["target"]
    if digging and good_time_positions:
        time_to_reach = good_time_positions[0][0]
        free_cell_values = [constants.Item.Passage.value,
                            constants.Item.ExtraBomb.value,
                            constants.Item.IncrRange.value,
                            constants.Item.Kick.value]
        # both conditions are evaluated before they are combined
        can_stand_with_ammo = my_ammo and board[digging] in free_cell_values
        flames_gone_on_arrival = (info["flame_life"][digging] <= time_to_reach
                                  and utility.position_is_flames(board, digging))
        if can_stand_with_ammo or flames_gone_on_arrival:
            action = step_toward(good_time_positions)
            if action is not None:
                return action.value

    #
    # Move towards good items
    #

    # TODO : kick may be a good item only if I cannot kick yet
    # TODO : might want to destroy
    good_items = [constants.Item.ExtraBomb, constants.Item.IncrRange, constants.Item.Kick]
    # positions with good items
    good_time_positions = set().union(*[reachable_items[item] for item in good_items])
    if len(good_time_positions) > 0:
        action = step_toward(good_time_positions)
        if action is not None:
            return action.value

    #
    # Move towards where to bomb
    #

    good_time_positions = reachable_items["target"]
    # If I have no bomb, I do not want to wait at the target that will be covered by flames
    # before I can place a bomb
    if my_ammo == 0:
        # smallest remaining life among the reachable bombs, capped by the
        # default bomb life
        first_blast_time = min(
            [constants.DEFAULT_BOMB_LIFE]
            + [obs["bomb_life"][(x, y)]
               for _, x, y in reachable_items[constants.Item.Bomb]])
        not_in_first_blast = list()
        for t, x, y in good_time_positions:
            in_flames = (boards_no_move[int(first_blast_time)][(x, y)]
                         == constants.Item.Flames.value)
            if t > first_blast_time or not in_flames:
                not_in_first_blast.append((t, x, y))
        if not_in_first_blast:
            good_time_positions = not_in_first_blast

    action = step_toward(good_time_positions)
    if action is not None:
        return action.value

    #
    # Move toward might powerups
    #

    good_time_positions = reachable_items["might_powerup"]
    if len(good_time_positions) > 0:
        action = step_toward(good_time_positions)
        if action is not None:
            return action.value

    #
    # Move towards a fog where we have not seen longest
    #

    best_time_position = None
    oldest = 0
    for t, x, y in next_to_items[constants.Item.Fog]:
        around = [(x + dx, y + dy) for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
        # oldest neighbouring cell plus a random amount below one
        age = max([info["since_last_seen"][cell]
                   for cell in around
                   if self._on_board(cell)]) + self.random.uniform()
        if age > oldest:
            oldest = age
            best_time_position = (t, x, y)

    if best_time_position is not None:
        action = step_toward([best_time_position])
        if action is not None:
            return action.value

    #
    # Random action
    #

    if constants.Action.Bomb in survivable_actions:
        survivable_actions.remove(constants.Action.Bomb)
    return self.random.choice(survivable_actions).value
def _get_flames(self, board, prev_board, prev_flame_life, bomb_position_strength, moving_direction):
    """
    Summarize information about flames

    Parameters
    ----------
    board : array
        pommerman board
    prev_board : array
        pommerman board in the previous step
    prev_flame_life : array
        remaining life of flames in the previous step
    bomb_position_strength : list
        list of pairs of position and strength of bombs just exploded
    moving_direction : array
        direction of moving bombs

    Return
    ------
    curr_flames : list
        list of Flames
    flame_life : array
        remaining life of flames
    """

    flames_value = constants.Item.Flames.value

    # existing flames get one step older (a new array is created here)
    flame_life = prev_flame_life - (prev_flame_life > 0)

    # cells that became flames in this step get the full life
    just_ignited = np.where((prev_board != flames_value) * (board == flames_value))
    flame_life[just_ignited] = 3

    # cells that are not flames on the board cannot have remaining life
    not_burning = np.where(board != flames_value)
    flame_life[not_burning] = 0

    # offset applied to a bomb position for each moving direction
    offset_by_direction = (
        (constants.Action.Right, (0, 1)),
        (constants.Action.Left, (0, -1)),
        (constants.Action.Down, (1, 0)),
        (constants.Action.Up, (-1, 0)),
    )
    # (step in x, step in y, first distance) of each arm of a blast;
    # only the first arm starts at distance 0, i.e. covers the bomb cell
    blast_arms = (
        (1, 0, 0),
        (-1, 0, 1),
        (0, 1, 1),
        (0, -1, 1),
    )

    for (x, y), strength in bomb_position_strength:

        # for moving bombs, we cannot exactly tell whether it has stopped or not
        # so, consider both possibility
        heading = moving_direction[(x, y)]
        shift = (0, 0)
        for direction, offset in offset_by_direction:
            if heading == direction:
                shift = offset
                break

        possible_positions = [(x, y)]
        if heading is not None:
            next_position = (x + shift[0], y + shift[1])
            if self._on_board(next_position):
                possible_positions.append(next_position)

        # Not handled here: a bomb that just started to move, or whose
        # direction was changed by a kick.

        for (xx, yy) in possible_positions:
            if not utility.position_is_flames(board, (xx, yy)):
                # not exploded yet
                continue

            # refresh the life of every flame cell on the four arms;
            # an arm is cut only when it leaves the board
            for step_x, step_y, first in blast_arms:
                for distance in range(first, strength):
                    position = (xx + step_x * distance, yy + step_y * distance)
                    if not self._on_board(position):
                        break
                    if utility.position_is_flames(board, position):
                        flame_life[position] = 3

    rows, cols = np.where(flame_life > 0)
    curr_flames = [characters.Flame(position, flame_life[position] - 1)
                   for position in zip(rows, cols)]

    return curr_flames, flame_life
def act(self, obs, action_space, info):
    """
    Choose the action for this step, following a fixed digging plan.

    Each agent id has a fixed list of cells that are checked for wood in
    ``info["last_seen"]``; the first one that is still wood defines the
    digging position. When no such cell remains, bomb targets are taken
    from ``_get_bomb_target`` instead.

    The candidates below are tried in order and the first one that yields
    an action is returned:

    1. if exactly one action is survivable, take it
    2. place a bomb when standing on a bomb target and bombing is acceptable
    3. move to the digging position
    4. move to a reachable ExtraBomb / IncrRange / Kick item
    5. move to a bomb target
    6. move next to the fog that has not been seen for the longest time
    7. a random survivable action other than placing a bomb

    All diagnostic output is printed only when the module-level ``verbose``
    flag is set.

    Parameters
    ----------
    obs : dict
        observation; the keys ``board``, ``position``, ``ammo``,
        ``blast_strength`` and ``bomb_life`` are read here
    action_space
        not used by this method
    info : dict
        extended observation; the keys ``list_boards_no_move``,
        ``last_seen``, ``curr_bombs``, ``curr_flames``, ``might_powerup``,
        ``flame_life`` and ``since_last_seen`` are read here

    Return
    ------
    int or None
        value of the chosen action, or None when no action is survivable
    """

    #
    # Definitions
    #

    self._search_range = 10

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']

    if verbose:
        print("My position", my_position, end="\t")

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, _, _ \
        = self._search_time_expanded_network(info["list_boards_no_move"],
                                             my_position)

    # where to place bombs to break wood
    bomb_target = np.full(board.shape, False)
    digging = None
    # Fixed digging plan per agent id: pairs of (cell that must still be
    # wood, cell to stand on when bombing it), tried in the listed order
    dig_plans = (
        (constants.Item.Agent0, [((1, n), (1, n - 1)) for n in (4, 5, 6)]),
        (constants.Item.Agent1, [((m, 1), (m + 1, 1)) for m in (6, 5, 4)]),
        (constants.Item.Agent2, [((m, 9), (m + 1, 9)) for m in (6, 5, 4)]),
        (constants.Item.Agent3, [((1, n), (1, n + 1)) for n in (6, 5, 4)]),
    )
    my_item_value = board[my_position]
    for agent_item, candidates in dig_plans:
        if my_item_value != agent_item.value:
            continue
        for wood_position, stand_position in candidates:
            if utility.position_is_wood(info["last_seen"], wood_position):
                bomb_target[stand_position] = True
                digging = stand_position
                break
        break

    if digging is None:
        bomb_target, _ \
            = self._get_bomb_target(info["list_boards_no_move"][-1],
                                    my_position,
                                    my_blast_strength,
                                    constants.Item.Wood)

    # Items that can be reached in a survivable manner
    reachable_items, _, next_to_items \
        = self._find_reachable_items(info["list_boards_no_move"],
                                     my_position,
                                     survivable,
                                     bomb_target)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable,
                                       obs,
                                       info["curr_bombs"],
                                       info["curr_flames"])

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    if verbose:
        print("survivable actions are", survivable_actions)

    #
    # Choose an action
    #

    if len(survivable_actions) == 0:
        # This should not happen
        return None

    elif len(survivable_actions) == 1:
        # move to the position if it is the only survivable position
        action = survivable_actions[0]
        if verbose:
            print("The only survivable action", action)
        return action.value

    #
    # Place a bomb
    #

    consider_bomb = True
    if survivable_with_bomb is None:
        consider_bomb = False
    elif not bomb_target[my_position]:
        consider_bomb = False
    elif any([len(s) <= 0 for s in survivable_with_bomb]):
        # if not survivable all the time after bomb, do not bomb
        consider_bomb = False
    elif self._might_break_powerup(info["list_boards_no_move"][-1],
                                   my_position,
                                   my_blast_strength,
                                   info["might_powerup"]):
        # if might break an item, do not bomb
        consider_bomb = False

    if consider_bomb:
        # place bomb if I am at a bomb target
        if verbose:
            print("Bomb at a bomb target", constants.Action.Bomb)
        return constants.Action.Bomb.value

    #
    # Move to the digging position
    #

    good_time_positions = reachable_items["target"]
    if digging and good_time_positions:
        time_to_reach = good_time_positions[0][0]
        if any([
                my_ammo and board[digging] in [
                    constants.Item.Passage.value,
                    constants.Item.ExtraBomb.value,
                    constants.Item.IncrRange.value,
                    constants.Item.Kick.value
                ],
                info["flame_life"][digging] <= time_to_reach
                and utility.position_is_flames(board, digging)
        ]):
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions,
                                                   prev,
                                                   is_survivable)
            if action is not None:
                if verbose:
                    print("Move to dig", action)
                return action.value

    #
    # Move towards good items
    #

    # TODO : kick may be a good item only if I cannot kick yet
    # TODO : might want to destroy
    good_items = [
        constants.Item.ExtraBomb,
        constants.Item.IncrRange,
        constants.Item.Kick
    ]
    # positions with good items
    good_time_positions = set()
    for item in good_items:
        good_time_positions = good_time_positions.union(reachable_items[item])
    if len(good_time_positions) > 0:
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev,
                                               is_survivable)
        if action is not None:
            if verbose:
                print("Moving toward good item", action)
            return action.value

    #
    # Move towards where to bomb
    #

    good_time_positions = reachable_items["target"]
    # If I have no bomb, I do not want to wait at the target that will be covered by flames
    # before I can place a bomb
    if my_ammo == 0:
        first_blast_time = constants.DEFAULT_BOMB_LIFE
        for t, x, y in reachable_items[constants.Item.Bomb]:
            life = obs["bomb_life"][(x, y)]
            if life < first_blast_time:
                first_blast_time = life
        _good_time_positions = list()
        for t, x, y in good_time_positions:
            if any([
                    t > first_blast_time,
                    info["list_boards_no_move"][int(first_blast_time)][(x, y)]
                    != constants.Item.Flames.value
            ]):
                _good_time_positions.append((t, x, y))
        if _good_time_positions:
            good_time_positions = _good_time_positions

    action = self._find_distance_minimizer(my_position,
                                           good_time_positions,
                                           prev,
                                           is_survivable)
    if action is not None:
        if verbose:
            print("Moving toward where to bomb", action)
        return action.value

    #
    # TODO : move toward might powerups
    #

    #
    # Move towards a fog where we have not seen longest
    #

    best_time_position = None
    oldest = 0
    for t, x, y in next_to_items[constants.Item.Fog]:
        neighbors = [(x + dx, y + dy)
                     for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]]
        age = max([
            info["since_last_seen"][position] for position in neighbors
            if self._on_board(position)
        ])
        if age > oldest:
            oldest = age
            best_time_position = (t, x, y)

    if best_time_position is not None:
        action = self._find_distance_minimizer(my_position,
                                               [best_time_position],
                                               prev,
                                               is_survivable)
        if action is not None:
            if verbose:
                print("Moving toward oldest fog", action)
            return action.value

    #
    # Random action
    #

    # never bomb at random; at least one other action remains because two
    # or more actions are survivable at this point
    if constants.Action.Bomb in survivable_actions:
        survivable_actions.remove(constants.Action.Bomb)
    action = random.choice(survivable_actions)
    if verbose:
        print("Random action", action, survivable_actions)
    return action.value