def noop_board_sequence(obs, length):
    """
    Simulate the sequence of boards, assuming agents stay unmoved

    Parameters
    ----------
    obs : dict
        pommerman observation; "board", "bomb_life" and
        "bomb_blast_strength" are read and left untouched
    length : int
        number of steps to simulate

    Return
    ------
    list_boards : list
        length + 1 boards: the initial board (with bombs drawn over any
        overlapping agent) followed by the board after each simulated step
    """
    model = ForwardModel()

    # Dummy objects
    actions = [constants.Action.Stop.value] * 4  # agents stay unmoved
    curr_agents = list()  # empty list of Bombers
    curr_items = dict()  # we never know hidden items

    # Prepare initial state.
    # Work on a private copy: the bomb overlay below writes into the board,
    # and the forward model may update it in place as well, so aliasing
    # obs["board"] would corrupt the caller's observation.
    curr_board = obs["board"].copy()

    curr_bombs = list()
    rows, cols = np.where(obs["bomb_life"] > 0)
    for row, col in zip(rows, cols):
        bomber = characters.Bomber()  # dummy owner of the bomb
        position = (row, col)
        life = int(obs["bomb_life"][row][col])
        blast_strength = int(obs["bomb_blast_strength"][row][col])
        moving_direction = None  # TODO: this may be known
        bomb = characters.Bomb(bomber, position, life, blast_strength,
                               moving_direction)
        curr_bombs.append(bomb)
        # overwrite bomb over agent if they overlap
        curr_board[position] = constants.Item.Bomb.value

    # Flames are looked up on the board *after* the bomb overlay, exactly as
    # before (a cell that holds a bomb is not reported as a flame).
    curr_flames = list()
    rows, cols = np.where(curr_board == constants.Item.Flames.value)
    for row, col in zip(rows, cols):
        # TODO: the remaining life of a flame may be known; until then the
        # Flame default is used
        curr_flames.append(characters.Flame((row, col)))

    # Simulate
    list_boards = [curr_board.copy()]
    for _ in range(length):
        curr_board, _, curr_bombs, _, curr_flames = model.step(
            actions, curr_board, curr_agents, curr_bombs, curr_items,
            curr_flames)
        list_boards.append(curr_board.copy())

    return list_boards
def get_initial_game_data(obs, my_id, max_steps=1000):
    """
    Build the GameData describing the very first step of a two-agent game.

    Parameters
    ----------
    obs : dict
        pommerman observation of the agent identified by my_id
    my_id : int
        agent id (board value minus 10) of the observing agent
    max_steps : int, optional
        step limit stored in the game data

    Return
    ------
    game_data : GameData

    Raises
    ------
    ValueError
        if the observed board already contains a bomb or a flame
    """
    game_data = GameData()
    game_data.board_size = len(obs['board'])
    game_data.step_count = obs['step_count'] - 1
    game_data.max_steps = max_steps
    game_data.game_type = obs['game_type']
    game_data.simulation_bomb_life = None

    # board
    game_data.board = EnvSimulator.get_board(game_data.board_size,
                                             obs['board'])

    # items
    game_data.items = {}

    # agents: only Agent0 and Agent1 take part
    game_data.agents = []
    for board_id in (constants.Item.Agent0.value,
                     constants.Item.Agent1.value):
        agent_id = board_id - 10
        is_alive = board_id in obs['alive']

        agent = characters.Bomber(agent_id, game_data.game_type)
        agent.set_start_position(
            EnvSimulator.get_position(game_data.board, board_id, True))

        if agent_id == my_id:
            # my own abilities are part of the observation
            agent.reset(int(obs['ammo']), is_alive,
                        int(obs['blast_strength']), bool(obs['can_kick']))
        else:
            # abilities of the other agent are not observed: keep the
            # values the Bomber was created with
            agent.reset(agent.ammo, is_alive, agent.blast_strength,
                        agent.can_kick)
        game_data.agents.append(agent)

    # bombs
    game_data.bombs = []
    found_bombs = EnvSimulator.get_position(game_data.board,
                                            constants.Item.Bomb.value, False)
    if len(found_bombs) > 0:
        raise ValueError('Invalid: no bombs allowed in initial state')

    # flames
    game_data.flames = []
    found_flames = EnvSimulator.get_position(
        game_data.board, constants.Item.Flames.value, False)
    if len(found_flames) > 0:
        raise ValueError('Invalid: no flames allowed in initial state')

    # done
    game_data.done = forward_model.ForwardModel.get_done(
        game_data.agents, game_data.step_count, game_data.max_steps,
        game_data.game_type, None)

    return game_data
def __init__(self, obs, init=False, bombing_agents=None, board_size=11):
    """
    Wrap an observation together with the simulation state derived from it.

    Parameters
    ----------
    obs : dict
        pommerman observation; 'position', 'board', 'bomb_life',
        'bomb_blast_strength', 'teammate', 'enemies' and 'ammo' are read
    init : bool, optional
        when True, flames, bombs, items and agents are derived from obs
    bombing_agents : dict, optional
        passed on to convert_bombs_two when not empty; a deep copy is kept
        in self.bombing_agents.  None (the default) means an empty dict.
    board_size : int, optional
        stored in self._board_size
    """
    # A None sentinel replaces the former mutable `{}` default so that no
    # dict object is shared between calls.
    if bombing_agents is None:
        bombing_agents = {}

    self._game_mode = constants.GameType.FFA
    self.move = None
    self._board_size = board_size
    self._obs = obs
    self._my_position = tuple(obs['position'])
    self._board = np.array(obs['board'])
    self._bomb_life = np.array(self._obs['bomb_life'])
    self._teammate = obs['teammate']
    self._enemies = [constants.Item(e) for e in obs['enemies']]
    self._ammo = int(obs['ammo'])
    self.fm = forward_model.ForwardModel()
    self.self_agent = self.find_self_agent(self._obs)

    agents_id = [
        constants.Item.Agent0, constants.Item.Agent1,
        constants.Item.Agent2, constants.Item.Agent3
    ]
    # remember to modify if it is team or radio mode
    self._agents = [
        characters.Bomber(aid.value, "FFA") for aid in agents_id
    ]

    self.bombing_agents = copy.deepcopy(bombing_agents)

    self.score = 0

    if init:
        # determine by confirming the map
        self.curr_flames = self.convert_flames(self._board)
        self.curr_bombs = self.convert_bombs(
            np.array(obs['bomb_blast_strength']),
            np.array(obs['bomb_life']))
        self.curr_items = self.convert_items(self._board)
        self.curr_agents = self.convert_agents(self._board)
        self.last_items = self.curr_items

    # NOTE(review): the nesting of this block was reconstructed from a
    # whitespace-collapsed source; confirm that it is meant to run
    # independently of `init`.
    if (bombing_agents != {}):
        self.curr_bombs = self.convert_bombs_two(
            np.array(self._obs['bomb_blast_strength']), self._bomb_life,
            bombing_agents)
def get_gamedata(gamestate, game_type):
    """
    Rebuild a GameData object from a game state dictionary.

    Parameters
    ----------
    gamestate : dict
        'board_size', 'step_count', 'board', 'agents', 'bombs' and 'flames'
        are read.  Each agent entry provides 'agent_id', 'ammo', 'is_alive',
        'blast_strength' and 'can_kick'; each bomb / flame entry is expanded
        as keyword arguments of characters.Bomb / characters.Flame.
    game_type
        stored in the game data and handed to every Bomber

    Return
    ------
    game_data : env_simulator.GameData
    """
    game_data = env_simulator.GameData()
    game_data.board_size = gamestate['board_size']
    game_data.step_count = gamestate['step_count'] - 1
    game_data.max_steps = 800
    game_data.game_type = game_type
    game_data.simulation_bomb_life = None

    # board
    game_data.board = gamestate['board']

    # items
    game_data.items = {}

    # agents
    game_data.agents = []
    for entry in gamestate['agents']:
        agent_id = entry['agent_id']
        board_id = agent_id + 10  # value of this agent on the board
        bomber = characters.Bomber(agent_id, game_data.game_type)
        bomber.set_start_position(
            get_position(game_data.board, board_id, True))
        bomber.reset(entry['ammo'], entry['is_alive'],
                     entry['blast_strength'], entry['can_kick'])
        game_data.agents.append(bomber)

    # bombs
    game_data.bombs = [
        characters.Bomb(**kwargs) for kwargs in gamestate['bombs']
    ]

    # flames
    game_data.flames = [
        characters.Flame(**kwargs) for kwargs in gamestate['flames']
    ]

    # done
    game_data.done = forward_model.ForwardModel.get_done(
        game_data.agents, game_data.step_count, game_data.max_steps,
        game_data.game_type, None)

    return game_data
def _get_survivable_actions(self, survivable, obs, curr_bombs, curr_flames):
    """
    Determine which of my actions keep me alive at the next step

    Parameters
    ----------
    survivable : list
        survivable[t] : set of positions that are survivable at step t
    obs : dict
        pommerman observation
    curr_bombs : list
        list of current bombs
    curr_flames : list
        list of current flames

    Return
    ------
    is_survivable : defaultdict
        is_survivable[action] : whether survivable with action
    survivable_with_bomb : list or None
        survivable positions per step after laying a bomb here;
        None when laying a bomb was not evaluated
    """
    my_position = obs["position"]
    my_blast_strength = obs["blast_strength"]
    x, y = my_position

    # Where each action takes me (x is the row index here)
    moves = [
        (constants.Action.Down, (x + 1, y)),
        (constants.Action.Up, (x - 1, y)),
        (constants.Action.Right, (x, y + 1)),
        (constants.Action.Left, (x, y - 1)),
        (constants.Action.Stop, (x, y)),
    ]

    is_survivable = defaultdict(bool)
    for action, destination in moves:
        if destination in survivable[1]:
            is_survivable[action] = True

    # TODO : shoud check the survivability of all agents in one method

    # Laying a bomb is only worth evaluating if I have ammo, I am not
    # already standing on a bomb, staying here is safe, and there is at
    # least one more survivable action besides staying
    have_ammo = obs["ammo"] > 0
    not_on_bomb = obs["bomb_life"][my_position] == 0
    can_stay = is_survivable[constants.Action.Stop]
    has_alternative = sum(is_survivable.values()) > 1
    if not (have_ammo and not_on_bomb and can_stay and has_alternative):
        return is_survivable, None

    # lay a bomb on copies of the current state
    board_with_bomb = deepcopy(obs["board"])
    bombs_with_mine = deepcopy(curr_bombs)
    board_with_bomb[my_position] = constants.Item.Bomb.value
    my_bomb = characters.Bomb(characters.Bomber(),  # dummy owner of the bomb
                              my_position,
                              constants.DEFAULT_BOMB_LIFE,
                              my_blast_strength,
                              None)
    bombs_with_mine.append(my_bomb)

    # simulate and search what remains survivable with that bomb in place
    boards_with_bomb, _ = self._board_sequence(board_with_bomb,
                                               bombs_with_mine,
                                               curr_flames,
                                               self._search_range,
                                               my_position)
    survivable_with_bomb, _ = self._search_time_expanded_network(
        boards_with_bomb, my_position)

    if my_position in survivable_with_bomb[1]:
        is_survivable[constants.Action.Bomb] = True

    return is_survivable, survivable_with_bomb
def _board_sequence(self, board, bombs, flames, length, my_position,
                    my_action=None, can_kick=False, enemy_mobility=3):
    """
    Simulate the sequence of boards, assuming agents stay unmoved
    (except for my optional first action and the assumed enemy mobility)

    Parameters
    ----------
    board : array
        initial board
    bombs : list
        list of initial bombs
    flames : list
        list of initial flames
    length : int
        length of the board sequence to simulate
    my_position : tuple
        position of my agent
    my_action : Action, optional
        my action at the first step
    can_kick : boolean, optional
        whether I can kick
    enemy_mobility : int, optional
        number of steps where enemies move nondeterministically

    Return
    ------
    list_boards : list
        list of boards (length + 1 entries, the first is the initial board)
    my_next_position : tuple or None
        my position after the first step; None when my_action is None
        (or when length is 0)
    """
    # Forward model to simulate
    model = ForwardModel()

    # Prepare initial state (inputs are never modified)
    _board = board.copy()
    _bombs = deepcopy(bombs)
    _flames = deepcopy(flames)
    _items = dict()  # we never know hidden items
    _actions = [constants.Action.Stop.value] * 4
    if my_action is not None:
        agent = characters.Bomber()
        agent.agent_id = board[my_position] - 10
        agent.position = my_position
        agent.can_kick = can_kick
        _agents = [agent]
        _actions[agent.agent_id] = my_action
    else:
        _agents = list()

    my_next_position = None

    # Get enemy positions to take into account their mobility
    rows, cols = np.where(_board > constants.Item.AgentDummy.value)
    enemy_positions = [position for position in zip(rows, cols)
                       if position != my_position]

    # List of enemies
    enemies = list()
    for position in enemy_positions:
        agent = characters.Bomber()
        agent.agent_id = board[position] - 10
        agent.position = position
        enemies.append(agent)

    _agents = _agents + enemies

    # Overwrite bomb over agent if they overlap
    for bomb in _bombs:
        _board[bomb.position] = constants.Item.Bomb.value

    # Simulate
    list_boards = [_board.copy()]
    for t in range(length):
        # Standard simulation step
        _board, _agents, _bombs, _, _flames \
            = model.step(_actions, _board, _agents, _bombs, _items, _flames)

        # Overwrite passage over my agent when it has moved to a passage.
        # My agent is _agents[0] only when my_action was given; without
        # this guard the first *enemy* was mistaken for my agent, erased
        # from the board and reported as my next position.
        if t == 0 and my_action is not None and len(_agents) > 0:
            agent = _agents[0]
            my_next_position = agent.position
            if all([agent.position != my_position,
                    _board[agent.position] != constants.Item.Flames.value,
                    _board[agent.position] != constants.Item.Bomb.value]):
                # I did not die and did not stay on a bomb
                _board[agent.position] = constants.Item.Passage.value

        # Overwrite bomb over agent if they overlap
        for bomb in _bombs:
            _board[bomb.position] = constants.Item.Bomb.value

        # Take into account the nondeterministic mobility of enemies
        if t < enemy_mobility:
            _enemy_positions = set()
            for x, y in enemy_positions:
                # for each enemy position in the previous step
                for dx, dy in [(0, 0), (1, 0), (-1, 0), (0, 1), (0, -1)]:
                    # consider the next possible position
                    next_position = (x + dx, y + dy)
                    if not self._on_board(next_position):
                        # ignore if out of board
                        continue
                    if any([utility.position_is_passage(_board,
                                                        next_position),
                            utility.position_is_powerup(_board,
                                                        next_position),
                            (next_position == my_position
                             and utility.position_is_agent(_board,
                                                           next_position))]):
                        # possible as a next position
                        # TODO : what to do with my position
                        _enemy_positions.add(next_position)
                        _board[next_position] \
                            = constants.Item.AgentDummy.value
            enemy_positions = _enemy_positions

        # After the first step only the (static) enemies are simulated
        _actions = [constants.Action.Stop.value] * 4
        _agents = enemies
        list_boards.append(_board.copy())

    return list_boards, my_next_position
def _get_bombs(self, obs, prev_bomb_life):
    """
    Collect the bombs on the board and infer which of them are moving

    Parameters
    ----------
    obs : dict
        pommerman observation
    prev_bomb_life : array
        remaining life of bombs at the previous step

    Return
    ------
    curr_bombs : list
        list of bombs
    moving_direction : array
        moving_direction[position] : direction of the bomb at position
        (None if no movement was detected)
    bomb_life : array
        copy of the current remaining life of bombs, for the next step
    """
    life_now = obs["bomb_life"]

    # Change of bomb life per cell since the previous step:
    #    0 : no bomb before, no bomb now
    #    1 : the remaining life of a bomb is decremented
    #   -9 : no bomb before, bomb now
    # any other value is taken as a bomb leaving or entering the cell
    delta = prev_bomb_life - life_now
    displaced = (delta != 0) * (delta != 1) * (delta != -9)

    # cells that moving bombs have left
    rows, cols = np.where(displaced * (delta > 0))
    departures = list(zip(rows, cols))

    # cells that moving bombs have entered
    rows, cols = np.where(displaced * (delta < 0))
    arrivals = list(zip(rows, cols))

    moving_direction = np.full(self.board_shape, None)
    curr_bombs = []

    rows, cols = np.where(life_now > 0)
    for cell in zip(rows, cols):
        remaining = life_now[cell]

        if cell in arrivals:
            # the bomb is moving: look for the cell it came from
            for origin in departures:
                # its life one step ago must be exactly one larger
                if prev_bomb_life[origin] != remaining + 1:
                    continue
                # and it must have come from an adjacent cell
                distance = abs(cell[0] - origin[0]) \
                    + abs(cell[1] - origin[1])
                if distance != 1:
                    continue
                moving_direction[cell] = self._get_direction(origin, cell)
                # TODO: there might be multiple possibilities of
                # where the bomb came from
                break

        curr_bombs.append(
            characters.Bomb(characters.Bomber(),  # dummy owner of the bomb
                            cell,
                            remaining,
                            int(obs["bomb_blast_strength"][cell]),
                            moving_direction[cell]))

    return curr_bombs, moving_direction, life_now.copy()
def act(self, obs, action_space, info):
    """
    Choose an action that keeps me alive and, if possible, hurts enemies

    The decision is made in this order:
      1. if no action / exactly one action is survivable, give up / take it
      2. lay a bomb if that is safe enough and useful
         (breaks fog or enemy, reduces the survivability of an enemy,
         or can afterwards be kicked into fog / towards an enemy)
      3. kick an adjacent bomb if I survive doing so
      4. if standing on a bomb, move so that I can kick it next step
      5. move towards an enemy
      6. otherwise take a random survivable action

    Parameters
    ----------
    obs : dict
        pommerman observation
    action_space
        not used
    info : dict
        "list_boards_no_move", "curr_bombs", "curr_flames" and
        "moving_direction" are read

    Return
    ------
    action : int or None
        value of the chosen constants.Action;
        None if no survivable action exists
    """
    #
    # Definitions
    #
    self._search_range = 10

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_blast_strength = obs['blast_strength']
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_kick = obs["can_kick"]  # whether I can kick

    print("my position", my_position, end="\t")

    # (my move, dx, dy): when I step away with the move and then step
    # back, the bomb is kicked in direction (dx, dy), opposite to the move
    kick_plans = [
        (constants.Action.Up, 1, 0),      # kick direction is down
        (constants.Action.Down, -1, 0),   # kick direction is up
        (constants.Action.Left, 0, 1),    # kick direction is right
        (constants.Action.Right, 0, -1),  # kick direction is left
    ]

    def kicked_bomb_path(dx, dy, n_steps):
        """Yield (t, position) for each cell entered by a bomb kicked from
        my position in direction (dx, dy), for at most n_steps steps."""
        position = (my_position[0] + dx, my_position[1] + dy)
        if not self._on_board(position):
            return
        for t in range(n_steps):
            if not utility.position_is_passage(board, position):
                # the bomb stops here
                return
            position = (position[0] + dx, position[1] + dy)
            if not self._on_board(position):
                return
            yield t, position

    #
    # Understand current situation
    #

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable_no_move, _ \
        = self._search_time_expanded_network(info["list_boards_no_move"],
                                             my_position)

    # Items that can be reached in a survivable manner
    reachable_items_no_move, _, _ \
        = self._find_reachable_items(info["list_boards_no_move"],
                                     my_position,
                                     survivable_no_move)

    # Simulation assuming enemies move
    for enemy_mobility in range(3, -1, -1):
        # List of boards simulated
        list_boards, _ = self._board_sequence(
            board,
            info["curr_bombs"],
            info["curr_flames"],
            self._search_range,
            my_position,
            enemy_mobility=enemy_mobility)

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev = self._search_time_expanded_network(
            list_boards, my_position)

        if len(survivable[1]) > 0:
            # Gradually reduce the mobility of enemy,
            # so we have at least one survivable action
            break

    # Items that can be reached in a survivable manner
    _, _, next_to_items \
        = self._find_reachable_items(list_boards, my_position, survivable)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable, obs,
                                       info["curr_bombs"],
                                       info["curr_flames"])

    survivable_actions = [a for a in is_survivable if is_survivable[a]]
    print("survivable actions are", survivable_actions)

    # Positions where we kick a bomb if we move to
    if my_kick:
        kickable = self._kickable_positions(obs, info["moving_direction"])
    else:
        kickable = set()

    print()

    #
    # Choose an action
    #

    # Do not stay on a bomb if I can
    if (obs["bomb_life"][my_position] > 0
            and len(survivable_actions) > 1
            and is_survivable[constants.Action.Stop]):
        is_survivable[constants.Action.Stop] = False
        survivable_actions = [a for a in is_survivable if is_survivable[a]]

    if len(survivable_actions) == 0:
        print("Must die")
        return None
    elif len(survivable_actions) == 1:
        # move to the position if it is the only survivable position
        action = survivable_actions[0]
        print("The only survivable action", action)
        return action.value

    # TODO : shoud check the survivability of all agents in one method

    # Place a bomb if
    # - it does not significantly reduce my survivability
    # - it can reduce the survivability of enemies
    consider_bomb = True
    if survivable_with_bomb is None:
        consider_bomb = False
    elif any([len(s) <= 2 for s in survivable_with_bomb[1:]]):
        # if not sufficiently survivable all the time after bomb,
        # do not bomb
        consider_bomb = False

    if consider_bomb:
        # place bomb if can reach fog/enemy
        if self._can_break(info["list_boards_no_move"][-1],
                           my_position,
                           my_blast_strength,
                           [constants.Item.Fog] + my_enemies):
            print("Bomb to break fog/enemy", constants.Action.Bomb)
            print(info["list_boards_no_move"][-1])
            return constants.Action.Bomb.value

        # Boards simulated with my bomb laid; the same for every enemy,
        # so computed at most once (on first use)
        list_boards_with_bomb = None

        for enemy in my_enemies:
            # check if the enemy is reachable
            if len(reachable_items_no_move[enemy]) == 0:
                continue

            # position where the enemy can be reached
            enemy_position = reachable_items_no_move[enemy][0][1:3]

            # check if placing a bomb can reduce the survivability
            # of the enemy
            survivable_before, _ = self._search_time_expanded_network(
                info["list_boards_no_move"], enemy_position)

            if list_boards_with_bomb is None:
                board_with_bomb = deepcopy(obs["board"])
                curr_bombs_with_bomb = deepcopy(info["curr_bombs"])
                # lay a bomb
                board_with_bomb[my_position] = constants.Item.Bomb.value
                bomb = characters.Bomb(
                    characters.Bomber(),  # dummy owner of the bomb
                    my_position,
                    constants.DEFAULT_BOMB_LIFE,
                    my_blast_strength,
                    None)
                curr_bombs_with_bomb.append(bomb)
                list_boards_with_bomb, _ \
                    = self._board_sequence(board_with_bomb,
                                           curr_bombs_with_bomb,
                                           info["curr_flames"],
                                           self._search_range,
                                           my_position,
                                           enemy_mobility=0)

            survivable_after, _ \
                = self._search_time_expanded_network(list_boards_with_bomb,
                                                     enemy_position)

            good_before = np.array([len(s) for s in survivable_before])
            good_after = np.array([len(s) for s in survivable_after])
            # TODO : what are good criteria?
            if any(good_after < good_before):
                # place a bomb if it makes sense
                print("Bomb to kill an enemy", constants.Action.Bomb)
                print("before", good_before)
                print("after ", good_after)
                print([len(s) for s in survivable])
                print([len(s) for s in survivable_with_bomb])
                return constants.Action.Bomb.value

        # if I can kick, consider placing a bomb to kick
        if my_kick and my_position in survivable_with_bomb[3]:
            # consider a sequence of actions:
            # place bomb -> move (action) -> move back (kick)
            #
            # The bomb is not laid yet, so obs["bomb_life"] at my position
            # is 0 here; the travel time must be derived from the life of
            # a fresh bomb (using the observed 0 made this scan a no-op).
            n_steps = min(constants.DEFAULT_BOMB_LIFE - 2,
                          len(list_boards) - 2)
            for action, dx, dy in kick_plans:
                if not is_survivable[action]:
                    continue
                # Find where the bomb goes if kicked
                for t, position in kicked_bomb_path(dx, dy, n_steps):
                    if utility.position_is_fog(board, position):
                        print("Bomb to kick into fog", action)
                        return constants.Action.Bomb.value
                    elif utility.position_is_enemy(list_boards[t + 2],
                                                   position, my_enemies):
                        print("Bomb to kick towards enemy", action)
                        return constants.Action.Bomb.value

    # kick
    while kickable:
        # then consider what happens if I kick a bomb
        next_position = kickable.pop()

        # do not kick a bomb if it will break enemies
        if info["moving_direction"][next_position] is None:
            # if it is a static bomb
            if self._can_break(info["list_boards_no_move"][0],
                               next_position,
                               my_blast_strength,
                               my_enemies):
                continue

        my_action = self._get_direction(my_position, next_position)
        list_boards_with_kick, next_position \
            = self._board_sequence(obs["board"],
                                   info["curr_bombs"],
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   my_action=my_action,
                                   can_kick=True,
                                   enemy_mobility=3)
        survivable_with_kick, _ \
            = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                 next_position)
        if next_position in survivable_with_kick[0]:
            print("Kicking", my_action)
            return my_action.value

    # if on a bomb, consider where to kick in the following step
    if obs["bomb_life"][my_position] > 0:
        # For each survivable move in the next step,
        # check what happens if we kick in the following step.
        # If the bomb is kicked into a fog, plan to kick.
        # If the bomb is kicked toward an enemy, plan to kick.
        # Otherwise, do not plan to kick.
        n_steps = min(int(obs["bomb_life"][my_position]) - 1,
                      len(list_boards) - 2)
        for action, dx, dy in kick_plans:
            if not is_survivable[action]:
                continue
            # Find where the bomb goes if kicked
            for t, position in kicked_bomb_path(dx, dy, n_steps):
                if utility.position_is_fog(board, position):
                    print("Moving to kick into fog", action)
                    return action.value
                elif utility.position_is_enemy(list_boards[t + 2],
                                               position, my_enemies):
                    # This branch used to print only; the plan stated
                    # above requires actually making the move.
                    print("Moving to kick towards enemy", action)
                    return action.value

    # Move towards an enemy
    good_time_positions = set()
    for enemy in my_enemies:
        good_time_positions = good_time_positions.union(
            next_to_items[enemy])

    if len(good_time_positions) > 0:
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev,
                                               is_survivable)
        if action is not None:
            print("Moving toward enemy", action)
            return action.value

    #
    # Random action
    #
    action = random.choice(survivable_actions)
    print("Random action", action)
    return action.value
def _get_bombs(self, board, bomb_blast_strength, prev_bomb_blast_strength,
               bomb_life, prev_bomb_life):
    """
    Collect the bombs on the board and infer which of them are moving

    Bombs that were seen at the previous step and are now covered by fog
    are kept: bomb_life and bomb_blast_strength are updated IN PLACE for
    those cells.

    Parameters
    ----------
    board : array
    bomb_blast_strength : array
        modified in place for bombs under fog
    prev_bomb_blast_strength : array
        blast strength of bombs at the previous step
    bomb_life : array
        modified in place for bombs under fog
    prev_bomb_life : array
        remaining life of bombs at the previous step

    Return
    ------
    curr_bombs : list
        list of bombs
    moving_direction : array
        moving_direction[position] : direction of the bomb at position
        (None if no movement was detected)
    """
    # Keep bombs under fog: let them tick down by one and keep their
    # blast strength
    under_fog = np.where(
        (prev_bomb_life > 1) * (board == constants.Item.Fog.value))
    bomb_life[under_fog] = prev_bomb_life[under_fog] - 1
    bomb_blast_strength[under_fog] = prev_bomb_blast_strength[under_fog]

    # Change of bomb life per cell since the previous step:
    #    0 : no bomb before, no bomb now
    #    1 : the remaining life of a bomb is decremented
    #   -9 : no bomb before, new bomb now
    # any other value is taken as a bomb leaving or entering the cell
    delta = prev_bomb_life - bomb_life
    displaced = (delta != 0) * (delta != 1) * (delta != -9)

    # cells that moving bombs have left
    rows, cols = np.where(displaced * (delta > 0))
    departures = list(zip(rows, cols))

    # cells that moving bombs have entered
    rows, cols = np.where(displaced * (delta < 0))
    arrivals = list(zip(rows, cols))

    # TODO : Consider bombs moving into fog

    moving_direction = np.full(self.board_shape, None)
    curr_bombs = []

    rows, cols = np.where(bomb_life > 0)
    for cell in zip(rows, cols):
        remaining = bomb_life[cell]

        if cell in arrivals:
            # the bomb is moving: look for the cell it came from
            for origin in departures:
                # its life one step ago must be exactly one larger
                if prev_bomb_life[origin] != remaining + 1:
                    continue

                dx = cell[0] - origin[0]
                dy = cell[1] - origin[1]
                distance = abs(dx) + abs(dy)

                if distance == 2:
                    # this can be a moving bomb whose direction
                    # is changed by kick
                    kicked = False
                    for kicker in ((origin[0] + dx, origin[1]),
                                   (origin[0], origin[1] + dy)):
                        if utility.position_is_agent(board, kicker):
                            # the agent must have kicked
                            print("agent must have kicked at", kicker)
                            moving_direction[cell] = self._get_direction(
                                kicker, cell)
                            kicked = True
                            break
                    if kicked:
                        break

                if distance != 1:
                    # otherwise the bomb must have come
                    # from an adjacent cell
                    continue

                moving_direction[cell] = self._get_direction(origin, cell)
                # TODO: there might be multiple possibilities of
                # where the bomb came from
                break

        curr_bombs.append(
            characters.Bomb(
                characters.Bomber(),  # dummy owner of the bomb
                cell,
                remaining,
                int(bomb_blast_strength[cell]),
                moving_direction[cell]))

    return curr_bombs, moving_direction
def act(self, obs, action_space, info):
    """
    Choose the action after which I can survive for the most steps

    For every move that is survivable at the next step (and for laying a
    bomb, when I have ammo and do not stand on a bomb) the latest step
    that can still be survived afterwards is computed; one of the actions
    with the largest value is chosen at random.

    NOTE: both branches of the final decision return, so the kick /
    teammate / enemy fallback logic that used to follow it could never
    run (and referred to undefined names).  It has been removed.

    Parameters
    ----------
    obs : dict
        pommerman observation
    action_space
        not used
    info : dict
        "list_boards_no_move", "curr_bombs" and "curr_flames" are read

    Return
    ------
    action : int
        value of the chosen constants.Action
        (Stop if no action was evaluated)
    """
    #
    # Definitions
    #
    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_blast_strength = obs['blast_strength']

    # position of each enemy that is visible on the board
    enemy_position = dict()
    for enemy in my_enemies:
        positions = np.argwhere(board == enemy.value)
        if len(positions) == 0:
            continue
        enemy_position[enemy] = tuple(positions[0])

    def survivable_life(survivable, prev):
        """life[(t, x, y)] : the latest step that can be survived when I am
        at (x, y) at step t; 0 for time-positions not in the tree."""
        life = defaultdict(int)
        for t in range(self._search_range, 0, -1):
            for position in survivable[t]:
                here = (t, ) + position
                if not life[here]:
                    # nothing later is known to be reachable from here
                    life[here] = t
                for prev_position in prev[t][position]:
                    before = (t - 1, ) + prev_position
                    life[before] = max(life[before], life[here])
        return life

    survivable_steps = defaultdict(int)

    #
    # survivable tree in standard case
    #
    list_boards_no_kick = deepcopy(info["list_boards_no_move"])

    # remove myself: what is left at my position is a bomb if one is
    # there, and a passage otherwise
    if obs["bomb_blast_strength"][my_position]:
        underneath = constants.Item.Bomb.value
    else:
        underneath = constants.Item.Passage.value
    for b in list_boards_no_kick:
        if utility.position_is_agent(b, my_position):
            b[my_position] = underneath

    my_survivable, my_prev, _, my_survivable_with_enemy \
        = self._get_survivable_with_enemy(list_boards_no_kick,
                                          my_position,
                                          enemy_position)

    life = survivable_life(my_survivable_with_enemy, my_prev)
    for next_position in my_survivable[1]:
        my_action = self._get_direction(my_position, next_position)
        survivable_steps[my_action] = life[(1, ) + next_position]

    #
    # survivable tree if I lay bomb
    #
    if obs["ammo"] > 0 and obs["bomb_life"][my_position] == 0:
        # if I can lay a bomb
        board_with_bomb = deepcopy(obs["board"])
        curr_bombs_with_bomb = deepcopy(info["curr_bombs"])
        # lay a bomb
        board_with_bomb[my_position] = constants.Item.Bomb.value
        bomb = characters.Bomb(
            characters.Bomber(),  # dummy owner of the bomb
            my_position,
            constants.DEFAULT_BOMB_LIFE,
            my_blast_strength,
            None)
        curr_bombs_with_bomb.append(bomb)
        list_boards_with_bomb, _ \
            = self._board_sequence(board_with_bomb,
                                   curr_bombs_with_bomb,
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   enemy_mobility=0)

        _, my_prev_with_bomb, _, my_survivable_with_bomb_enemy \
            = self._get_survivable_with_enemy(list_boards_with_bomb,
                                              my_position,
                                              enemy_position)

        life = survivable_life(my_survivable_with_bomb_enemy,
                               my_prev_with_bomb)
        survivable_steps[constants.Action.Bomb] = life[(1, ) + my_position]

    print("survivable steps")
    print(survivable_steps)

    if not survivable_steps:
        print("No actions: stop")
        return constants.Action.Stop.value

    # choose at random among the actions with the most survivable steps
    values = np.array(list(survivable_steps.values()))
    print(values)
    best_index = np.where(values == np.max(values))
    best_actions = np.array(list(survivable_steps.keys()))[best_index]
    best_action = random.choice(best_actions)
    print("Most survivable action", best_action)
    return best_action.value