def _what_to_break(cls, board, my_position, blast_strength): x, y = my_position to_break = list() # To up for dx in range(1, blast_strength): if x + dx >= len(board[0]): break position = (x + dx, y) if utility.position_is_rigid(board, position): # stop searching this direction break elif utility.position_is_wood( board, position) or utility.position_is_agent( board, position): to_break.append(constants.Item(board[position])) break # To down for dx in range(1, blast_strength): if x - dx < 0: break position = (x - dx, y) if utility.position_is_rigid(board, position): # stop searching this direction break elif utility.position_is_wood( board, position) or utility.position_is_agent( board, position): to_break.append(constants.Item(board[position])) break # To right for dy in range(1, blast_strength): if y + dy >= len(board): break position = (x, y + dy) if utility.position_is_rigid(board, position): # stop searching this direction break elif utility.position_is_wood( board, position) or utility.position_is_agent( board, position): to_break.append(constants.Item(board[position])) break # To left for dy in range(1, blast_strength): if y - dy < 0: break position = (x, y - dy) if utility.position_is_rigid(board, position): # stop searching this direction break elif utility.position_is_wood( board, position) or utility.position_is_agent( board, position): to_break.append(constants.Item(board[position])) break return to_break
def get_observation_state(self, board, pos, enemies, bomb_map):
    """
    Need just the board layout to decide everything
    board -> np.array
    pos -> tuple
    enemies -> list
    bomb_map -> array-like, positive entries are read as bomb blast strengths

    Returns an int state code, in priority order:
        0 : a bomb is on my cell or on one of the four adjacent cells
        4 : a bomb is somewhere on my row or my column
        1 : an enemy is on an adjacent cell
        2 : a wood is on an adjacent cell
        3 : none of the above
    """

    def convert_bombs(bomb_map):
        '''Flatten out the bomb array into a list of bomb records'''
        rows, cols = np.where(bomb_map > 0)
        return [
            crazy_util.dotdict({
                'position': (r, c),
                'blast_strength': int(bomb_map[(r, c)])
            }) for r, c in zip(rows, cols)
        ]

    bombs = convert_bombs(np.array(bomb_map))
    n_rows, n_cols = board.shape[0], board.shape[1]
    x, y = pos

    has_bomb = utility.position_is_bomb(bombs, (x, y))
    has_enemy = False
    has_wood = False

    # The direction offsets are paired (four neighbours), as in the other
    # versions of this method; iterating them as a cross product would also
    # visit the diagonals and my own cell.
    dirX = [-1, 0, 1, 0]
    dirY = [0, 1, 0, -1]
    for dx, dy in zip(dirX, dirY):
        neighbor = (x + dx, y + dy)
        if not (0 <= neighbor[0] < n_rows and 0 <= neighbor[1] < n_cols):
            continue
        if utility.position_is_bomb(bombs, neighbor):
            has_bomb = True
        if utility.position_is_wood(board, neighbor):
            has_wood = True
        # check the neighbour, not my own cell (which never holds an enemy)
        if utility.position_is_enemy(board, neighbor, enemies):
            has_enemy = True

    # bomb anywhere on my column (same y) or my row (same x);
    # each axis is scanned over its own extent
    los_bomb = any(
        utility.position_is_bomb(bombs, (r, y)) for r in range(n_rows)
    ) or any(
        utility.position_is_bomb(bombs, (x, c)) for c in range(n_cols)
    )

    if has_bomb:
        return 0
    elif los_bomb:
        return 4
    elif has_enemy:
        return 1
    elif has_wood:
        return 2
    else:
        return 3
def get_observation_state(self, board, pos, enemies, bomb_map, bomb_life, ammo):
    """
    Need just the board layout to decide everything
    board -> np.array
    pos -> tuple
    enemies -> list
    bomb_map, bomb_life -> array-likes handed to self.convert_bombs
    ammo -> number, compared against 0

    Returns an int state code. The base code is, in priority order:
        0 : a bomb is on my cell or on one of the four adjacent cells
        4 : self.check_bomb reports True for at least one adjacent cell
        1 : an enemy is on an adjacent cell
        2 : a wood is on an adjacent cell
        3 : none of the above
    and the base code is doubled when ammo > 0.
    """
    bombs = self.convert_bombs(np.array(bomb_map), np.array(bomb_life))
    x, y = pos

    has_bomb = utility.position_is_bomb(bombs, (x, y))
    has_enemy = False
    has_wood = False
    los_bomb = False

    for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
        neighbor = (x + dx, y + dy)
        if not (0 <= neighbor[0] < board.shape[0]
                and 0 <= neighbor[1] < board.shape[1]):
            continue
        if utility.position_is_bomb(bombs, neighbor):
            has_bomb = True
        if utility.position_is_wood(board, neighbor):
            has_wood = True
        # check the neighbour, not my own cell (which never holds an enemy)
        if utility.position_is_enemy(board, neighbor, enemies):
            has_enemy = True
        # accumulate over all neighbours instead of keeping only the
        # result for the last one visited
        if self.check_bomb(neighbor, bombs):
            los_bomb = True

    if has_bomb:
        state = 0
    elif los_bomb:
        state = 4
    elif has_enemy:
        state = 1
    elif has_wood:
        state = 2
    else:
        state = 3

    # NOTE(review): doubling makes some codes ambiguous (e.g. "wood, no ammo"
    # and "enemy, with ammo" are both 2, and 0 is never distinguished).
    # Left unchanged because consumers of the state may depend on these
    # values -- confirm before changing the encoding.
    if ammo > 0:
        state = 2 * state
    return state
def get_observation_state(self, board, pos, enemies, bomb_map, ammo):
    """
    Need just the board layout to decide everything
    board -> np.array
    pos -> tuple
    enemies -> list
    bomb_map -> array-like handed to self.convert_bombs
    ammo -> number, compared against 0

    Returns State(has_bomb, has_enemy, has_wood, los_bomb, has_ammo) where
        has_bomb  : a bomb is on my cell or on one of the four adjacent cells
        has_enemy : an enemy is on an adjacent cell
        has_wood  : a wood is on an adjacent cell
        los_bomb  : an adjacent cell shares a row or column with a bomb and
                    is within that bomb's blast_strength along that line
        has_ammo  : ammo > 0
    """
    bombs = self.convert_bombs(np.array(bomb_map))
    x, y = pos

    has_bomb = False
    has_enemy = False
    has_wood = False
    los_bomb = False
    has_ammo = ammo > 0

    for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
        newX = x + dx
        newY = y + dy
        if not (0 <= newX < board.shape[0] and 0 <= newY < board.shape[1]):
            continue
        if utility.position_is_bomb(bombs, (newX, newY)):
            has_bomb = True
        if utility.position_is_wood(board, (newX, newY)):
            has_wood = True
        # check the neighbour, not my own cell (which never holds an enemy)
        if utility.position_is_enemy(board, (newX, newY), enemies):
            has_enemy = True
        for bomb in bombs:
            bomb_x, bomb_y = bomb['position'][0], bomb['position'][1]
            reach = bomb['blast_strength']
            same_column = newY == bomb_y and abs(newX - bomb_x) <= reach
            same_row = newX == bomb_x and abs(newY - bomb_y) <= reach
            if same_column or same_row:
                los_bomb = True

    if utility.position_is_bomb(bombs, (x, y)):
        has_bomb = True

    return State(has_bomb, has_enemy, has_wood, los_bomb, has_ammo)
def _can_break_wood(cls, board, my_position, blast_strength): """ Whether one cay break a wood by placing a bomb at my position Parameters ---------- board : array board my_position : tuple where to place a bomb blast_strength : int strength of the bomb Return ------ boolean True iff can break a wood by placing a bomb """ x, y = my_position # To up for dx in range(1, blast_strength): if x + dx >= len(board[0]): break position = (x + dx, y) if utility.position_is_wood(board, position): return True elif not utility.position_is_passage(board, position): # stop searching this direction break # To down for dx in range(1, blast_strength): if x - dx < 0: break position = (x - dx, y) if utility.position_is_wood(board, position): return True elif not utility.position_is_passage(board, position): # stop searching this direction break # To right for dy in range(1, blast_strength): if y + dy >= len(board): break position = (x, y + dy) if utility.position_is_wood(board, position): return True elif not utility.position_is_passage(board, position): # stop searching this direction break # To left for dy in range(1, blast_strength): if y - dy < 0: break position = (x, y - dy) if utility.position_is_wood(board, position): return True elif not utility.position_is_passage(board, position): # stop searching this direction break return False
def _find_reachable_items(self, list_boards, my_position, time_positions):
    """
    Collect the items that can be reached along survivable time-positions

    Parameters
    ----------
    list_boards : list
        list of boards, generated by _board_sequence
    my_position : tuple
        my position, where the search starts
    time_positions : list
        survivable time-positions, generated by _search_time_expanded_network

    Return
    ------
    items : dict
        items[item] : list of time-positions at which item is found
        (on a reached cell, or on a neighbouring agent / wood cell)
    reached : array
        minimum time to reach each position on the board (inf if never)
    next_to_items : dict
        next_to_items[item] : list of time-positions of the reached cell
        from which a neighbouring agent / wood item was first seen
    """
    found = defaultdict(list)
    beside = defaultdict(list)
    # earliest arrival time per cell
    arrival = np.full(self.board_shape, np.inf)
    # cells already examined, either as a reached cell or as a neighbour
    seen = np.full(self.board_shape, False)
    offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))

    for t, positions in enumerate(time_positions):
        board = list_boards[t]
        for position in positions:
            if np.isfinite(arrival[position]):
                # already reached at an earlier time
                continue
            arrival[position] = t
            here = (t,) + position
            found[constants.Item(board[position])].append(here)
            seen[position] = True

            row, col = position
            for d_row, d_col in offsets:
                neighbor = (row + d_row, col + d_col)
                if not self._on_board(neighbor) or seen[neighbor]:
                    continue
                seen[neighbor] = True
                there = (t,) + neighbor

                if utility.position_is_agent(board, neighbor):
                    kind = constants.Item(board[neighbor])
                    found[kind].append(there)
                    beside[kind].append(here)

                # wood is judged on the last board, so wood that will have
                # been blown up by then is ignored
                if utility.position_is_wood(list_boards[-1], neighbor):
                    kind = constants.Item(board[neighbor])
                    found[kind].append(there)
                    beside[kind].append(here)

    return found, arrival, beside
def act(self, obs, action_space):
    """
    Choose the next action by scoring every board cell and heading for the best one.

    Steps, in order: run self._djikstra from my position; update the
    self.bomb_time countdown map from the visible bombs; count, per cell,
    the woods a bomb laid there could reach; combine that with distance,
    power-ups, bomb blast areas and enemy lines into pos_scores; then bomb
    if I already stand on the best cell and self._can_escape agrees,
    otherwise step towards the best cell with self._backtrack.

    Side effects: updates self.bomb_time, self.prev_action and self.prev_pos.

    Returns the integer value of the chosen constants.Action.

    NOTE(review): board size 11x11 is hard-coded throughout.
    NOTE(review): this block was reconstructed from a source whose line
    structure had been lost; the nesting of a few statements (marked below)
    is inferred and should be confirmed against the original file.
    """

    def convert_bombs(bomb_map):
        """Turn the blast-strength map into a list of {'position', 'blast_strength'} dicts."""
        ret = []
        locations = np.where(bomb_map > 0)
        for r, c in zip(locations[0], locations[1]):
            ret.append({
                'position': (r, c),
                'blast_strength': int(bomb_map[(r, c)])
            })
        return ret

    # search depth handed to _djikstra, also used in the distance bonus below
    depth = 20

    my_position = tuple(obs['position'])
    board = np.array(obs['board'])
    bombs = convert_bombs(np.array(obs['bomb_blast_strength']))
    enemies = [constants.Item(e) for e in obs['enemies']]
    ammo = int(obs['ammo'])
    blast_strength = int(obs['blast_strength'])

    # If the previous action was a move (action values 1..4) and I did not
    # move, mark the cell as rigid on this local copy of the board.
    # NOTE(review): the cell marked is prev_pos, i.e. my own cell -- confirm intent.
    # NOTE(review): nesting of the board assignment relative to the logging
    # check is inferred -- confirm.
    if self.prev_pos != None:
        if self.prev_pos == my_position:
            if 1 <= self.prev_action.value <= 4:
                if self.logging:
                    print('freeze')
                board[self.prev_pos] = constants.Item.Rigid.value

    items, dist, prev = self._djikstra(board,
                                       my_position,
                                       bombs,
                                       enemies,
                                       bomb_timer=self.bomb_time,
                                       depth=depth)

    if self.logging:
        print('my_position =', my_position)
        print('board =')
        print(board)
        print('dist =')
        print(dist)
        print('bombs =', bombs)
        print('enemies =', enemies)
        for e in enemies:
            print(e)
            pos = items.get(e, [])
            print('pos =', pos)
            print('pos_len=', len(pos))
            if len(pos) > 0:
                print('xy=', pos[0][0], ',', pos[0][1])
        print('ammo =', ammo)
        print('blast_strength =', blast_strength)

    # dist as a matrix, -1 where the cell is not in dist (only used for logging)
    test_ary = np.ones((11, 11))
    for c in range(11):
        for r in range(11):
            if (r, c) in dist:
                test_ary[r, c] = dist[(r, c)]
            else:
                test_ary[r, c] = -1
    if self.logging:
        print("dist_mat:")
        print(test_ary)

    # update bomb_time map
    bomb_life = 8
    # cells inside some bomb's blast line this step
    has_bomb = {}
    # woods already covered by an existing bomb
    already_breakable = np.zeros((11, 11))
    for b in bombs:
        r, c = b['position']
        strength = b['blast_strength']
        # start the countdown at bomb_life the first time, decrement afterwards
        # NOTE(review): the bomb's own cell is never added to has_bomb, so the
        # clean-up loop below resets its counter to 0 every call -- confirm.
        if self.bomb_time[(r, c)] == 0:
            self.bomb_time[(r, c)] = bomb_life
        else:
            self.bomb_time[(r, c)] -= 1
        for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            for d in range(1, strength):
                new_pos = (r + d * row, c + d * col)
                if TestSimpleAgent._out_of_board(new_pos):
                    continue
                # NOTE(review): `continue` skips the rigid cell but keeps
                # scanning beyond it -- confirm that `break` was not intended.
                if utility.position_is_rigid(board, new_pos):
                    continue
                if utility.position_is_wood(board, new_pos):
                    already_breakable[new_pos] = 1
                if self.bomb_time[new_pos] == 0:
                    self.bomb_time[new_pos] = bomb_life
                else:
                    self.bomb_time[new_pos] -= 1
                has_bomb[new_pos] = 1

    # clear up table: reset the countdown on every cell not in a blast line
    for c in range(11):
        for r in range(11):
            if (r, c) not in has_bomb:
                self.bomb_time[(r, c)] = 0
    if self.logging:
        print("bomb_time:")
        print(self.bomb_time)

    # evaluate each position in terms of breakable woods
    # num_breakable[p]        : woods (not already covered) whose line reaches p
    # num_breakable_inside[w] : woods adjacent to wood w (minus one when w has
    #                           no passable neighbour)
    # NOTE(review): nesting of the num_breakable_inside part under the wood
    # check is inferred -- confirm.
    num_breakable = np.zeros((11, 11))
    num_breakable_inside = np.zeros((11, 11))
    for c in range(11):
        for r in range(11):
            if utility.position_is_wood(board, (r, c)):
                if already_breakable[(r, c)]:
                    continue
                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    for d in range(1, blast_strength):
                        new_pos = (r + d * row, c + d * col)
                        if TestSimpleAgent._out_of_board(new_pos):
                            continue
                        if utility.position_is_passable(
                                board, new_pos,
                                enemies) or utility.position_is_flames(
                                    board, new_pos):
                            num_breakable[new_pos] += 1
                        else:
                            break
                tmp_num = 0
                has_passable = False
                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    new_pos = (r + row, c + col)
                    if TestSimpleAgent._out_of_board(new_pos):
                        continue
                    if utility.position_is_wood(board, new_pos):
                        tmp_num += 1
                    elif utility.position_is_passable(
                            board, new_pos, enemies):
                        has_passable = True
                if (not has_passable) and tmp_num > 0:
                    tmp_num -= 1
                num_breakable_inside[(r, c)] = tmp_num
    if self.logging:
        print('num_breakable:')
        print(num_breakable)
        print('num_breakable_inside:')
        print(num_breakable_inside)

    # total = directly breakable woods + half of the "inside" count of each neighbour
    num_breakable_total = np.zeros((11, 11))
    for c in range(11):
        for r in range(11):
            num_breakable_total[(r, c)] = num_breakable[(r, c)]
            if num_breakable_total[(r, c)] == -1 or num_breakable_total[(
                    r, c)] == np.inf:
                continue
            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                new_pos = (r + row, c + col)
                if new_pos[0] < 0 or new_pos[0] > 10:
                    continue
                if new_pos[1] < 0 or new_pos[1] > 10:
                    continue
                num_breakable_total[(
                    r, c)] += num_breakable_inside[new_pos] * 0.5
    if self.logging:
        print('num_breakable_total:')
        print(num_breakable_total)

    # evaluate each position in total
    # -1 for cells missing from dist, inf for cells with infinite dist
    # NOTE(review): nesting of the distance bonus under the
    # num_breakable_total check is inferred -- confirm.
    pos_scores = np.zeros((11, 11))
    for c in range(11):
        for r in range(11):
            if (r, c) not in dist:
                pos_scores[(r, c)] = -1
                continue
            elif dist[(r, c)] == np.inf:
                pos_scores[(r, c)] = np.inf
                continue
            if num_breakable_total[(r, c)] > 0:
                pos_scores[(r, c)] += num_breakable_total[(r, c)]
                pos_scores[(r, c)] += (depth - dist[(r, c)]) * 0.2
            # consider power-up items
            if board[(r, c)] in {
                    constants.Item.ExtraBomb.value,
                    constants.Item.IncrRange.value
            }:
                pos_scores[(r, c)] += 50
    if self.logging:
        print('pos_score:')
        print(pos_scores)

    # consider degree of freedom
    # Manhattan distance to the nearest enemy found in items (100 if none)
    dis_to_ene = 100
    for e in enemies:
        pos = items.get(e, [])
        if len(pos) > 0:
            d = abs(pos[0][0] - my_position[0]) + abs(pos[0][1] -
                                                      my_position[1])
            if dis_to_ene > d:
                dis_to_ene = d
    # dis_to_ene is a sum of absolute values (or 100), so this condition is
    # never true and the dead-end penalty below never runs.
    # NOTE(review): presumably a positive threshold was intended, or the
    # branch was disabled on purpose -- confirm.
    if dis_to_ene <= -4:
        deg_frees = np.zeros((11, 11))
        for c in range(11):
            for r in range(11):
                if not utility.position_is_passable(
                        board, (r, c), enemies):
                    continue
                deg_free = 0
                for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    new_pos = (r + row, c + col)
                    if new_pos[0] < 0 or new_pos[0] > 10:
                        continue
                    if new_pos[1] < 0 or new_pos[1] > 10:
                        continue
                    if utility.position_is_passable(
                            board, new_pos,
                            enemies) or utility.position_is_flames(
                                board, new_pos):
                        deg_free += 1
                deg_frees[(r, c)] = deg_free
                if deg_free <= 1:
                    pos_scores[(r, c)] -= 5
        if self.logging:
            print('deg_free')
            print(deg_frees)

    # consider bomb blast: overwrite the score of every cell in a blast line
    for i in range(len(bombs)):
        r, c = bombs[i]['position']
        strength = bombs[i]['blast_strength']
        pos_scores[(r, c)] = -20
        for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            for d in range(1, strength):
                new_pos = (r + d * row, c + d * col)
                if new_pos[0] < 0 or new_pos[0] > 10:
                    continue
                if new_pos[1] < 0 or new_pos[1] > 10:
                    continue
                if new_pos not in dist:
                    continue
                # new_pos is a tuple, so this comparison with a float is
                # never true.
                # NOTE(review): presumably dist[new_pos] == np.inf was
                # intended -- confirm.
                elif new_pos == np.inf:
                    continue
                pos_scores[new_pos] = -20
    if self.logging:
        print('consider blast pos_score:')
        print(pos_scores)

    # consider enemies: small bonus for passable cells in line with an enemy
    for e in enemies:
        pos = items.get(e, [])
        if len(pos) > 0:
            r = pos[0][0]
            c = pos[0][1]
            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                for d in range(1, blast_strength * 2):
                    new_pos = (r + d * row, c + d * col)
                    if new_pos[0] < 0 or new_pos[0] > 10:
                        continue
                    if new_pos[1] < 0 or new_pos[1] > 10:
                        continue
                    if not utility.position_is_passable(
                            board, new_pos, enemies):
                        break
                    pos_scores[new_pos] += 0.3
    if self.logging:
        print('consider enemy:')
        print(pos_scores)

    # pick the reachable cell with the highest score; the comparison is
    # strict, so the first such cell in (column, row) scan order wins ties
    h_r, h_c = -1, -1
    h_score = -1
    for c in range(11):
        for r in range(11):
            if (r, c) not in dist:
                continue
            elif dist[(r, c)] == np.inf:
                continue
            if h_score < pos_scores[(r, c)]:
                h_score = pos_scores[(r, c)]
                h_r, h_c = (r, c)
    if self.logging:
        print('h_score and pos:', h_score, '(r, c) =', h_r, ',', h_c)
        print('prev:')
        print(prev)

    # if current position is not the highest score position, move to the highest position.
    if h_r == -1:
        # no reachable cell scored above -1
        self.prev_action = constants.Action.Stop
    elif pos_scores[my_position] == h_score:
        # already on a best cell: bomb only if _can_escape agrees
        if self._can_escape(pos_scores, my_position, blast_strength):
            self.prev_action = constants.Action.Bomb
        else:
            self.prev_action = constants.Action.Stop
    else:
        self.prev_action = self._backtrack(my_position, (h_r, h_c), prev)

    self.prev_pos = my_position
    if self.logging:
        print('action: ', self.prev_action)
    return self.prev_action.value

    # NOTE(review): everything below follows an unconditional return and is
    # unreachable. It is kept as found; confirm whether it can be removed.

    # Move if we are in an unsafe place.
    unsafe_directions = self._directions_in_range_of_bomb(
        board, my_position, bombs, dist)
    if unsafe_directions:
        directions = self._find_safe_directions(board, my_position,
                                                unsafe_directions, bombs,
                                                enemies)
        return random.choice(directions).value

    # Lay pomme if we are adjacent to an enemy.
    if self._is_adjacent_enemy(items, dist, enemies) and self._maybe_bomb(
            ammo, blast_strength, items, dist, my_position):
        return constants.Action.Bomb.value

    # Move towards an enemy if there is one in exactly three reachable spaces.
    direction = self._near_enemy(my_position, items, dist, prev, enemies,
                                 3)
    if direction is not None and (self._prev_direction != direction or
                                  random.random() < .5):
        self._prev_direction = direction
        return direction.value

    # Move towards a good item if there is one within two reachable spaces.
    direction = self._near_good_powerup(my_position, items, dist, prev, 2)
    if direction is not None:
        return direction.value

    # Maybe lay a bomb if we are within a space of a wooden wall.
    if self._near_wood(my_position, items, dist, prev, 1):
        if self._maybe_bomb(ammo, blast_strength, items, dist,
                            my_position):
            return constants.Action.Bomb.value
        else:
            return constants.Action.Stop.value

    # Move towards a wooden wall if there is one within two reachable spaces and you have a bomb.
    direction = self._near_wood(my_position, items, dist, prev, 2)
    if direction is not None:
        directions = self._filter_unsafe_directions(
            board, my_position, [direction], bombs)
        if directions:
            return directions[0].value

    # Choose a random but valid direction.
    directions = [
        constants.Action.Stop, constants.Action.Left,
        constants.Action.Right, constants.Action.Up, constants.Action.Down
    ]
    valid_directions = self._filter_invalid_directions(
        board, my_position, directions, enemies)
    directions = self._filter_unsafe_directions(board, my_position,
                                                valid_directions, bombs)
    directions = self._filter_recently_visited(
        directions, my_position, self._recently_visited_positions)
    if len(directions) > 1:
        directions = [k for k in directions if k != constants.Action.Stop]
    if not len(directions):
        directions = [constants.Action.Stop]

    # Add this position to the recently visited uninteresting positions so we don't return immediately.
    self._recently_visited_positions.append(my_position)
    self._recently_visited_positions = self._recently_visited_positions[
        -self._recently_visited_length:]

    return random.choice(directions).value
def act(self, obs, action_space, info):
    """
    Choose the action that keeps the largest number of survivable positions.

    Each survivable action (including kicks and laying a bomb) is scored by
    the sum, over future time steps, of the number of survivable positions;
    one of the best-scoring actions is returned. When no action is
    survivable, fall back in order to: kicking a bomb, stepping onto the
    teammate, stepping onto an enemy, stepping onto any cell that is not
    rigid / wood / bomb, and finally stopping.

    Parameters
    ----------
    obs : dict
        observation; reads 'board', 'position', 'can_kick', 'enemies', 'teammate'
    action_space :
        not used
    info : dict
        reads 'list_boards_no_move', 'curr_bombs', 'curr_flames',
        'moving_direction'

    Return
    ------
    int
        value of the chosen constants.Action (every exit path, including
        the final Stop fallback, returns the integer value)
    """

    #
    # Definitions
    #

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_kick = obs["can_kick"]  # whether I can kick
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_teammate = obs["teammate"]

    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable, prev, succ, _ \
        = self._search_time_expanded_network(info["list_boards_no_move"],
                                             my_position)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable,
                                       obs,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       enemy_mobility=0)

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    n_survivable = dict()
    kick_actions = list()
    if my_kick:
        # Positions where we kick a bomb if we move to
        kickable = self._kickable_positions(obs, info["moving_direction"])
        for next_position in kickable:
            # consider what happens if I kick a bomb
            my_action = self._get_direction(my_position, next_position)

            # do not kick into fog: walk along the kick direction to the
            # edge of the board and give up if any cell is fog
            dx = next_position[0] - my_position[0]
            dy = next_position[1] - my_position[1]
            position = next_position
            is_fog = False
            while self._on_board(position):
                if utility.position_is_fog(board, position):
                    is_fog = True
                    break
                position = (position[0] + dx, position[1] + dy)
            if is_fog:
                continue

            list_boards_with_kick, next_position \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=0)
            survivable_with_kick, prev_kick, succ_kick, _ \
                = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                     next_position)
            if next_position in survivable_with_kick[0]:
                survivable_actions.append(my_action)
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [
                    len(s) for s in survivable_with_kick[1:]
                ]
                kick_actions.append(my_action)
    else:
        kickable = set()

    x, y = my_position
    for action in survivable_actions:
        # for each survivable action, check the survivability
        if action == constants.Action.Bomb:
            n_survivable[action] = [
                len(s) for s in survivable_with_bomb[1:]
            ]
            continue

        if action == constants.Action.Up:
            dx = -1
            dy = 0
        elif action == constants.Action.Down:
            dx = 1
            dy = 0
        elif action == constants.Action.Left:
            dx = 0
            dy = -1
        elif action == constants.Action.Right:
            dx = 0
            dy = 1
        elif action == constants.Action.Stop:
            dx = 0
            dy = 0
        else:
            raise ValueError()
        next_position = (x + dx, y + dy)
        n_survivable[action], _info = self._count_survivable(
            succ, 1, next_position)

    most_survivable_action = None
    if survivable_actions:
        survivable_score = dict()
        for action in n_survivable:
            survivable_score[action] = sum(
                [n for n in n_survivable[action]])
            if verbose:
                print(action, survivable_score[action],
                      n_survivable[action])
        best_survivable_score = max(survivable_score.values())

        # shuffle so that ties are broken at random
        random.shuffle(survivable_actions)
        for action in survivable_actions:
            if survivable_score[action] == best_survivable_score:
                most_survivable_action = action
                break

    if most_survivable_action is not None:
        print("Most survivable action", most_survivable_action)
        return most_survivable_action.value

    # kick if possible
    if my_kick:
        kickable = self._kickable_positions(obs, info["moving_direction"])
    else:
        kickable = set()
    print("Kickable", my_kick, kickable)
    while kickable:
        next_position = kickable.pop()
        action = self._get_direction(my_position, next_position)
        print("Must kick to survive", action)
        return action.value

    # move towards a teammate if she is blocking
    for action in [
            constants.Action.Right, constants.Action.Left,
            constants.Action.Down, constants.Action.Up
    ]:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility._position_is_item(board, next_position, my_teammate):
            print("Must move to teammate to survive", action)
            return action.value

    # move towards an enemy
    for action in [
            constants.Action.Right, constants.Action.Left,
            constants.Action.Down, constants.Action.Up
    ]:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility.position_is_enemy(board, next_position, my_enemies):
            print("Must move to enemy to survive", action)
            return action.value

    # move towards anywhere besides rigid
    for action in [
            constants.Action.Right, constants.Action.Left,
            constants.Action.Down, constants.Action.Up
    ]:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility.position_is_rigid(board, next_position):
            continue
        if utility.position_is_wood(board, next_position):
            continue
        if utility.position_is_bomb(info["curr_bombs"], next_position):
            continue
        print("Try moving to survive", action)
        return action.value

    action = constants.Action.Stop
    print("Must die", action)
    # return the integer value, like every other exit path of this method
    return action.value
def act(self, obs, action_space, info):
    """
    Choose the action with the longest survivable horizon, taking enemies into account.

    For each move available at time 1 (and for laying a bomb, when
    possible) this computes how many steps one can keep surviving
    according to self._get_survivable_with_enemy, and returns one of the
    actions with the largest number of steps, chosen at random among ties.
    Stop is returned when no action was recorded.

    Parameters
    ----------
    obs : dict
        observation; reads 'board', 'position', 'can_kick', 'enemies',
        'teammate', 'ammo', 'blast_strength', 'bomb_blast_strength',
        'bomb_life'
    action_space :
        not used
    info : dict
        reads 'list_boards_no_move', 'curr_bombs', 'curr_flames'
        (and 'moving_direction' in the unreachable part)

    Return
    ------
    int
        value of the chosen constants.Action

    NOTE(review): both branches of `if survivable_steps:` return, so
    everything after it is unreachable as reconstructed here. That part
    also uses names that are never defined (my_survivable_with_kick,
    my_prev_with_kick, my_survivable_with_kick_enemy, verbose is a free
    name) and unpacks self._kickable_positions in two different ways.
    The nesting was inferred from a source whose line structure had been
    lost -- confirm against the original file before relying on it.
    """

    #
    # Definitions
    #

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_kick = obs["can_kick"]  # whether I can kick
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    my_teammate = obs["teammate"]
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']

    # first board cell holding each visible enemy
    enemy_position = dict()
    for enemy in my_enemies:
        positions = np.argwhere(board == enemy.value)
        if len(positions) == 0:
            continue
        enemy_position[enemy] = tuple(positions[0])

    # survivable_steps[action] : how long one can survive after action
    survivable_steps = defaultdict(int)

    #
    # survivable tree in standard case
    #

    list_boards_no_kick = deepcopy(info["list_boards_no_move"])

    # remove myself: replace my agent marker by a bomb if a bomb lies under
    # me, and by a passage otherwise
    if obs["bomb_blast_strength"][my_position]:
        for b in list_boards_no_kick:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Bomb.value
    else:
        for b in list_boards_no_kick:
            if utility.position_is_agent(b, my_position):
                b[my_position] = constants.Item.Passage.value

    my_survivable, my_prev, my_succ, my_survivable_with_enemy \
        = self._get_survivable_with_enemy(list_boards_no_kick, my_position, enemy_position)

    # life[(t, x, y)] : latest time reachable from time-position (t, x, y),
    # propagated backwards from the end of the search range through my_prev
    life = defaultdict(int)
    for t in range(self._search_range, 0, -1):
        for position in my_survivable_with_enemy[t]:
            if not life[(t, ) + position]:
                life[(t, ) + position] = t
            for prev_position in my_prev[t][position]:
                life[(t - 1, ) + prev_position] = max([
                    life[(t - 1, ) + prev_position],
                    life[(t, ) + position]
                ])

    for next_position in my_survivable[1]:
        my_action = self._get_direction(my_position, next_position)
        survivable_steps[my_action] = life[(1, ) + next_position]

    #
    # survivable tree if I lay bomb
    #

    if all([obs["ammo"] > 0, obs["bomb_life"][my_position] == 0]):
        # if I can lay a bomb

        board_with_bomb = deepcopy(obs["board"])
        curr_bombs_with_bomb = deepcopy(info["curr_bombs"])
        # lay a bomb
        board_with_bomb[my_position] = constants.Item.Bomb.value
        bomb = characters.Bomb(
            characters.Bomber(),  # dummy owner of the bomb
            my_position,
            constants.DEFAULT_BOMB_LIFE,
            my_blast_strength,
            None)
        curr_bombs_with_bomb.append(bomb)
        list_boards_with_bomb, _ \
            = self._board_sequence(board_with_bomb,
                                   curr_bombs_with_bomb,
                                   info["curr_flames"],
                                   self._search_range,
                                   my_position,
                                   enemy_mobility=0)

        my_survivable_with_bomb, my_prev_with_bomb, my_succ_with_bomb, my_survivable_with_bomb_enemy \
            = self._get_survivable_with_enemy(list_boards_with_bomb, my_position, enemy_position)

        # same backward propagation as above, on the boards with the new bomb
        life = defaultdict(int)
        for t in range(self._search_range, 0, -1):
            for position in my_survivable_with_bomb_enemy[t]:
                if not life[(t, ) + position]:
                    life[(t, ) + position] = t
                for prev_position in my_prev_with_bomb[t][position]:
                    life[(t - 1, ) + prev_position] = max([
                        life[(t - 1, ) + prev_position],
                        life[(t, ) + position]
                    ])

        survivable_steps[constants.Action.Bomb] = life[(1, ) + my_position]

    print("survivable steps")
    print(survivable_steps)

    if survivable_steps:
        # pick at random among the actions with the largest number of steps
        values = np.array(list(survivable_steps.values()))
        print(values)
        best_index = np.where(values == np.max(values))
        best_actions = np.array(list(survivable_steps.keys()))[best_index]
        best_action = random.choice(best_actions)
        print("Most survivable action", best_action)
        return best_action.value
    else:
        print("No actions: stop")
        return constants.Action.Stop.value

    # NOTE(review): unreachable from here on (see the docstring).

    #
    # survivable tree if I kick
    #

    if my_kick:
        # Positions where I kick a bomb if I move to
        kickable, more_kickable = self._kickable_positions(
            obs, info["moving_direction"])
        for next_position in set.union(*[kickable, more_kickable]):
            # consider what happens if I kick a bomb
            my_action = self._get_direction(my_position, next_position)
            list_boards_with_kick, next_position \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=0)
            my_survivable_with_kick[next_position], \
                my_prev_with_kick[next_position], \
                my_succ_with_bomb[next_position], \
                my_survivable_with_kick_enemy[next_position] \
                = self._get_survivable_with_enemy(list_boards_with_kick[1:],
                                                  next_position,
                                                  enemy_position)

            survivable_with_kick, prev_kick, succ_kick, _ \
                = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                     next_position)

    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(my_survivable,
                                       obs,
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       enemy_mobility=0)

    survivable_actions = [a for a in is_survivable if is_survivable[a]]

    n_survivable = dict()
    kick_actions = list()
    if my_kick:
        # Positions where we kick a bomb if we move to
        kickable = self._kickable_positions(obs, info["moving_direction"])
        for next_position in kickable:
            # consider what happens if I kick a bomb
            my_action = self._get_direction(my_position, next_position)
            list_boards_with_kick, next_position \
                = self._board_sequence(obs["board"],
                                       info["curr_bombs"],
                                       info["curr_flames"],
                                       self._search_range,
                                       my_position,
                                       my_action=my_action,
                                       can_kick=True,
                                       enemy_mobility=0)
            survivable_with_kick, prev_kick, succ_kick, _ \
                = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                     next_position)
            if next_position in survivable_with_kick[0]:
                survivable_actions.append(my_action)
                is_survivable[my_action] = True
                n_survivable[my_action] = [1] + [
                    len(s) for s in survivable_with_kick[1:]
                ]
                kick_actions.append(my_action)
    else:
        kickable = set()

    x, y = my_position
    for action in survivable_actions:
        # for each survivable action, check the survivability
        if action == constants.Action.Bomb:
            n_survivable[action] = [
                len(s) for s in survivable_with_bomb[1:]
            ]
            continue

        if action == constants.Action.Up:
            dx = -1
            dy = 0
        elif action == constants.Action.Down:
            dx = 1
            dy = 0
        elif action == constants.Action.Left:
            dx = 0
            dy = -1
        elif action == constants.Action.Right:
            dx = 0
            dy = 1
        elif action == constants.Action.Stop:
            dx = 0
            dy = 0
        else:
            raise ValueError()
        next_position = (x + dx, y + dy)
        n_survivable[action], _info = self._count_survivable(
            my_succ, 1, next_position)

    most_survivable_action = None
    if survivable_actions:
        survivable_score = dict()
        for action in n_survivable:
            survivable_score[action] = sum(
                [n for n in n_survivable[action]])
            if verbose:
                print(action, survivable_score[action],
                      n_survivable[action])
        best_survivable_score = max(survivable_score.values())

        random.shuffle(survivable_actions)
        for action in survivable_actions:
            if survivable_score[action] == best_survivable_score:
                most_survivable_action = action
                break

    if most_survivable_action is not None:
        print("Most survivable action", most_survivable_action)
        return most_survivable_action.value

    # kick if possible
    if my_kick:
        kickable = self._kickable_positions(obs, info["moving_direction"])
    else:
        kickable = set()
    print("Kickable", my_kick, kickable)
    while kickable:
        next_position = kickable.pop()
        action = self._get_direction(my_position, next_position)
        print("Must kick to survive", action)
        return action.value

    # move towards a teammate if she is blocking
    for action in [
            constants.Action.Right, constants.Action.Left,
            constants.Action.Down, constants.Action.Up
    ]:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility._position_is_item(board, next_position, my_teammate):
            print("Must move to teammate to survive", action)
            return action.value

    # move towards an enemy
    for action in [
            constants.Action.Right, constants.Action.Left,
            constants.Action.Down, constants.Action.Up
    ]:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility.position_is_enemy(board, next_position, my_enemies):
            print("Must move to enemy to survive", action)
            return action.value

    # move towards anywhere besides rigid
    for action in [
            constants.Action.Right, constants.Action.Left,
            constants.Action.Down, constants.Action.Up
    ]:
        next_position = utility.get_next_position(my_position, action)
        if not self._on_board(next_position):
            continue
        if utility.position_is_rigid(board, next_position):
            continue
        if utility.position_is_wood(board, next_position):
            continue
        if utility.position_is_bomb(info["curr_bombs"], next_position):
            continue
        print("Try moving to survive", action)
        return action.value

    action = constants.Action.Stop
    print("Must die", action)
    # NOTE(review): returns the enum member, unlike the `.value` returns above
    return action
def act(self, obs, action_space, info):
    """Choose this step's action by trying a fixed sequence of goals.

    The goals are tried in order and the first one producing an action
    wins: take the only survivable action, place a bomb when standing on
    a bomb target, move to dig, move toward a good item, move toward a
    bomb target, move toward the fog seen longest ago, and finally pick
    a random survivable action other than Bomb.

    Args:
        obs: observation dict; 'board', 'position', 'ammo',
            'blast_strength' and 'bomb_life' are read here.
        action_space: not used by this method.
        info: dict of precomputed data; 'list_boards_no_move',
            'last_seen', 'curr_bombs', 'curr_flames', 'might_powerup',
            'flame_life' and 'since_last_seen' are read here.

    Returns:
        The ``.value`` of the selected ``constants.Action``, or None when
        no action is survivable.
    """

    #
    # Definitions
    #

    self._search_range = 10

    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']

    if verbose:
        print("My position", my_position, end="\t")

    # Sequence of boards handed to every search helper below
    future_boards = info["list_boards_no_move"]

    # Survivable time-positions and the preceding-position map; the other
    # two outputs of the search are not needed here
    survivable, prev, _succ, _ = self._search_time_expanded_network(
        future_boards, my_position)

    # Where to place bombs to break wood.
    # Each agent item has a fixed list of (wood cell, dig cell) pairs: the
    # first pair whose wood cell is wood in info["last_seen"] makes the
    # neighbouring dig cell the single bomb target.
    bomb_target = np.full(board.shape, False)
    digging = None
    dig_plans = (
        (constants.Item.Agent0,
         [((1, n), (1, n - 1)) for n in (4, 5, 6)]),
        (constants.Item.Agent1,
         [((m, 1), (m + 1, 1)) for m in (6, 5, 4)]),
        (constants.Item.Agent2,
         [((m, 9), (m + 1, 9)) for m in (6, 5, 4)]),
        (constants.Item.Agent3,
         [((1, n), (1, n + 1)) for n in (6, 5, 4)]),
    )
    my_item = board[my_position]
    for agent_item, plan in dig_plans:
        if my_item == agent_item.value:
            for wood_cell, dig_cell in plan:
                if utility.position_is_wood(info["last_seen"], wood_cell):
                    bomb_target[dig_cell] = True
                    digging = dig_cell
                    break
            # only the first matching agent item is considered
            break

    if digging is None:
        # Nothing to dig: fall back to the generic wood-breaking targets
        bomb_target, _n_breakable = self._get_bomb_target(
            future_boards[-1],
            my_position,
            my_blast_strength,
            constants.Item.Wood)

    # Items that can be reached in a survivable manner
    reachable_items, _, next_to_items = self._find_reachable_items(
        future_boards, my_position, survivable, bomb_target)

    # Survivable actions
    is_survivable, survivable_with_bomb = self._get_survivable_actions(
        survivable, obs, info["curr_bombs"], info["curr_flames"])

    safe_actions = [
        candidate for candidate in is_survivable if is_survivable[candidate]
    ]

    if verbose:
        print("survivable actions are", safe_actions)

    #
    # Choose an action
    #

    if not safe_actions:
        # This should not happen
        return None
    if len(safe_actions) == 1:
        # move to the position if it is the only survivable position
        action = safe_actions[0]
        print("The only survivable action", action)
        return action.value

    #
    # Place a bomb
    #

    # Bomb only when: bombing was evaluated at all, I stand on a bomb
    # target, every entry of survivable_with_bomb is non-empty, and the
    # blast is not expected to break a possible powerup. The checks run
    # in this order and stop at the first failure.
    consider_bomb = (
        survivable_with_bomb is not None
        and bomb_target[my_position]
        and all([len(s) > 0 for s in survivable_with_bomb])
        and not self._might_break_powerup(
            future_boards[-1],
            my_position,
            my_blast_strength,
            info["might_powerup"])
    )

    if consider_bomb:
        # place bomb if I am at a bomb target
        print("Bomb at a bomb target", constants.Action.Bomb)
        return constants.Action.Bomb.value

    #
    # Move to dig
    #

    dig_route = reachable_items["target"]
    if digging and dig_route:
        time_to_reach = dig_route[0][0]
        enterable = [
            constants.Item.Passage.value,
            constants.Item.ExtraBomb.value,
            constants.Item.IncrRange.value,
            constants.Item.Kick.value,
        ]
        # I have ammo and the dig cell currently holds one of the
        # items listed above
        has_ammo_and_open = my_ammo and board[digging] in enterable
        # The dig cell is flames now and its flame_life does not exceed
        # the time needed to get there
        flames_end_in_time = (
            info["flame_life"][digging] <= time_to_reach
            and utility.position_is_flames(board, digging)
        )
        if has_ammo_and_open or flames_end_in_time:
            action = self._find_distance_minimizer(
                my_position, dig_route, prev, is_survivable)
            if action is not None:
                print("Move to dig", action)
                return action.value

    # Move towards good items
    # TODO : kick may be a good item only if I cannot kick yet
    # TODO : might want to destroy
    good_items = [
        constants.Item.ExtraBomb,
        constants.Item.IncrRange,
        constants.Item.Kick,
    ]
    # positions with good items
    item_spots = set()
    for item in good_items:
        item_spots = item_spots.union(reachable_items[item])
    if item_spots:
        action = self._find_distance_minimizer(
            my_position, item_spots, prev, is_survivable)
        if action is not None:
            print("Moving toward good item", action)
            return action.value

    #
    # Move towards where to bomb
    #

    bomb_spots = reachable_items["target"]
    # If I have no bomb, I do not want to wait at the target that will be
    # covered by flames before I can place a bomb
    if my_ammo == 0:
        bomb_lives = [
            obs["bomb_life"][(bx, by)]
            for _, bx, by in reachable_items[constants.Item.Bomb]
        ]
        # smallest bomb life among reachable bombs, capped by the default
        first_blast_time = min([constants.DEFAULT_BOMB_LIFE] + bomb_lives)
        unburnt_spots = []
        for t, x, y in bomb_spots:
            arrives_after_blast = t > first_blast_time
            not_in_flames = (
                future_boards[int(first_blast_time)][(x, y)]
                != constants.Item.Flames.value
            )
            if arrives_after_blast or not_in_flames:
                unburnt_spots.append((t, x, y))
        # keep the unfiltered targets when the filter removes everything
        if unburnt_spots:
            bomb_spots = unburnt_spots

    action = self._find_distance_minimizer(
        my_position, bomb_spots, prev, is_survivable)
    if action is not None:
        print("Moving toward where to bomb", action)
        return action.value

    #
    # TODO : move toward might powerups
    #

    #
    # Move towards a fog where we have not seen longest
    #

    stalest_fog = None
    oldest = 0
    steps = [(-1, 0), (1, 0), (0, -1), (0, 1)]
    for t, x, y in next_to_items[constants.Item.Fog]:
        neighbors = [(x + dx, y + dy) for dx, dy in steps]
        # age of a candidate = largest since_last_seen among its
        # on-board neighbours
        age = max([
            info["since_last_seen"][cell]
            for cell in neighbors
            if self._on_board(cell)
        ])
        if age > oldest:
            oldest = age
            stalest_fog = (t, x, y)

    if stalest_fog is not None:
        action = self._find_distance_minimizer(
            my_position, [stalest_fog], prev, is_survivable)
        if action is not None:
            print("Moving toward oldest fog", action)
            return action.value

    #
    # Random action
    #

    # Bomb is never chosen at random
    if constants.Action.Bomb in safe_actions:
        safe_actions.remove(constants.Action.Bomb)

    action = random.choice(safe_actions)
    print("Random action", action, safe_actions)
    return action.value