def BFS_when_unsafe(my_position, env, enemies, trained_num=0):
    """Enumerate fixed-length action sequences starting at ``my_position``.

    A breadth-first expansion is run from a root ``BFSNode`` whose action
    history is ``[constants.Action.Stop]``.  Every node is expanded with the
    "stay" move plus the four axis-aligned neighbour moves; a move is only
    followed when ``CanGo`` accepts it.  Nodes whose ``steps`` reached
    ``BFS_EPISODES`` are not expanded further and are collected instead.

    Parameters
    ----------
    my_position : tuple
        (row, col) the search starts from.
    env :
        Environment object handed to the root ``BFSNode``.
    enemies :
        Forwarded unchanged to ``CanGo``.
    trained_num : int, optional
        Not used by this function.

    Returns
    -------
    list
        The collected nodes, ordered ascending by ``dist_from_start()``.
    """
    # Stay in place first, then up / down / left / right.
    moves = [(0, 0), (-1, 0), (1, 0), (0, -1), (0, 1)]

    frontier = queue.Queue()
    frontier.put(BFSNode(env, my_position, [constants.Action.Stop], 0))

    finished = []
    while not frontier.empty():
        node = frontier.get()

        # Depth limit reached: keep the node as a result, do not expand it.
        if node.steps >= BFS_EPISODES:
            finished.append(node)
            continue

        here = node.my_position
        for d_row, d_col in moves:
            target = (here[0] + d_row, here[1] + d_col)
            if (d_row, d_col) == (0, 0):
                action = constants.Action.Stop
            else:
                action = utility.get_direction(here, target)

            if not CanGo(node, action, enemies):
                continue
            frontier.put(
                BFSNode(node.env, target, node.taken_action + [action],
                        node.steps + 1))

    return sorted(finished, key=lambda candidate: candidate.dist_from_start())
def _get_direction_towards_position(my_position, position, prev): if not position: return None next_position = position while prev[next_position] != my_position: next_position = prev[next_position] return utility.get_direction(my_position, next_position)
def get_next_direction_according_to_prev(my_position, target_position, prev):
    """Return the first move from ``my_position`` towards ``target_position``.

    The predecessor map ``prev`` is followed backwards from
    ``target_position`` until the position whose predecessor is
    ``my_position`` is found.

    Returns ``None`` when ``target_position`` is falsy or when a ``None``
    predecessor is met before ``my_position`` is reached; otherwise returns
    whatever ``utility.get_direction`` yields for that first step.
    """
    if not target_position:
        return None

    step = target_position
    parent = prev[step]
    while parent != my_position:
        if parent is None:
            # Chain ended without touching my_position.
            return None
        step = parent
        parent = prev[step]
    return utility.get_direction(my_position, step)
def get_direction_towards_position(my_position, position, prev):
    """Return the first step from ``my_position`` on the path to ``position``.

    ``prev`` maps each position to its predecessor.  The chain is walked
    backwards from ``position`` until the entry whose predecessor is
    ``my_position`` is found.

    Parameters
    ----------
    my_position : tuple
        Position the path starts from.
    position : tuple or None
        Target position.  A falsy value means "no target".
    prev : mapping
        Predecessor map, indexed by position.

    Returns
    -------
    ``None`` when ``position`` is falsy; ``POSTION.none`` when the target has
    no predecessor or the chain ends in ``None`` before reaching
    ``my_position``; otherwise the result of ``utility.get_direction`` for
    the first step.
    """
    if not position:
        return None

    next_position = position
    # NOTE(review): `POSTION` is defined outside this block; the spelling
    # looks like a typo of "POSITION" - confirm against its definition.
    if prev[next_position] is None:
        return POSTION.none

    while prev[next_position] != my_position:
        next_position = prev[next_position]
        if prev[next_position] is None:
            # Same "no predecessor" situation as above, met further down the
            # chain; without this guard the next lookup would be prev[None].
            return POSTION.none
    return utility.get_direction(my_position, next_position)
def _get_direction(cls, this_position, next_position):
    """
    Action that leads from this position to next position

    Parameters
    ----------
    this_position : tuple
        position the move starts from
    next_position : tuple
        position the move ends at

    Return
    ------
    direction :
        constants.Action.Stop when both positions are equal, otherwise
        whatever utility.get_direction returns for the pair
    """
    if this_position != next_position:
        return utility.get_direction(this_position, next_position)
    return constants.Action.Stop
def _find_safe_directions(self, board, my_position, unsafe_directions, bombs, enemies):
    """Choose the directions the agent may move in without known danger.

    Parameters
    ----------
    board :
        Board indexed by (row, col); must support ``copy()`` and item
        assignment.
    my_position : tuple
        Current (row, col) of the agent.
    unsafe_directions : dict
        Maps a direction to a bomb range for every direction considered
        unsafe.
    bombs :
        Not used by this method.
    enemies :
        Forwarded to ``utility.position_is_passable``.

    Returns
    -------
    list
        Candidate directions; ``[constants.Action.Stop]`` when nothing else
        is available.
    """
    neighbor_offsets = [(-1, 0), (1, 0), (0, -1), (0, 1)]

    def is_stuck_direction(next_position, bomb_range, next_board, enemies):
        """Tell whether moving to ``next_position`` leaves the agent trapped.

        Passable cells are explored from ``next_position``, closest
        (Manhattan distance) first.  The move is not stuck as soon as a
        cell is reached that differs from ``next_position`` in both
        coordinates, or whose distance exceeds ``bomb_range``.
        """
        start_x, start_y = next_position
        frontier = queue.PriorityQueue()
        frontier.put((0, next_position))
        visited = set()

        while not frontier.empty():
            dist, position = frontier.get()
            visited.add(position)
            cur_x, cur_y = position

            off_both_axes = start_x != cur_x and start_y != cur_y
            if off_both_axes or dist > bomb_range:
                return False

            for d_row, d_col in neighbor_offsets:
                candidate = (d_row + cur_x, d_col + cur_y)
                if candidate in visited:
                    continue
                if not utility.position_on_board(next_board, candidate):
                    continue
                if not utility.position_is_passable(next_board, candidate,
                                                    enemies):
                    continue
                candidate_dist = (abs(candidate[0] - start_x) +
                                  abs(candidate[1] - start_y))
                frontier.put((candidate_dist, candidate))
        return True

    # Every direction is unsafe: look for one that does not lock us in,
    # pretending a bomb sits on our current cell.
    if len(unsafe_directions) == 4:
        next_board = board.copy()
        next_board[my_position] = constants.Item.Bomb.value

        for direction, bomb_range in unsafe_directions.items():
            next_position = utility.get_next_position(my_position, direction)
            reachable = (
                utility.position_on_board(next_board, next_position) and
                utility.position_is_passable(next_board, next_position,
                                             enemies))
            if reachable and not is_stuck_direction(
                    next_position, bomb_range, next_board, enemies):
                # First workable direction wins; iteration order of the
                # dict decides which one that is.
                return [direction]
        return [constants.Action.Stop]

    x, y = my_position
    off_board = []  # Directions that would leave the board.
    safe = []
    for d_row, d_col in neighbor_offsets:
        position = (x + d_row, y + d_col)
        direction = utility.get_direction(my_position, position)

        if not utility.position_on_board(board, position):
            off_board.append(direction)
        elif direction not in unsafe_directions and (
                utility.position_is_passable(board, position, enemies) or
                utility.position_is_fog(board, position)):
            safe.append(direction)

    if safe:
        return safe

    # Nothing safe: fall back to unsafe directions that at least stay on the
    # board, and to standing still when even those are missing.
    fallback = [k for k in unsafe_directions if k not in off_board]
    return fallback or [constants.Action.Stop]
def _find_safe_directions(self, board, my_position, unsafe_directions, bombs, enemies, item):
    """Choose the directions the agent may move in without known danger.

    Parameters
    ----------
    board :
        Board indexed by (row, col); must support ``copy()`` and item
        assignment.
    my_position : tuple
        Current (row, col) of the agent.
    unsafe_directions : dict
        Maps a direction to a bomb range for every direction considered
        unsafe.
    bombs : iterable of dict
        Each entry is read through the keys ``'position'``, ``'bomb_life'``
        and ``'blast_strength'``.
    enemies :
        Forwarded to ``utility.position_is_passable``.
    item : mapping
        Indexed with ``constants.Item(3)``; only the length of that entry is
        used.  NOTE(review): presumably the positions of bombs on the board
        - confirm against the caller.

    Returns
    -------
    list
        Candidate directions; ``[constants.Action.Stop]`` when nothing else
        is available.
    """

    def is_stuck_direction(next_position, bomb_range, next_board, enemies):
        '''Helper function to do determine if the agents next move is possible.

        Explores passable cells from ``next_position`` in order of Manhattan
        distance.  Returns False as soon as a cell is reached that differs
        from ``next_position`` in both coordinates or whose distance exceeds
        ``bomb_range``; returns True when the queue runs dry first.
        '''
        Q = queue.PriorityQueue()
        Q.put((0, next_position))
        seen = set()

        next_x, next_y = next_position
        is_stuck = True
        while not Q.empty():
            dist, position = Q.get()
            seen.add(position)
            #FIXME is_stuck=False
            position_x, position_y = position
            # Off both the row and the column of the start cell.
            if next_x != position_x and next_y != position_y:
                is_stuck = False
                break

            # Farther from the start cell than the bomb range.
            if dist > bomb_range:
                is_stuck = False
                break

            for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                new_position = (row + position_x, col + position_y)
                if new_position in seen:
                    continue

                if not utility.position_on_board(next_board, new_position):
                    continue

                if not utility.position_is_passable(
                        next_board, new_position, enemies):
                    continue

                # Priority is the Manhattan distance to the start cell.
                dist = abs(row + position_x - next_x) + abs(col + position_y - next_y)
                Q.put((dist, new_position))
        return is_stuck

    # All directions are unsafe. Return a position that won't leave us locked.
    safe = []

    if len(unsafe_directions) == 4:
        next_board = board.copy()
        # Treat our own cell as a bomb while testing escape routes.
        next_board[my_position] = constants.Item.Bomb.value
        # Unsafe directions that are off the board or not passable.
        disallowed = []
        for direction, bomb_range in unsafe_directions.items():
            next_position = utility.get_next_position(
                my_position, direction)
            next_x, next_y = next_position
            if not utility.position_on_board(next_board, next_position) or \
               not utility.position_is_passable(next_board, next_position, enemies):
                disallowed.append(direction)
                continue

            if not is_stuck_direction(next_position, bomb_range, next_board, enemies):
                # We found a direction that works. The .items provided
                # a small bit of randomness. So let's go with this one.
                return [direction]
        if not safe:
            # Before deciding to stand still, check whether a bomb was placed
            # on our own cell; if so, offer the unsafe directions that are
            # not disallowed instead (original note: pick one of them at
            # random).
            # for i in bombs:
            # if len(bombs) == 1 :
            if len(item[constants.Item(3)]) == 1:
                # if my_position == i['position']:
                for bomb in bombs:
                    if my_position == bomb['position']:
                        safe = [
                            k for k in unsafe_directions if k not in disallowed
                        ]
            # break
        if not safe:
            safe = [constants.Action.Stop]
        return safe

    x, y = my_position
    disallowed = []  # The directions that will go off the board.

    for row, col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
        position = (x + row, y + col)
        direction = utility.get_direction(my_position, position)

        # Don't include any direction that will go off of the board.
        if not utility.position_on_board(board, position):
            disallowed.append(direction)
            continue

        # Don't include any direction that we know is unsafe.
        if direction in unsafe_directions:
            # Disallow the unsafe direction when its cell is not passable,
            # to avoid kicking a bomb.
            if not utility.position_is_passable(board, position, enemies):
                disallowed.append(direction)
            # If moving in the unsafe direction would get us killed by the
            # blast right away, we must not go (this would have to be
            # decided from the bomb's life).
            # if
            continue

        if utility.position_is_passable(
                board, position, enemies) or utility.position_is_fog(
                    board, position):
            # After moving one cell there may be a bomb next to us.
            # The direction is added first and taken back out below when a
            # bomb with bomb_life == 1 shares the target's row or column
            # within its blast_strength.
            safe.append(direction)
            for bomb in bombs:
                if bomb['bomb_life'] == 1:
                    bomb_x, bomb_y = bomb['position']
                    if bomb_x == position[0] and abs(
                            bomb_y - position[1]) <= bomb['blast_strength']:
                        #remove the direction
                        safe.pop()
                        break
                    elif bomb_y == position[1] and abs(
                            bomb_x - position[0]) <= bomb['blast_strength']:
                        safe.pop()
                        break

    if not safe:
        # We don't have any safe directions, so return something that is allowed.
        safe = [k for k in unsafe_directions if k not in disallowed]

    if not safe:
        # We don't have ANY directions. So return the stop choice.
        return [constants.Action.Stop]

    return safe
def update(game_data, obs, my_id):
    """Advance ``game_data`` by one step so that it matches ``obs``.

    The action of every agent is inferred by comparing its position on the
    stored board with its position on the observed board, the inferred
    actions are simulated with ``EnvSimulator.act`` and the outcome is
    checked against the observation.  When the simulated board differs from
    the observed one in more than items, the actions inferred as ``0`` are
    replaced by every value in ``range(1, 6)`` until a combination
    reproduces the observed board.

    Parameters
    ----------
    game_data :
        Simulator state; read through ``board_size``, ``step_count``,
        ``board``, ``agents`` and ``bombs``.  It is modified in place.
    obs : dict
        Observation; read through the keys ``'board'``, ``'step_count'``,
        ``'bomb_life'``, ``'bomb_blast_strength'``, ``'alive'``, ``'ammo'``,
        ``'blast_strength'`` and ``'can_kick'``.
    my_id : int
        Index into ``game_data.agents`` of the agent ``obs`` belongs to.

    Returns
    -------
    tuple
        ``(game_data, actions, reset)``.  ``game_data`` is the updated state
        (a deep copy of the pre-step state with the recovered actions
        applied when the action search was needed), ``actions`` maps agent
        id to the action value that was simulated, and ``reset`` is True
        when agent abilities or items had to be corrected from the
        observation.

    Raises
    ------
    ValueError
        If the board size or step count does not match, an agent is not
        alive before the step, the simulated alive state or bomb count
        disagrees with the observation, or no action combination
        reproduces the observed board.
    """
    if game_data.board_size != len(obs['board']):
        raise ValueError('Invalid update: boardsize different!')
    if game_data.step_count + 1 != obs['step_count']:
        raise ValueError('Invalid update: missed step count!')
    game_data.step_count = obs['step_count']

    new_board = EnvSimulator.get_board(game_data.board_size, obs['board'])
    new_bomb_life = EnvSimulator.get_board(game_data.board_size,
                                           obs['bomb_life'], 0)
    new_bomb_strength = EnvSimulator.get_board(game_data.board_size,
                                               obs['bomb_blast_strength'], 0)
    reset = False

    # get actions
    actions = {}
    for a in game_data.agents:
        # Agents are looked up on the board under the value agent_id + 10.
        old_pos = EnvSimulator.get_position(game_data.board, a.agent_id + 10,
                                            True)
        new_pos = EnvSimulator.get_position(new_board, a.agent_id + 10, True)

        if not a.is_alive:
            raise ValueError('update error: agent life!')

        if old_pos != new_pos:
            actions[a.agent_id] = utility.get_direction(old_pos,
                                                        new_pos).value
            # The agent stepped onto a cell that held a non-moving bomb, so
            # it must be able to kick even though we did not know that yet.
            if not a.can_kick and game_data.board[
                    new_pos] == constants.Item.Bomb.value:
                for b in game_data.bombs:
                    if b.position == new_pos and b.moving_direction is None:
                        a.can_kick = True
                        reset = True
        elif new_bomb_life[new_pos] == constants.DEFAULT_BOMB_LIFE:
            # A fresh bomb lies under the agent: it placed one this step.
            actions[a.agent_id] = constants.Action.Bomb.value
            if a.ammo == 0:
                a.ammo += 1
                reset = True
            if a.blast_strength != new_bomb_strength[new_pos]:
                a.blast_strength = new_bomb_strength[new_pos]
                reset = True
        else:
            actions[a.agent_id] = constants.Action.Stop.value

    # Keep the pre-step state so other action combinations can be replayed.
    save_game_data = copy.deepcopy(game_data)
    EnvSimulator.act(game_data, actions)

    if game_data.agents[0].is_alive != (10 in obs['alive']):
        raise ValueError(
            f'update error: agent life!\n\n{game_data.board}\n\n{new_board}'
        )
    if game_data.agents[1].is_alive != (11 in obs['alive']):
        raise ValueError(
            f'update error: agent life!\n\n{game_data.board}\n\n{new_board}'
        )
    if len(game_data.bombs) != len(new_bomb_life[new_bomb_life > 0]):
        raise ValueError(
            f'update error: bomb count!\n\n{game_data.board}\n\n{new_board}'
        )

    # compare boards
    equal, equal_noitems = EnvSimulator.boards_equal(game_data.board,
                                                     new_board, True)
    if not equal:
        if equal_noitems:
            # EQUAL WITHOUT ITEMS => SOMEWHERE NEW ITEMS AVAILABLE -> RESET
            reset = True
        else:
            print(
                f'board unequal: {game_data.board}\n\n{new_board}\n\n{actions}'
            )

            def find_actions(save_game_data, actions):
                """Search for an action pair that reproduces ``new_board``.

                An agent whose inferred action is 0 may really have tried
                something that had no visible effect, so every value in
                range(1, 6) is tried for it; other actions are kept.
                Returns (game_data, actions, boards_fully_equal) or
                (None, None, False) when nothing matches.
                """
                actions_1 = [actions[0]] if actions[0] != 0 else range(1, 6)
                actions_2 = [actions[1]] if actions[1] != 0 else range(1, 6)
                for a1 in actions_1:
                    for a2 in actions_2:
                        game_data = copy.deepcopy(save_game_data)
                        acts = {0: a1, 1: a2}
                        EnvSimulator.act(game_data, acts)
                        eq, eq_noitems = EnvSimulator.boards_equal(
                            game_data.board, new_board, True)
                        if eq_noitems:
                            return game_data, acts, eq
                return None, None, False

            game_data, actions, eq = find_actions(save_game_data, actions)
            print(f'found game_data: {game_data}\n\n{actions}')
            if game_data is None:
                # `actions` is None here as well, so calling find_actions
                # again would fail with TypeError before this error could be
                # reported; raise the intended error directly.
                raise ValueError('should not happen anymore')
            if not eq:
                # EQUAL WITHOUT ITEMS => SOMEWHERE NEW ITEMS AVAILABLE -> RESET
                reset = True

    # The observation is authoritative for our own agent's abilities.
    game_data.agents[my_id].ammo = int(obs['ammo'])
    game_data.agents[my_id].blast_strength = int(obs['blast_strength'])
    game_data.agents[my_id].can_kick = bool(obs['can_kick'])

    # update board because of items
    game_data.board = new_board

    return game_data, actions, reset