def is_valid_direction(board, row, col, direction, invalid_values, invalid_positions):
    """Determine whether a move is in a valid direction.

    The move from (row, col) along ``direction`` is valid when the target
    cell is on the board, its value is not in ``invalid_values``, and its
    coordinates are not in ``invalid_positions``.  Raises
    ``constants.InvalidAction`` for an unknown direction.
    """
    action = constants.Action(direction)
    # Row/column deltas per supported action; Stop checks the cell in place.
    deltas = {
        constants.Action.Up: (-1, 0),
        constants.Action.Down: (1, 0),
        constants.Action.Left: (0, -1),
        constants.Action.Right: (0, 1),
        constants.Action.Stop: (0, 0),
    }
    if action not in deltas:
        raise constants.InvalidAction("We did not receive a valid direction: ", direction)
    d_row, d_col = deltas[action]
    new_row = row + d_row
    new_col = col + d_col
    # Target must stay inside the board (assumes the starting cell is valid).
    if not (0 <= new_row < len(board) and 0 <= new_col < len(board[0])):
        return False
    return (board[new_row][new_col] not in invalid_values
            and (new_row, new_col) not in invalid_positions)
def is_valid_position(board, position, direction, step):
    """Determine whether moving ``step`` cells from ``position`` is valid.

    The target cell must be on the board and must not contain a rigid
    wall.  ``Stop`` is always valid for an on-board position.  Raises
    ``constants.InvalidAction`` for an unknown direction.
    """
    row, col = position
    # Only rigid walls block the step here (wood is not excluded).
    invalid_values = [constants.Item.Rigid.value]
    # Idiomatic truthiness check instead of "== False".
    if not utility.position_on_board(board, position):
        return False
    # Convert once instead of re-converting in every branch.
    action = constants.Action(direction)
    if action == constants.Action.Stop:
        return True
    if action == constants.Action.Up:
        return row - step >= 0 and board[row - step][col] not in invalid_values
    if action == constants.Action.Down:
        return row + step < len(board) and board[row + step][col] not in invalid_values
    if action == constants.Action.Left:
        return col - step >= 0 and board[row][col - step] not in invalid_values
    if action == constants.Action.Right:
        return col + step < len(board[0]) and \
            board[row][col + step] not in invalid_values
    raise constants.InvalidAction("We did not receive a valid direction: ", direction)
def _is_valid_direction(board, row, col, direction, invalid_values=None):
    """Return True when one step along ``direction`` lands on a passable cell.

    By default rigid walls and wood are impassable; ``Stop`` is always
    valid.  Raises ``constants.InvalidAction`` for an unknown direction.
    """
    if invalid_values is None:
        invalid_values = [
            item.value for item in [constants.Item.Rigid, constants.Item.Wood]
        ]
    action = constants.Action(direction)
    if action == constants.Action.Stop:
        return True
    # Row/column offsets for the four movement actions.
    offsets = {
        constants.Action.Up: (-1, 0),
        constants.Action.Down: (1, 0),
        constants.Action.Left: (0, -1),
        constants.Action.Right: (0, 1),
    }
    if action not in offsets:
        raise constants.InvalidAction("We did not receive a valid direction: ", direction)
    d_row, d_col = offsets[action]
    new_row = row + d_row
    new_col = col + d_col
    # Off-board targets are invalid (assumes the starting cell is on board).
    if not (0 <= new_row < len(board) and 0 <= new_col < len(board[0])):
        return False
    return board[new_row][new_col] not in invalid_values
def rollout(self):
    """Play one full episode driven by MCTS and return its outcome.

    Returns:
        (length, reward, rewards): number of steps taken, this agent's
        final reward, and the reward list of all agents.
    """
    # reset search tree in the beginning of each rollout
    self.reset_tree()
    # guarantees that we are not called recursively
    # and episode ends when this agent dies
    self.env.training_agent = self.agent_id
    obs = self.env.reset()
    length = 0
    done = False
    while not done:
        if args.render:
            self.env.render()
        # full serialized env state is used as the search root
        root = self.env.get_json_info()
        # do Monte-Carlo tree search
        pi = self.search(root, args.mcts_iters, args.temperature)
        # sample action from probabilities
        action = np.random.choice(NUM_ACTIONS, p=pi)
        # ensure we are not called recursively
        assert self.env.training_agent == self.agent_id
        # make other agents act
        actions = self.env.act(obs)
        # add my action to list of actions
        actions.insert(self.agent_id, action)
        # step environment
        obs, rewards, done, info = self.env.step(actions)
        assert self == self.env._agents[self.agent_id]
        length += 1
        print("Agent:", self.agent_id, "Step:", length, "Actions:",
              [constants.Action(a).name for a in actions], "Probs:",
              [round(p, 2) for p in pi], "Rewards:", rewards, "Done:", done)
    reward = rewards[self.agent_id]
    return length, reward, rewards
def rollout(self, shared_buffer, finished):
    """Play one MCTS-driven episode while syncing weights from a trainer.

    Args:
        shared_buffer: shared memory whose ``raw`` bytes hold pickled
            model weights published by the trainer process.
        finished: shared flag; a truthy ``finished.value`` stops the loop.

    Returns:
        (trace, reward, rewards): list of (features, pi) pairs collected
        per step, this agent's final reward, and all agents' rewards.
    """
    # reset search tree in the beginning of each rollout
    self.reset_tree()
    # guarantees that we are not called recursively
    # and episode ends when this agent dies
    self.env.training_agent = self.agent_id
    obs = self.env.reset()
    trace = []
    done = False
    while not done and not finished.value:
        if args.render:
            self.env.render()
        # copy weights from trainer
        # NOTE(review): pickle.loads on shared memory is only safe because
        # the producer is a trusted local trainer process — confirm.
        self.model.set_weights(pickle.loads(shared_buffer.raw))
        # use temperature 1 for first 30 steps and temperature 0 afterwards
        #temp = 0 if self.env._step_count < 30 else 0
        # TODO: only works when agent has access to the env
        root = self.env.get_json_info()
        # do Monte-Carlo tree search
        pi = self.search(root, args.mcts_iters, args.temperature)
        # sample action from probabilities
        action = np.random.choice(NUM_ACTIONS, p=pi)
        # record observations and action probabilities
        feats = self.observation_to_features(obs[self.agent_id])
        trace.append((feats, pi))
        # ensure we are not called recursively
        assert self.env.training_agent == self.agent_id
        # make other agents act
        actions = self.env.act(obs)
        # add my action to list of actions
        actions.insert(self.agent_id, action)
        # step environment
        obs, rewards, done, info = self.env.step(actions)
        assert self == self.env._agents[self.agent_id]
        print("Agent:", self.agent_id, "Step:", self.env._step_count,
              "Actions:", [constants.Action(a).name for a in actions],
              "Probs:", [round(p, 2) for p in pi],
              "Entropy: %.2f" % self.entropies[-1],
              "Iters/s: %.2f" % self.iters_sec[-1],
              "Rewards:", rewards, "Done:", done)
        #print("Rollout finished:", finished.value)
    reward = rewards[self.agent_id]
    #print("Agent:", self.agent_id, "Reward:", reward, "Len trace:", len(trace))
    return trace, reward, rewards
def set_json_info(self):
    """Sets the game state as the init_game_state.

    Restores board, items, agents, bombs and flames from the JSON
    snapshot stored in ``self._init_game_state``.
    """
    board_size = int(self._init_game_state['board_size'])
    self._board_size = board_size
    self._step_count = int(self._init_game_state['step_count'])

    # Build the board directly from the serialized rows instead of
    # allocating a Passage-filled board and overwriting every cell in a
    # Python-level double loop (the snapshot always stores the full
    # board_size x board_size grid).
    board_array = json.loads(self._init_game_state['board'])
    self._board = np.array(board_array, dtype=np.uint8)

    # Items are stored as [position, item_value] pairs.
    self._items = {}
    item_array = json.loads(self._init_game_state['items'])
    for i in item_array:
        self._items[tuple(i[0])] = i[1]

    # Restore each agent's position and per-agent state.
    agent_array = json.loads(self._init_game_state['agents'])
    for a in agent_array:
        agent = next(x for x in self._agents \
                     if x.agent_id == a['agent_id'])
        agent.set_start_position((a['position'][0], a['position'][1]))
        agent.reset(int(a['ammo']), bool(a['is_alive']),
                    int(a['blast_strength']), bool(a['can_kick']))

    # Rebuild bomb objects, re-linking each to its owning agent.
    self._bombs = []
    bomb_array = json.loads(self._init_game_state['bombs'])
    for b in bomb_array:
        bomber = next(x for x in self._agents \
                      if x.agent_id == b['bomber_id'])
        moving_direction = b['moving_direction']
        if moving_direction is not None:
            moving_direction = constants.Action(moving_direction)
        self._bombs.append(
            characters.Bomb(bomber, tuple(b['position']), int(b['life']),
                            int(b['blast_strength']), moving_direction))

    # Rebuild flames with their remaining life.
    self._flames = []
    flame_array = json.loads(self._init_game_state['flames'])
    for f in flame_array:
        self._flames.append(
            characters.Flame(tuple(f['position']), f['life']))
def handle_agent_move(game_data, agent_id, row, col, action):
    """Apply one agent action to the encoded ``game_data`` grid.

    Returns the agent's resulting (row, col).  ``Stop`` keeps the
    position, ``Bomb`` lays a bomb in place (also keeping the position),
    and a movement action returns the neighbouring cell when passable.
    """
    if action == constants.Action.Stop.value:
        return row, col
    elif action == constants.Action.Bomb.value:
        ammo = EnvSimulator._get_agent_value(game_data, agent_id, AMMO_POS)
        # Only lay a bomb when the cell holds no bomb yet (bomb cells are
        # encoded as values >= 10000) and the agent has ammo left.
        if game_data[row, col] < 10000 and ammo > 0:
            # Encoded bomb cell: 10000 + owner*1000 + direction*100 +
            # strength*10 + life; here life=9, direction=0 (static) and
            # owner=agent_id+3 — presumably "agent standing on its own
            # bomb"; confirm against the decoding in EnvSimulator.act.
            game_data[row, col] = 10009 + (
                agent_id + 3) * 1000 + EnvSimulator._get_agent_value(
                    game_data, agent_id, BLAST_STRENGTH_POS) * 10
            # Laying a bomb consumes one unit of ammo.
            EnvSimulator._set_agent_value(game_data, agent_id, AMMO_POS,
                                          ammo - 1)
        return row, col
    else:
        # Movement: rigid walls and wood block the step.
        invalid_values = [
            constants.Item.Rigid.value, constants.Item.Wood.value
        ]
        if EnvSimulator._is_valid_direction(game_data, row, col, action,
                                            invalid_values):
            return utility.get_next_position((row, col),
                                             constants.Action(action))
        else:
            return row, col
def bomb_test(observ, flame_positions, remaining_directions):
    """Decide whether placing a bomb right now is reasonable.

    Returns False when bombing is impossible or likely harmful: no ammo,
    standing on a flame, the teammate within ``2 * blast_strength``
    Manhattan distance, the current position (or an adjacent escape cell)
    already covered by a bomb about to explode, or the agent standing at
    the intersection of two or more corridors.  Otherwise returns True.

    ``remaining_directions`` is kept for interface compatibility; the
    current heuristic does not use it.
    """
    # Cannot place a bomb without ammo.
    if observ['ammo'] < 1:
        return False
    my_position = observ['position']
    board = observ['board']
    bomb_life = observ['bomb_life']
    blast_st = observ['bomb_blast_strength']
    if my_position in flame_positions:
        return False
    # Avoid blasting the teammate: do not bomb when the teammate is close
    # enough (Manhattan distance) to be caught by the explosion.
    teammate_id = observ['teammate'].value
    mate_pos = np.where(board == teammate_id)
    if mate_pos[0].shape[0] > 0:
        m_x, m_y = mate_pos[0][0], mate_pos[1][0]
        if abs(m_x - my_position[0]) + abs(
                m_y - my_position[1]) <= observ['blast_strength'] * 2:
            return False

    # Not bomb when my_position is covered by a bomb with life <= life_value.
    def neighbor_test(my_pos, life_value):
        """Return False if ``my_pos`` lies in the blast range of a bomb
        whose remaining life is <= ``life_value``; the scan in each of the
        four directions stops at cells a blast cannot pass through."""
        x, y = my_pos
        # One unified scan replaces the original four near-identical loops.
        for d_x, d_y in ((-1, 0), (1, 0), (0, -1), (0, 1)):
            i, j = x + d_x, y + d_y
            while utility.position_on_board(board, (i, j)):
                position = (i, j)
                # Only one coordinate varies per direction, so this is the
                # straight-line distance from my_pos.
                distance = abs(i - x) + abs(j - y)
                if int(bomb_life[position]) <= life_value and \
                        blast_st[position] > distance:
                    return False
                if not position_can_be_bomb_through(board, position):
                    break
                i, j = i + d_x, j + d_y
        return True

    if not neighbor_test(my_position, life_value=10):
        return False
    directions = [
        constants.Action.Down, constants.Action.Up, constants.Action.Left,
        constants.Action.Right
    ]
    # Not place bomb when agent is at the intersections of two or more
    # corridors.
    corridors = []
    for d in directions:
        d = constants.Action(d)
        next_pos = utility.get_next_position(my_position, d)
        if not utility.position_on_board(board, next_pos):
            continue
        if not position_can_be_bomb_through(board, next_pos):
            continue
        # The adjacent escape cell itself must not be bomb-covered either.
        if not neighbor_test(next_pos, life_value=10):
            return False
        perpendicular_dirs = [constants.Action.Left, constants.Action.Right]
        if d == constants.Action.Left or d == constants.Action.Right:
            perpendicular_dirs = [constants.Action.Down, constants.Action.Up]
        corridors.append(
            direction_filter.is_in_corridor(board, next_pos,
                                            perpendicular_dirs))
    if len(corridors) >= 2 and all(corridors):
        return False
    return True
def act(self, obs, action_space):
    """One decision step of the simulation-based agent.

    Pipeline: extend the observation with bomb moving directions and
    flame lifetimes, simulate future boards (with and without enemy
    movement), compute the survivable actions, then walk a priority
    list — forced moves, good items, bomb placement (break wood / kill
    enemies), moving toward wood, kicking, approaching enemies — and
    finally fall back to the parent class's action.
    """
    #
    # Definitions
    #
    self._search_range = 10
    board = obs['board']
    my_position = obs["position"]  # tuple([x,y]): my position
    my_ammo = obs['ammo']  # int: the number of bombs I have
    my_blast_strength = obs['blast_strength']
    my_enemies = [constants.Item(e) for e in obs['enemies']]
    #
    # Prepare extended observations
    # - bomb moving direction
    # - flame remaining life
    #
    # Summarize information about bombs
    #   curr_bombs : list of current bombs
    #   moving_direction : array of moving direction of bombs
    curr_bombs, moving_direction, self._prev_bomb_life \
        = self._get_bombs(obs, self._prev_bomb_life)
    # Summarize information about flames
    curr_flames, self._prev_flame_life \
        = self._get_flames(board, self._prev_flame_life,
                           self._prev_bomb_position_strength)
    # bombs to be exploded in the next step
    self._prev_bomb_position_strength = list()
    rows, cols = np.where(obs["bomb_blast_strength"] > 0)
    for position in zip(rows, cols):
        strength = int(obs["bomb_blast_strength"][position])
        self._prev_bomb_position_strength.append((position, strength))
    #
    # Understand current situation
    #
    # Simulation assuming enemies stay unmoved
    # List of simulated boards
    list_boards_no_move, _ \
        = self._board_sequence(board, curr_bombs, curr_flames,
                               self._search_range, my_position,
                               enemy_mobility=0)
    # List of the set of survivable time-positions at each time
    # and preceding positions
    survivable_no_move, prev_no_move \
        = self._search_time_expanded_network(list_boards_no_move,
                                             my_position)
    # Items that can be reached in a survivable manner
    reachable_items_no_move, reached_no_move, next_to_items_no_move \
        = self._find_reachable_items(list_boards_no_move, my_position,
                                     survivable_no_move)
    # Simulation assuming enemies move
    for enemy_mobility in range(3, -1, -1):
        # List of boards simulated
        list_boards, _ = self._board_sequence(board, curr_bombs,
                                              curr_flames,
                                              self._search_range,
                                              my_position,
                                              enemy_mobility=enemy_mobility)
        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable, prev = self._search_time_expanded_network(list_boards,
                                                              my_position)
        if len(survivable[1]) > 0:
            # Gradually reduce the mobility of enemy, so we have at least
            # one survivable action
            break
    # Items that can be reached in a survivable manner
    reachable_items, reached, next_to_items \
        = self._find_reachable_items(list_boards, my_position, survivable)
    # Survivable actions
    is_survivable, survivable_with_bomb \
        = self._get_survivable_actions(survivable, obs, curr_bombs,
                                       curr_flames)
    survivable_actions = [a for a in is_survivable if is_survivable[a]]
    if verbose:
        print("survivable actions are", survivable_actions)
    # Positions where we kick a bomb if we move to
    kickable = self._kickable_positions(obs, moving_direction)
    print()
    # Debug dump of the simulated boards (disabled: range(0) is empty).
    for t in range(0):
        print(list_boards[t])
        print(survivable[t])
        for key in prev[t]:
            print(key, prev[t][key])
    #
    # Choose an action
    #
    """
    # This is not effective in the current form
    if len(survivable_actions) > 1:
        # avoid the position if only one position at the following step
        # the number of positions that can be reached from the next position
        next = defaultdict(set)
        next_count = defaultdict(int)
        for position in survivable[1]:
            next[position] = set([p for p in prev[2] if position in prev[2][p]])
            next_count[position] = len(next[position])
        print("next count", next_count)
        if max(next_count.values()) > 1:
            for position in survivable[1]:
                if next_count[position] == 1:
                    risky_action = self._get_direction(my_position, position)
                    is_survivable[risky_action] = False
            survivable_actions = [a for a in is_survivable if is_survivable[a]]
    """
    # Do not stay on a bomb if I can
    if all([obs["bomb_life"][my_position] > 0,
            len(survivable_actions) > 1,
            is_survivable[constants.Action.Stop]]):
        is_survivable[constants.Action.Stop] = False
        survivable_actions = [a for a in is_survivable if is_survivable[a]]
    if len(survivable_actions) == 0:
        # must die
        # TODO: might want to do something that can help team mate
        # TODO: kick if possible
        print("Must die", constants.Action.Stop)
        return super().act(obs, action_space)
        # return constants.Action.Stop.value
    elif len(survivable_actions) == 1:
        # move to the position if it is the only survivable position
        action = survivable_actions[0]
        print("The only survivable action", action)
        return action.value
    # Move towards good items
    good_items = [constants.Item.ExtraBomb, constants.Item.IncrRange]
    # TODO : kick may be a good item only if I cannot kick yet
    # TODO : might want to destroy
    good_items.append(constants.Item.Kick)
    # positions with good items
    good_time_positions = set()
    for item in good_items:
        good_time_positions = good_time_positions.union(
            reachable_items[item])
    if len(good_time_positions) > 0:
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev, is_survivable)
        if action is not None:
            print("Moving toward good item", action)
            return action.value
    # TODO : shoud check the survivability of all agents in one method
    # Place a bomb if
    # - it does not significantly reduce my survivability
    # - it can break wood
    # - it can reduce the survivability of enemies
    if is_survivable[constants.Action.Bomb]:
        # if survavable now after bomb, consider bomb
        if all([len(s) > 0 for s in survivable_with_bomb]):
            # if survivable all the time after bomb, consider bomb
            if all([self._can_break_wood(list_boards_no_move[-1],
                                         my_position, my_blast_strength)]
                   + [not utility.position_is_flames(board, my_position)
                      for board in list_boards_no_move[:10]]):
                # place bomb if can break wood
                print("Bomb to break wood", constants.Action.Bomb)
                return constants.Action.Bomb.value
            for enemy in my_enemies:
                # check if the enemy is reachable
                if len(reachable_items_no_move[enemy]) == 0:
                    continue
                # can reach the enemy at enemy_position in enemy_time step
                enemy_time = reachable_items_no_move[enemy][0][0]
                enemy_position = reachable_items_no_move[enemy][0][1:3]
                # find direction towards enemy
                positions = set([x[1:3] for x in next_to_items_no_move[enemy]])
                # backtrack the time-expanded network toward time 1
                for t in range(enemy_time, 1, -1):
                    _positions = set()
                    for position in positions:
                        _positions = _positions.union(prev_no_move[t][position])
                    positions = _positions.copy()
                #if enemy_time <= my_blast_strength:
                if True:
                    positions.add(my_position)
                    positions_after_bomb = set(survivable[1]).difference(positions)
                    if positions_after_bomb:
                        print("Bomb to kill an enemy", enemy,
                              constants.Action.Bomb)
                        return constants.Action.Bomb.value
                else:
                    # bomb to kick
                    x0, y0 = my_position
                    positions_against = [(2*x0-x, 2*y0-y)
                                         for (x, y) in positions]
                    positions_after_bomb = set(survivable[1]).intersection(positions_against)
                    if positions_after_bomb:
                        print("Bomb to kick", enemy, constants.Action.Bomb)
                        return constants.Action.Bomb.value
                """
                # check if placing a bomb can reduce the survivability
                # of the enemy
                survivable_before, _ = self._search_time_expanded_network(list_boards_no_move, enemy_position)
                board_with_bomb = deepcopy(obs["board"])
                curr_bombs_with_bomb = deepcopy(curr_bombs)
                # lay a bomb
                board_with_bomb[my_position] = constants.Item.Bomb.value
                bomb = characters.Bomb(characters.Bomber(),  # dummy owner of the bomb
                                       my_position,
                                       constants.DEFAULT_BOMB_LIFE,
                                       my_blast_strength,
                                       None)
                curr_bombs_with_bomb.append(bomb)
                list_boards_with_bomb, _ \
                    = self._board_sequence(board_with_bomb, curr_bombs_with_bomb, curr_flames, self._search_range, my_position, enemy_mobility=0)
                survivable_after, _ \
                    = self._search_time_expanded_network(list_boards_with_bomb, enemy_position)
                good_before = np.array([len(s) for s in survivable_before])
                good_after = np.array([len(s) for s in survivable_after])
                # TODO : what are good criteria?
                if any(good_after < good_before):
                    # place a bomb if it makes sense
                    print("Bomb to kill an enemy", constants.Action.Bomb)
                    return constants.Action.Bomb.value
                """
    # Move towards a wood
    if len(next_to_items_no_move[constants.Item.Wood]) > 0:
        # positions next to wood
        good_time_positions = next_to_items_no_move[constants.Item.Wood]
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev, is_survivable)
        if action is not None:
            print("Moving toward wood", action)
            return action.value
    # kick whatever I can kick
    # -- tentative, this is generally not a good strategy
    if len(kickable) > 0:
        while kickable:
            # then consider what happens if I kick a bomb
            next_position = kickable.pop()
            # do not kick a bomb if it will break a wall
            if all([moving_direction[next_position] is None,
                    self._can_break_wood(board, next_position,
                                         my_blast_strength)]):
                # if it is a static bomb
                # do not kick if it is breaking a wall
                continue
            my_action = self._get_direction(my_position, next_position)
            # simulate the kick and keep it only if it stays survivable
            list_boards_with_kick, next_position \
                = self._board_sequence(obs["board"], curr_bombs, curr_flames,
                                       self._search_range, my_position,
                                       my_action=my_action, can_kick=True,
                                       enemy_mobility=3)
            survivable_with_kick, prev_kick \
                = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                     next_position)
            if next_position in survivable_with_kick[0]:
                print("Kicking", my_action)
                return my_action.value
    # Move towards an enemy
    good_time_positions = set()
    for enemy in my_enemies:
        good_time_positions = good_time_positions.union(next_to_items[enemy])
    if len(good_time_positions) > 0:
        action = self._find_distance_minimizer(my_position,
                                               good_time_positions,
                                               prev, is_survivable)
        if obs["bomb_life"][my_position] > 0:
            # if on a bomb, move away
            if action == constants.Action.Down and is_survivable[constants.Action.Up]:
                action = constants.Action.Up
            elif action == constants.Action.Up and is_survivable[constants.Action.Down]:
                action = constants.Action.Down
            elif action == constants.Action.Right and is_survivable[constants.Action.Left]:
                action = constants.Action.Left
            elif action == constants.Action.Left and is_survivable[constants.Action.Right]:
                action = constants.Action.Right
            else:
                action = None
        if action is not None:
            print("Moving toward/against enemy", action)
            return action.value
    #
    # as in the agent from the previous competition
    #
    action = super().act(obs, action_space)
    if is_survivable[constants.Action(action)]:
        print("Action from prev. agent", constants.Action(action))
        return action
    else:
        action = random.choice(survivable_actions)
        print("Random action", action)
        return action.value
# NOTE(review): this fragment looks like the body/tail of an evaluation
# loop whose header is not visible in this chunk — confirm the enclosing
# control flow before editing.
env.render()
# Let all built-in agents act, then override agents 1 and 3 with the
# trained PPO policy.
actions = env.act(obs)
actions[1] = ppo_agent.compute_action(observation=penv.featurize(
    obs[1]), policy_id="ppo_policy")
actions[3] = ppo_agent.compute_action(observation=penv.featurize(
    obs[3]), policy_id="ppo_policy")
obs, reward, done, info = env.step(actions)
# Debug dump: print each of the 13 feature planes of agent 1's
# featurized observation, followed by the raw board and bomb state.
features = penv.featurize(obs[1])
for i in range(13):
    print("i:", i)
    print(features["board"][:, :, i])
    print("======")
print(obs[1]["board"])
print()
print(obs[1]["bomb_life"])
print("step:", step)
print("alive:", obs[1]["alive"])
print("actions:", [constants.Action(action) for action in actions])
print("reward:", reward)
print("done:", done)
print("info:", info)
print("=========")
step += 1
env.render(close=True)
# env.close()
def act(game_data, actions):
    """Advance the encoded ``game_data`` grid by one step for both agents.

    Mutates ``game_data`` in place: decays fire, moves agents (resolving
    collisions), moves/kicks bombs (resolving bomb-bomb and bomb-agent
    conflicts), applies pickups and deaths, then explodes expired bombs.

    Cell encoding visible in this function: plain agents are 10/11, fire
    cells count down to MIN_FIRE, and bomb cells are
    10000 + player*1000 + direction*100 + strength*10 + life.
    """
    MIN_FIRE = 20
    AGENT_0 = 10
    AGENT_1 = 11
    if EnvSimulator.get_done(game_data):
        return
    #print(game_data, actions)
    # move objects
    pos_agent0_prev = None
    pos_agent0 = None
    pos_agent1_prev = None
    pos_agent1 = None
    pos_bomb_prev = []
    for row in range(game_data.shape[1]):
        for col in range(game_data.shape[1]):
            if EnvSimulator._is_fire(game_data, (row, col)):
                # fire decays each step; it disappears at MIN_FIRE
                game_data[row, col] -= 1
                if game_data[row, col] == MIN_FIRE:
                    game_data[row, col] = 0
            elif game_data[row, col] == AGENT_1 or game_data[row, col] >= 14000:
                # agent 1 either stands free (11) or on a bomb (>= 14000)
                pos_agent1_prev = (row, col)
                pos_agent1 = EnvSimulator.handle_agent_move(
                    game_data, 1, row, col, actions[1])
            elif game_data[row, col] == AGENT_0 or game_data[row, col] >= 13000:
                # agent 0 either stands free (10) or on a bomb (>= 13000)
                pos_agent0_prev = (row, col)
                pos_agent0 = EnvSimulator.handle_agent_move(
                    game_data, 0, row, col, actions[0])
            if game_data[row, col] >= 10000:
                pos_bomb_prev.append((row, col))
    # both agents moving onto the same cell bounces both back
    if pos_agent0 == pos_agent1:
        pos_agent0 = pos_agent0_prev
        pos_agent1 = pos_agent1_prev
    # move bombs
    pos_bomb = []
    change = False
    invalid_values = [
        constants.Item.Rigid.value, constants.Item.Wood.value,
        constants.Item.Kick, constants.Item.IncrRange,
        constants.Item.ExtraBomb
    ]
    for bomb_pos in pos_bomb_prev:
        bomb = game_data[bomb_pos]
        # hundreds digit of the encoded bomb is its moving direction
        direction = int((bomb % 1000) / 100)
        if direction == 0 and bomb_pos == pos_agent0:
            if pos_agent0 != pos_agent0_prev:
                # kick bomb
                direction = EnvSimulator.get_direction(
                    pos_agent0_prev, pos_agent0).value
            elif int((bomb % 10000) / 1000) != 1 and int(
                    (bomb % 10000) / 1000) != 3:
                raise ValueError("Fatal Error")
        elif direction == 0 and bomb_pos == pos_agent1:
            if pos_agent1 != pos_agent1_prev:
                # kick bomb
                direction = EnvSimulator.get_direction(
                    pos_agent1_prev, pos_agent1).value
            elif int((bomb % 10000) / 1000) != 2 and int(
                    (bomb % 10000) / 1000) != 4:
                raise ValueError("Fatal Error")
        new_bomb_pos = bomb_pos
        if direction > 0:
            change = True
            row, col = bomb_pos
            if EnvSimulator._is_valid_direction(game_data, row, col,
                                                direction, invalid_values):
                new_bomb_pos = utility.get_next_position(
                    bomb_pos, constants.Action(direction))
            if (row, col) == pos_agent0 or (row, col) == pos_agent1:
                new_bomb_pos = bomb_pos
        pos_bomb.append(new_bomb_pos)
    # iterate until no bomb-bomb or bomb-agent conflicts remain
    while change:
        change = False
        # bomb <-> bomb
        for i in range(len(pos_bomb)):
            pos = pos_bomb[i]
            for j in range(len(pos_bomb)):
                if i != j and pos == pos_bomb[j]:
                    pos_bomb[i] = pos_bomb_prev[i]
                    pos_bomb[j] = pos_bomb_prev[j]
                    change = True
            if pos_bomb[i] == pos_agent0 and (
                    pos_bomb[i] != pos_bomb_prev[i] or
                    pos_agent0 != pos_agent0_prev):
                pos_agent0 = pos_agent0_prev
                pos_bomb[i] = pos_bomb_prev[i]
                change = True
            elif pos_bomb[i] == pos_agent1 and (
                    pos_bomb[i] != pos_bomb_prev[i] or
                    pos_agent1 != pos_agent1_prev):
                pos_agent1 = pos_agent1_prev
                pos_bomb[i] = pos_bomb_prev[i]
                change = True
    # re-encode each bomb at its resolved position
    for i in range(len(pos_bomb)):
        cur_value = game_data[pos_bomb_prev[i]]
        life = int(cur_value % 10) - 1
        # a bomb moved into fire explodes immediately (life 0)
        if 20 < game_data[pos_bomb[i]] < 30:
            life = 0
        strength = int((cur_value % 100) / 10)
        direction = EnvSimulator.get_direction(pos_bomb[i],
                                               pos_bomb_prev[i]).value
        player = int((cur_value % 10000) / 1000)
        # owner codes 3/4 mean "agent on bomb"; normalize then re-apply
        if player > 2:
            player -= 2
        if pos_agent0 == pos_bomb[i] or pos_agent1 == pos_bomb[i]:
            player += 2
        game_data[pos_bomb_prev[i]] = 0
        game_data[pos_bomb[
            i]] = 10000 + player * 1000 + direction * 100 + strength * 10 + life
    # set agent
    #print(pos_agent0, pos_agent1)
    EnvSimulator._agent_collect(game_data, 0, pos_agent0)
    EnvSimulator._agent_collect(game_data, 1, pos_agent1)
    if pos_agent0_prev != pos_agent0:
        if game_data[pos_agent0_prev] < 10000:
            game_data[pos_agent0_prev] = 0
        if EnvSimulator._is_fire(game_data, pos_agent0):
            EnvSimulator._agent_died(game_data, 0)
        else:
            game_data[pos_agent0] = AGENT_0
    if pos_agent1_prev != pos_agent1:
        if game_data[pos_agent1_prev] < 10000:
            game_data[pos_agent1_prev] = 0
        if EnvSimulator._is_fire(game_data, pos_agent1):
            EnvSimulator._agent_died(game_data, 1)
        else:
            game_data[pos_agent1] = AGENT_1
    # fire bombs
    # loop until a fixed point so chain explosions are resolved
    fire = True
    while fire:
        fire = False
        for bomb in pos_bomb:
            bomb_value = game_data[bomb]
            if int(bomb_value % 10) == 0:
                strength = int((bomb_value % 100) / 10)
                EnvSimulator._set_fire(game_data, bomb[0], bomb[1], True)
                EnvSimulator._fire_bomb(game_data, bomb[0], bomb[1], 0, 1,
                                        strength - 1)  # right
                EnvSimulator._fire_bomb(game_data, bomb[0], bomb[1], 0, -1,
                                        strength - 1)  # left
                EnvSimulator._fire_bomb(game_data, bomb[0], bomb[1], 1, 0,
                                        strength - 1)  # down
                EnvSimulator._fire_bomb(game_data, bomb[0], bomb[1], -1, 0,
                                        strength - 1)  # up
                fire = True