Esempio n. 1
0
    def is_valid_direction(board, row, col, direction, invalid_values,
                           invalid_positions):
        '''Determines whether moving from (row, col) in `direction` is valid.

        A move is valid when the destination cell is on the board, its
        board value is not in `invalid_values`, and its coordinates are
        not in `invalid_positions`.  Stop applies the same checks to the
        current cell.  Raises InvalidAction for non-movement directions.
        '''
        action = constants.Action(direction)

        if action == constants.Action.Stop:
            on_board, target = True, (row, col)
        elif action == constants.Action.Up:
            on_board, target = row > 0, (row - 1, col)
        elif action == constants.Action.Down:
            on_board, target = row + 1 < len(board), (row + 1, col)
        elif action == constants.Action.Left:
            on_board, target = col > 0, (row, col - 1)
        elif action == constants.Action.Right:
            on_board, target = col + 1 < len(board[0]), (row, col + 1)
        else:
            raise constants.InvalidAction(
                "We did not receive a valid direction: ", direction)

        # short-circuit so off-board targets are never indexed
        return (on_board and board[target[0]][target[1]] not in invalid_values
                and target not in invalid_positions)
Esempio n. 2
0
def is_valid_position(board, position, direction, step):
    """Check whether moving `step` cells from `position` in `direction`
    lands on a board cell that is not a Rigid wall.

    Args:
        board: 2D grid of item values.
        position: (row, col) starting coordinates; must be on the board.
        direction: value convertible to constants.Action.
        step: number of cells to move in the given direction.

    Returns:
        True if the destination is on the board and not Rigid; Stop is
        always valid from an on-board position.

    Raises:
        constants.InvalidAction: for non-movement actions (e.g. Bomb).
    """
    row, col = position
    # Only Rigid walls block this check (no pointless comprehension needed).
    invalid_values = [constants.Item.Rigid.value]
    # Idiomatic truthiness test instead of `== False`.
    if not utility.position_on_board(board, position):
        return False

    # Convert once; an invalid enum value raises ValueError here, as before.
    action = constants.Action(direction)

    if action == constants.Action.Stop:
        return True

    if action == constants.Action.Up:
        return row - step >= 0 and board[row - step][col] not in invalid_values

    if action == constants.Action.Down:
        return row + step < len(board) and \
            board[row + step][col] not in invalid_values

    if action == constants.Action.Left:
        return col - step >= 0 and board[row][col - step] not in invalid_values

    if action == constants.Action.Right:
        return col + step < len(board[0]) and \
            board[row][col + step] not in invalid_values

    raise constants.InvalidAction("We did not receive a valid direction: ",
                                  direction)
Esempio n. 3
0
    def _is_valid_direction(board, row, col, direction, invalid_values=None):
        """Check whether a one-cell move from (row, col) in `direction`
        stays on the board and avoids cells whose value is in
        `invalid_values` (defaults to Rigid and Wood).  Stop is always
        valid; non-movement actions raise InvalidAction.
        """
        if invalid_values is None:
            # default: cannot walk into rigid walls or wood
            invalid_values = [
                constants.Item.Rigid.value, constants.Item.Wood.value
            ]

        # convert once; invalid enum values raise ValueError here
        action = constants.Action(direction)

        if action == constants.Action.Stop:
            return True

        if action == constants.Action.Up:
            return row > 0 and board[row - 1][col] not in invalid_values

        if action == constants.Action.Down:
            return row + 1 < len(board) and \
                board[row + 1][col] not in invalid_values

        if action == constants.Action.Left:
            return col > 0 and board[row][col - 1] not in invalid_values

        if action == constants.Action.Right:
            return col + 1 < len(board[0]) and \
                board[row][col + 1] not in invalid_values

        raise constants.InvalidAction("We did not receive a valid direction: ",
                                      direction)
    def rollout(self):
        """Run one self-play episode, choosing this agent's actions via
        Monte-Carlo tree search.

        Returns:
            (step_count, reward, rewards): episode length, this agent's
            final reward, and the full reward list.
        """
        # begin each rollout with an empty search tree
        self.reset_tree()

        # marking ourselves as the training agent guarantees the env does
        # not call us recursively and ends the episode when this agent dies
        self.env.training_agent = self.agent_id
        obs = self.env.reset()

        step_count = 0
        done = False
        while not done:
            if args.render:
                self.env.render()

            state = self.env.get_json_info()
            # run MCTS from the current state to get action probabilities
            probs = self.search(state, args.mcts_iters, args.temperature)
            # sample our action from the MCTS distribution
            chosen = np.random.choice(NUM_ACTIONS, p=probs)

            # sanity check: we must still be the training agent
            assert self.env.training_agent == self.agent_id
            # collect the other agents' actions, then slot ours in
            actions = self.env.act(obs)
            actions.insert(self.agent_id, chosen)
            # advance the environment one tick
            obs, rewards, done, info = self.env.step(actions)
            assert self == self.env._agents[self.agent_id]
            step_count += 1
            print("Agent:", self.agent_id, "Step:", step_count, "Actions:", [constants.Action(a).name for a in actions], "Probs:", [round(p, 2) for p in probs], "Rewards:", rewards, "Done:", done)

        reward = rewards[self.agent_id]
        return step_count, reward, rewards
Esempio n. 5
0
    def rollout(self, shared_buffer, finished):
        """Run one self-play episode, refreshing model weights from the
        trainer's shared buffer every step.

        Args:
            shared_buffer: shared memory whose `.raw` holds pickled weights.
            finished: shared flag (`.value`) that stops the rollout early.

        Returns:
            (trace, reward, rewards): list of (features, pi) pairs, this
            agent's reward, and the full reward list.
        """
        # reset search tree in the beginning of each rollout
        self.reset_tree()

        # guarantees that we are not called recursively
        # and episode ends when this agent dies
        self.env.training_agent = self.agent_id
        obs = self.env.reset()

        trace = []
        done = False
        while not done and not finished.value:
            if args.render:
                self.env.render()

            # copy weights from trainer
            # NOTE(review): pickle.loads is only safe while this buffer is
            # written exclusively by our own trainer process — never feed
            # it untrusted data
            self.model.set_weights(pickle.loads(shared_buffer.raw))

            # use temperature 1 for first 30 steps and temperature 0 afterwards
            #temp = 0 if self.env._step_count < 30 else 0
            # TODO: only works when agent has access to the env
            root = self.env.get_json_info()
            # do Monte-Carlo tree search
            pi = self.search(root, args.mcts_iters, args.temperature)
            # sample action from probabilities
            action = np.random.choice(NUM_ACTIONS, p=pi)
            # record observations and action probabilities
            feats = self.observation_to_features(obs[self.agent_id])
            trace.append((feats, pi))

            # ensure we are not called recursively
            assert self.env.training_agent == self.agent_id
            # make other agents act
            actions = self.env.act(obs)
            # add my action to list of actions
            actions.insert(self.agent_id, action)
            # step environment
            obs, rewards, done, info = self.env.step(actions)
            assert self == self.env._agents[self.agent_id]
            print("Agent:", self.agent_id, "Step:", self.env._step_count,
                  "Actions:", [constants.Action(a).name for a in actions],
                  "Probs:", [round(p, 2) for p in pi],
                  "Entropy: %.2f" % self.entropies[-1],
                  "Iters/s: %.2f" % self.iters_sec[-1], "Rewards:", rewards,
                  "Done:", done)

            #print("Rollout finished:", finished.value)

        # NOTE(review): if `finished` is set before the first env step,
        # `rewards` is unbound here — presumably the trainer never does that
        reward = rewards[self.agent_id]
        #print("Agent:", self.agent_id, "Reward:", reward, "Len trace:", len(trace))
        return trace, reward, rewards
Esempio n. 6
0
    def set_json_info(self):
        """Restores the full game state (board, items, agents, bombs,
        flames) from the JSON snapshot stored in ``self._init_game_state``."""
        board_size = int(self._init_game_state['board_size'])
        self._board_size = board_size
        self._step_count = int(self._init_game_state['step_count'])

        # rebuild the board, starting from an all-Passage grid
        board_array = json.loads(self._init_game_state['board'])
        self._board = np.ones((board_size, board_size)).astype(np.uint8)
        self._board *= constants.Item.Passage.value
        for x in range(self._board_size):
            for y in range(self._board_size):
                self._board[x, y] = board_array[x][y]

        # items are serialized as [position, item_value] pairs
        self._items = {}
        item_array = json.loads(self._init_game_state['items'])
        for i in item_array:
            self._items[tuple(i[0])] = i[1]

        # reset each existing agent in place from its serialized attributes
        agent_array = json.loads(self._init_game_state['agents'])
        for a in agent_array:
            agent = next(x for x in self._agents \
                         if x.agent_id == a['agent_id'])
            agent.set_start_position((a['position'][0], a['position'][1]))
            agent.reset(int(a['ammo']), bool(a['is_alive']),
                        int(a['blast_strength']), bool(a['can_kick']))

        # recreate bombs, linking each back to its owning agent
        self._bombs = []
        bomb_array = json.loads(self._init_game_state['bombs'])
        for b in bomb_array:
            bomber = next(x for x in self._agents \
                          if x.agent_id == b['bomber_id'])
            moving_direction = b['moving_direction']
            if moving_direction is not None:
                moving_direction = constants.Action(moving_direction)
            self._bombs.append(
                characters.Bomb(bomber, tuple(b['position']), int(b['life']),
                                int(b['blast_strength']), moving_direction))

        # recreate flames from their serialized position and remaining life
        self._flames = []
        flame_array = json.loads(self._init_game_state['flames'])
        for f in flame_array:
            self._flames.append(
                characters.Flame(tuple(f['position']), f['life']))
Esempio n. 7
0
 def handle_agent_move(game_data, agent_id, row, col, action):
     # Apply `action` for the agent at (row, col) on the encoded board and
     # return the agent's resulting (row, col) position.
     if action == constants.Action.Stop.value:
         return row, col
     elif action == constants.Action.Bomb.value:
         ammo = EnvSimulator._get_agent_value(game_data, agent_id, AMMO_POS)
         # NOTE(review): cells >= 10000 appear to already encode a bomb on
         # this square; the literal below packs owner id and blast strength
         # into the cell value — confirm against the board encoding scheme
         if game_data[row, col] < 10000 and ammo > 0:
             game_data[row, col] = 10009 + (
                 agent_id + 3) * 1000 + EnvSimulator._get_agent_value(
                     game_data, agent_id, BLAST_STRENGTH_POS) * 10
             # placing a bomb consumes one unit of ammo
             EnvSimulator._set_agent_value(game_data, agent_id, AMMO_POS,
                                           ammo - 1)
         return row, col
     else:
         # movement action: only step onto the next cell when it is neither
         # a rigid wall nor wood; otherwise stay put
         invalid_values = [
             constants.Item.Rigid.value, constants.Item.Wood.value
         ]
         if EnvSimulator._is_valid_direction(game_data, row, col, action,
                                             invalid_values):
             return utility.get_next_position((row, col),
                                              constants.Action(action))
         else:
             return row, col
Esempio n. 8
0
def bomb_test(observ, flame_positions, remaining_directions):
    """Return True when placing a bomb at the current position looks safe.

    Declines to bomb when: we have no ammo, we stand in flames, the
    teammate is within Manhattan blast reach, our cell or an adjacent
    passable cell is covered by a bomb about to explode, or we sit at the
    intersection of two or more corridors.

    NOTE(review): `remaining_directions` is unused in this function.
    """
    if observ['ammo'] < 1:
        return False
    my_position, board, bomb_life, blast_st, enemies, teammate = \
    observ['position'], observ['board'], observ['bomb_life'], observ['bomb_blast_strength'], observ['enemies'], observ['teammate']
    if my_position in flame_positions:
        return False
    my_agent_id = board[my_position]

    # never bomb when the teammate could be caught in the blast
    teammate_id = observ['teammate'].value
    mate_pos = np.where(board == teammate_id)
    if mate_pos[0].shape[0] > 0:
        m_x, m_y = mate_pos[0][0], mate_pos[1][0]
        if abs(m_x - my_position[0]) + abs(
                m_y - my_position[1]) <= observ['blast_strength'] * 2:
            return False

    #not bomb when my_position is covered by a bomb with life<=life_value
    def neighbor_test(my_pos, life_value):
        # Scan outward from my_pos along each of the four axes; return
        # False if any bomb with life <= life_value has enough blast
        # strength to reach my_pos.  Each scan stops at the first cell a
        # blast cannot pass through.
        x, y = my_pos
        i = x - 1
        sz = len(board)
        while i >= 0:
            position = (i, y)
            if not utility.position_on_board(board, position):
                break
            if int(bomb_life[i, y]) <= life_value and blast_st[i,
                                                               y] > abs(i - x):
                return False
            if not position_can_be_bomb_through(board, position):
                break
            i -= 1
        i = x + 1
        while i < sz:
            position = (i, y)
            if not utility.position_on_board(board, position):
                break
            if int(bomb_life[i, y]) <= life_value and blast_st[i,
                                                               y] > abs(i - x):
                return False
            if not position_can_be_bomb_through(board, position):
                break
            i += 1
        j = y - 1
        while j >= 0:
            position = (x, j)
            if not utility.position_on_board(board, position):
                break
            if int(bomb_life[x, j]) <= life_value and blast_st[x,
                                                               j] > abs(j - y):
                return False
            if not position_can_be_bomb_through(board, position):
                break
            j -= 1
        j = y + 1
        while j < sz:
            position = (x, j)
            if not utility.position_on_board(board, position):
                break
            if int(bomb_life[x, j]) <= life_value and blast_st[x,
                                                               j] > abs(j - y):
                return False
            if not position_can_be_bomb_through(board, position):
                break
            j += 1
        return True

    if not neighbor_test(my_position, life_value=10):
        return False

    directions = [
        constants.Action.Down, constants.Action.Up, constants.Action.Left,
        constants.Action.Right
    ]
    #not place bomb when agent is at the intersections of two or more corridors
    corridors = []
    for d in directions:
        d = constants.Action(d)
        next_pos = utility.get_next_position(my_position, d)
        if not utility.position_on_board(board, next_pos):
            continue
        if not position_can_be_bomb_through(board, next_pos):
            continue
        # also refuse to bomb if an adjacent escape cell is itself covered
        if not neighbor_test(next_pos, life_value=10):
            return False
        perpendicular_dirs = [constants.Action.Left, constants.Action.Right]
        if d == constants.Action.Left or d == constants.Action.Right:
            perpendicular_dirs = [constants.Action.Down, constants.Action.Up]
        ret = direction_filter.is_in_corridor(board, next_pos,
                                              perpendicular_dirs)
        corridors.append(ret)
    if len(corridors) >= 2 and all(corridors):
        return False
    return True
Esempio n. 9
0
    def act(self, obs, action_space):
        """Choose this agent's action for the current step.

        Pipeline: build extended bomb/flame observations, simulate future
        boards (with and without enemy movement), derive the set of
        survivable actions, then apply prioritized heuristics: forced
        move, collect good items, place a bomb (break wood / trap enemy),
        approach wood, kick bombs, approach enemies, and finally fall
        back to the superclass agent.
        """

        #
        # Definitions
        #

        self._search_range = 10

        board = obs['board']
        my_position = obs["position"]  # tuple([x,y]): my position
        my_ammo = obs['ammo']  # int: the number of bombs I have
        my_blast_strength = obs['blast_strength']
        my_enemies = [constants.Item(e) for e in obs['enemies']]

        #
        # Prepare extended observations
        # - bomb moving direction
        # - flame remaining life
        #

        # Summarize information about bombs
        # curr_bombs : list of current bombs
        # moving_direction : array of moving direction of bombs
        curr_bombs, moving_direction, self._prev_bomb_life \
            = self._get_bombs(obs, self._prev_bomb_life)

        # Summarize information about flames
        curr_flames, self._prev_flame_life \
            = self._get_flames(board, self._prev_flame_life, self._prev_bomb_position_strength)

        # bombs to be exploded in the next step
        self._prev_bomb_position_strength = list()
        rows, cols = np.where(obs["bomb_blast_strength"] > 0)
        for position in zip(rows, cols):
            strength = int(obs["bomb_blast_strength"][position])
            self._prev_bomb_position_strength.append((position, strength))

        #
        # Understand current situation
        #

        # Simulation assuming enemies stay unmoved

        # List of simulated boards
        list_boards_no_move, _ \
            = self._board_sequence(board,
                                   curr_bombs,
                                   curr_flames,
                                   self._search_range,
                                   my_position,
                                   enemy_mobility=0)

        # List of the set of survivable time-positions at each time
        # and preceding positions
        survivable_no_move, prev_no_move \
            = self._search_time_expanded_network(list_boards_no_move,
                                                 my_position)

        # Items that can be reached in a survivable manner
        reachable_items_no_move, reached_no_move, next_to_items_no_move \
            = self._find_reachable_items(list_boards_no_move,
                                         my_position,
                                         survivable_no_move)

        # Simulation assuming enemies move

        for enemy_mobility in range(3, -1, -1):
            # List of boards simulated
            list_boards, _ = self._board_sequence(board,
                                                  curr_bombs,
                                                  curr_flames,
                                                  self._search_range,
                                                  my_position,
                                                  enemy_mobility=enemy_mobility)

            # List of the set of survivable time-positions at each time
            # and preceding positions
            survivable, prev = self._search_time_expanded_network(list_boards,
                                                                  my_position)

            if len(survivable[1]) > 0:
                # Gradually reduce the mobility of enemy, so we have at least one survivable action
                break

        # Items that can be reached in a survivable manner
        reachable_items, reached, next_to_items \
            = self._find_reachable_items(list_boards,
                                         my_position,
                                         survivable)

        # Survivable actions
        is_survivable, survivable_with_bomb \
            = self._get_survivable_actions(survivable,
                                           obs,
                                           curr_bombs,
                                           curr_flames)

        survivable_actions = [a for a in is_survivable if is_survivable[a]]

        # NOTE(review): `verbose` is not defined in this method — presumably
        # a module-level flag; confirm it exists in this module
        if verbose:
            print("survivable actions are", survivable_actions)

        # Positions where we kick a bomb if we move to
        kickable = self._kickable_positions(obs, moving_direction)

        print()
        # debug dump disabled: range(0) never iterates
        for t in range(0):
            print(list_boards[t])
            print(survivable[t])
            for key in prev[t]:
                print(key, prev[t][key])

        #
        # Choose an action
        #

        """
        # This is not effective in the current form
        if len(survivable_actions) > 1:
            # avoid the position if only one position at the following step
            # the number of positions that can be reached from the next position
            next = defaultdict(set)
            next_count = defaultdict(int)
            for position in survivable[1]:
                next[position] = set([p for p in prev[2] if position in prev[2][p]])
                next_count[position] = len(next[position])
            print("next count", next_count)
            if max(next_count.values()) > 1:
                for position in survivable[1]:
                    if next_count[position] == 1:
                        risky_action = self._get_direction(my_position, position)
                        is_survivable[risky_action] = False
                survivable_actions = [a for a in is_survivable if is_survivable[a]]                
        """

        # Do not stay on a bomb if I can
        if all([obs["bomb_life"][my_position] > 0,
                len(survivable_actions) > 1,
                is_survivable[constants.Action.Stop]]):
            is_survivable[constants.Action.Stop] = False
            survivable_actions = [a for a in is_survivable if is_survivable[a]]

        if len(survivable_actions) == 0:

            # must die
            # TODO: might want to do something that can help team mate
            # TODO: kick if possible
            print("Must die", constants.Action.Stop)
            return super().act(obs, action_space)
            # return constants.Action.Stop.value

        elif len(survivable_actions) == 1:

            # move to the position if it is the only survivable position
            action = survivable_actions[0]
            print("The only survivable action", action)
            return action.value

        # Move towards good items
        good_items = [constants.Item.ExtraBomb, constants.Item.IncrRange]
        # TODO : kick may be a good item only if I cannot kick yet
        # TODO : might want to destroy
        good_items.append(constants.Item.Kick)
        # positions with good items
        good_time_positions = set()
        for item in good_items:
            good_time_positions = good_time_positions.union(reachable_items[item])
        if len(good_time_positions) > 0:
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions,
                                                   prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward good item", action)
                return action.value

        # TODO : shoud check the survivability of all agents in one method

        # Place a bomb if
        # - it does not significantly reduce my survivability
        # - it can break wood
        # - it can reduce the survivability of enemies
        if is_survivable[constants.Action.Bomb]:
            # if survavable now after bomb, consider bomb
            if all([len(s) > 0 for s in survivable_with_bomb]):
                # if survivable all the time after bomb, consider bomb
                if all([self._can_break_wood(list_boards_no_move[-1],
                                             my_position,
                                             my_blast_strength)]
                       + [not utility.position_is_flames(board, my_position)
                          for board in list_boards_no_move[:10]]):
                    # place bomb if can break wood
                    print("Bomb to break wood", constants.Action.Bomb)
                    return constants.Action.Bomb.value

                for enemy in my_enemies:
                    # check if the enemy is reachable
                    if len(reachable_items_no_move[enemy]) == 0:
                        continue

                    # can reach the enemy at enemy_position in enemy_time step
                    enemy_time = reachable_items_no_move[enemy][0][0]
                    enemy_position = reachable_items_no_move[enemy][0][1:3]

                    # find direction towards enemy
                    positions = set([x[1:3] for x in next_to_items_no_move[enemy]])
                    for t in range(enemy_time, 1, -1):
                        _positions = set()
                        for position in positions:
                            _positions = _positions.union(prev_no_move[t][position])
                        positions = _positions.copy()

                    #if enemy_time <= my_blast_strength:
                    # NOTE(review): `if True` disables the "bomb to kick"
                    # branch below, leaving it as dead code
                    if True:
                        positions.add(my_position)
                        positions_after_bomb = set(survivable[1]).difference(positions)
                        if positions_after_bomb:
                            print("Bomb to kill an enemy", enemy, constants.Action.Bomb)
                            return constants.Action.Bomb.value
                    else:
                        # bomb to kick
                        x0, y0 = my_position
                        positions_against = [(2*x0-x, 2*y0-y) for (x, y) in positions]
                        positions_after_bomb = set(survivable[1]).intersection(positions_against)

                        if positions_after_bomb:
                            print("Bomb to kick", enemy, constants.Action.Bomb)
                            return constants.Action.Bomb.value

                    """
                    # check if placing a bomb can reduce the survivability
                    # of the enemy
                    survivable_before, _ = self._search_time_expanded_network(list_boards_no_move,
                                                                              enemy_position)

                    board_with_bomb = deepcopy(obs["board"])
                    curr_bombs_with_bomb = deepcopy(curr_bombs)
                    # lay a bomb
                    board_with_bomb[my_position] = constants.Item.Bomb.value
                    bomb = characters.Bomb(characters.Bomber(),  # dummy owner of the bomb
                                           my_position,
                                           constants.DEFAULT_BOMB_LIFE,
                                           my_blast_strength,
                                           None)
                    curr_bombs_with_bomb.append(bomb)
                    list_boards_with_bomb, _ \
                        = self._board_sequence(board_with_bomb,
                                               curr_bombs_with_bomb,
                                               curr_flames,
                                               self._search_range,
                                               my_position,
                                               enemy_mobility=0)
                    survivable_after, _ \
                        = self._search_time_expanded_network(list_boards_with_bomb,
                                                             enemy_position)

                    good_before = np.array([len(s) for s in survivable_before])
                    good_after = np.array([len(s) for s in survivable_after])
                    # TODO : what are good criteria?
                    if any(good_after < good_before):
                        # place a bomb if it makes sense
                        print("Bomb to kill an enemy", constants.Action.Bomb)
                        return constants.Action.Bomb.value
                    """

        # Move towards a wood
        if len(next_to_items_no_move[constants.Item.Wood]) > 0:
            # positions next to wood
            good_time_positions = next_to_items_no_move[constants.Item.Wood]
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions,
                                                   prev,
                                                   is_survivable)
            if action is not None:
                print("Moving toward wood", action)
                return action.value

        # kick whatever I can kick
        # -- tentative, this is generally not a good strategy
        if len(kickable) > 0:

            while kickable:
                # then consider what happens if I kick a bomb
                next_position = kickable.pop()

                # do not kick a bomb if it will break a wall
                if all([moving_direction[next_position] is None,
                        self._can_break_wood(board, next_position, my_blast_strength)]):
                    # if it is a static bomb
                    # do not kick if it is breaking a wall
                    continue

                my_action = self._get_direction(my_position, next_position)
                list_boards_with_kick, next_position \
                    = self._board_sequence(obs["board"],
                                           curr_bombs,
                                           curr_flames,
                                           self._search_range,
                                           my_position,
                                           my_action=my_action,
                                           can_kick=True,
                                           enemy_mobility=3)
                survivable_with_kick, prev_kick \
                    = self._search_time_expanded_network(list_boards_with_kick[1:],
                                                         next_position)
                if next_position in survivable_with_kick[0]:
                    print("Kicking", my_action)
                    return my_action.value

        # Move towards an enemy
        good_time_positions = set()
        for enemy in my_enemies:
            good_time_positions = good_time_positions.union(next_to_items[enemy])
        if len(good_time_positions) > 0:
            action = self._find_distance_minimizer(my_position,
                                                   good_time_positions,
                                                   prev,
                                                   is_survivable)

            if obs["bomb_life"][my_position] > 0:
                # if on a bomb, move away
                if action == constants.Action.Down and is_survivable[constants.Action.Up]:
                    action = constants.Action.Up
                elif action == constants.Action.Up and is_survivable[constants.Action.Down]:
                    action = constants.Action.Down
                elif action == constants.Action.Right and is_survivable[constants.Action.Left]:
                    action = constants.Action.Left
                elif action == constants.Action.Left and is_survivable[constants.Action.Right]:
                    action = constants.Action.Right
                else:
                    action = None

            if action is not None:
                print("Moving toward/against enemy", action)
                return action.value

        #
        # as in the agent from the previous competition
        #
        action = super().act(obs, action_space)
        if is_survivable[constants.Action(action)]:
            print("Action from prev. agent", constants.Action(action))
            return action
        else:
            action = random.choice(survivable_actions)
            print("Random action", action)
            return action.value
Esempio n. 10
0
        env.render()
        actions = env.act(obs)

        actions[1] = ppo_agent.compute_action(observation=penv.featurize(
            obs[1]),
                                              policy_id="ppo_policy")
        actions[3] = ppo_agent.compute_action(observation=penv.featurize(
            obs[3]),
                                              policy_id="ppo_policy")

        obs, reward, done, info = env.step(actions)
        features = penv.featurize(obs[1])
        for i in range(13):
            print("i:", i)
            print(features["board"][:, :, i])
            print("======")
        print(obs[1]["board"])
        print()
        print(obs[1]["bomb_life"])
        print("step:", step)
        print("alive:", obs[1]["alive"])
        print("actions:", [constants.Action(action) for action in actions])

        print("reward:", reward)
        print("done:", done)
        print("info:", info)
        print("=========")
        step += 1
    env.render(close=True)
    # env.close()
Esempio n. 11
0
    def act(game_data, actions):
        MIN_FIRE = 20
        AGENT_0 = 10
        AGENT_1 = 11

        if EnvSimulator.get_done(game_data):
            return

        #print(game_data, actions)

        # move objects
        pos_agent0_prev = None
        pos_agent0 = None
        pos_agent1_prev = None
        pos_agent1 = None
        pos_bomb_prev = []
        for row in range(game_data.shape[1]):
            for col in range(game_data.shape[1]):
                if EnvSimulator._is_fire(game_data, (row, col)):
                    game_data[row, col] -= 1
                    if game_data[row, col] == MIN_FIRE:
                        game_data[row, col] = 0
                elif game_data[row,
                               col] == AGENT_1 or game_data[row, col] >= 14000:
                    pos_agent1_prev = (row, col)
                    pos_agent1 = EnvSimulator.handle_agent_move(
                        game_data, 1, row, col, actions[1])
                elif game_data[row,
                               col] == AGENT_0 or game_data[row, col] >= 13000:
                    pos_agent0_prev = (row, col)
                    pos_agent0 = EnvSimulator.handle_agent_move(
                        game_data, 0, row, col, actions[0])
                if game_data[row, col] >= 10000:
                    pos_bomb_prev.append((row, col))

        if pos_agent0 == pos_agent1:
            pos_agent0 = pos_agent0_prev
            pos_agent1 = pos_agent1_prev

        # move bombs
        pos_bomb = []
        change = False
        invalid_values = [
            constants.Item.Rigid.value, constants.Item.Wood.value,
            constants.Item.Kick, constants.Item.IncrRange,
            constants.Item.ExtraBomb
        ]
        for bomb_pos in pos_bomb_prev:
            bomb = game_data[bomb_pos]
            direction = int((bomb % 1000) / 100)
            if direction == 0 and bomb_pos == pos_agent0:
                if pos_agent0 != pos_agent0_prev:  # kick bomb
                    direction = EnvSimulator.get_direction(
                        pos_agent0_prev, pos_agent0).value
                elif int((bomb % 10000) / 1000) != 1 and int(
                    (bomb % 10000) / 1000) != 3:
                    raise ValueError("Fatal Error")
            elif direction == 0 and bomb_pos == pos_agent1:
                if pos_agent1 != pos_agent1_prev:  # kick bomb
                    direction = EnvSimulator.get_direction(
                        pos_agent1_prev, pos_agent1).value
                elif int((bomb % 10000) / 1000) != 2 and int(
                    (bomb % 10000) / 1000) != 4:
                    raise ValueError("Fatal Error")

            new_bomb_pos = bomb_pos
            if direction > 0:
                change = True
                row, col = bomb_pos
                if EnvSimulator._is_valid_direction(game_data, row, col,
                                                    direction, invalid_values):
                    new_bomb_pos = utility.get_next_position(
                        bomb_pos, constants.Action(direction))
                if (row, col) == pos_agent0 or (row, col) == pos_agent1:
                    new_bomb_pos = bomb_pos

            pos_bomb.append(new_bomb_pos)

        while change:
            change = False
            # bomb <-> bomb
            for i in range(len(pos_bomb)):
                pos = pos_bomb[i]
                for j in range(len(pos_bomb)):
                    if i != j and pos == pos_bomb[j]:
                        pos_bomb[i] = pos_bomb_prev[i]
                        pos_bomb[j] = pos_bomb_prev[j]
                        change = True
                if pos_bomb[i] == pos_agent0 and (
                        pos_bomb[i] != pos_bomb_prev[i]
                        or pos_agent0 != pos_agent0_prev):
                    pos_agent0 = pos_agent0_prev
                    pos_bomb[i] = pos_bomb_prev[i]
                    change = True
                elif pos_bomb[i] == pos_agent1 and (
                        pos_bomb[i] != pos_bomb_prev[i]
                        or pos_agent1 != pos_agent1_prev):
                    pos_agent1 = pos_agent1_prev
                    pos_bomb[i] = pos_bomb_prev[i]
                    change = True

        for i in range(len(pos_bomb)):
            cur_value = game_data[pos_bomb_prev[i]]
            life = int(cur_value % 10) - 1
            if 20 < game_data[pos_bomb[i]] < 30:
                life = 0
            strength = int((cur_value % 100) / 10)
            direction = EnvSimulator.get_direction(pos_bomb[i],
                                                   pos_bomb_prev[i]).value
            player = int((cur_value % 10000) / 1000)
            if player > 2:
                player -= 2
            if pos_agent0 == pos_bomb[i] or pos_agent1 == pos_bomb[i]:
                player += 2

            game_data[pos_bomb_prev[i]] = 0
            game_data[pos_bomb[
                i]] = 10000 + player * 1000 + direction * 100 + strength * 10 + life

        # set agent
        #print(pos_agent0, pos_agent1)
        EnvSimulator._agent_collect(game_data, 0, pos_agent0)
        EnvSimulator._agent_collect(game_data, 1, pos_agent1)

        if pos_agent0_prev != pos_agent0:
            if game_data[pos_agent0_prev] < 10000:
                game_data[pos_agent0_prev] = 0
            if EnvSimulator._is_fire(game_data, pos_agent0):
                EnvSimulator._agent_died(game_data, 0)
            else:
                game_data[pos_agent0] = AGENT_0

        if pos_agent1_prev != pos_agent1:
            if game_data[pos_agent1_prev] < 10000:
                game_data[pos_agent1_prev] = 0
            if EnvSimulator._is_fire(game_data, pos_agent1):
                EnvSimulator._agent_died(game_data, 1)
            else:
                game_data[pos_agent1] = AGENT_1

        # fire bombs
        fire = True
        while fire:
            fire = False
            for bomb in pos_bomb:
                bomb_value = game_data[bomb]
                if int(bomb_value % 10) == 0:
                    strength = int((bomb_value % 100) / 10)
                    EnvSimulator._set_fire(game_data, bomb[0], bomb[1], True)
                    EnvSimulator._fire_bomb(game_data, bomb[0], bomb[1], 0, 1,
                                            strength - 1)  # right
                    EnvSimulator._fire_bomb(game_data, bomb[0], bomb[1], 0, -1,
                                            strength - 1)  # left
                    EnvSimulator._fire_bomb(game_data, bomb[0], bomb[1], 1, 0,
                                            strength - 1)  # down
                    EnvSimulator._fire_bomb(game_data, bomb[0], bomb[1], -1, 0,
                                            strength - 1)  # up
                    fire = True