예제 #1
0
    def get_action(agent_index, state: GameState):
        """
        Choose the next move for the enemy at ``agent_index``.

        The enemy keeps its current heading while that move is legal.
        Otherwise it turns around when the heading has a non-stop reverse,
        and as a last resort it picks one of the legal moves at random.

        : param agent_index: (int) index passed to ``state.get_enemy`` and
            ``state.get_legal_actions``
        : param state: (GameState) current game state
        : return: the chosen direction
        """
        heading = state.get_enemy(agent_index).get_direction()
        legal_moves = state.get_legal_actions(agent_index)

        # Keep going straight whenever that is allowed.
        if heading in legal_moves:
            return heading

        # Blocked: turn around. The reverse is returned as-is, without being
        # checked against the legal moves.
        turn_back = Directions.REVERSE.get(heading, Directions.STOP)
        if turn_back != Directions.STOP:
            return turn_back

        return random.choice(legal_moves)
예제 #2
0
 def __init__(self, map_name='default_map', end_step=1000):
     """
     Build the map, the game state and the gym spaces, then reset.

     : param map_name: (str) name handed to ``Map``
     : param end_step: (int) step limit stored on the environment
     """
     layout = Map(map_name)
     self.map = layout
     self.state = GameState(layout)
     self.end_step = end_step
     self.action_space = gym.spaces.Discrete(6)

     # The observation shape below matches to_observation_(shape).
     # to_observation() would instead need
     # (self.map.width, self.map.height, 8).
     dim0, dim1 = layout.shape[0], layout.shape[1]
     self.obs_shape = (dim0, dim1, 1)
     self.observation_space = gym.spaces.Box(
         low=0,
         high=8,
         shape=self.obs_shape,
         dtype=np.int8,
     )
     self.reset()
예제 #3
0
def test_enemy_agents(state: GameState):
    """
    Enemy 1 is expected to pick "West" three times and then "East" three
    times, with each pick (except the last) applied to the state before the
    next one is requested.
    """
    expected_moves = ["West", "West", "West", "East", "East", "East"]
    final_step = len(expected_moves) - 1

    for step, move in enumerate(expected_moves):
        assert EnemyAgents.get_action(1, state) == move
        # The last expected move is only checked, never applied.
        if step != final_step:
            state.generate_successor(1, move)
예제 #4
0
def test_actions(state: GameState):
    """
    Check the Actions helpers: index lookup, direction reversal, vector and
    direction conversion, and the possible-action queries.
    """
    state.reset()

    # Index -> action name, plus the error raised for an invalid index.
    for index, name in ((0, 'North'), (3, 'West')):
        assert Actions.get_action_with_index(index) == name
    with pytest.raises(Exception) as ex:
        Actions.get_action_with_index(-1)
    assert "Invalid action index!" in str(ex.value)

    # Every direction paired with its opposite.
    opposites = (
        ('North', 'South'),
        ('South', 'North'),
        ('East', 'West'),
        ('West', 'East'),
    )
    for direction, opposite in opposites:
        assert Actions.reverse_direction(direction) == opposite

    # Vectors and the direction each one maps to.
    vector_names = (
        ((0, 1), 'North'),
        ((0, -1), 'South'),
        ((1, 0), 'East'),
        ((-1, 0), 'West'),
        ((0, 0), 'Stop'),
    )
    for vector, name in vector_names:
        assert Actions.vector_to_direction(vector) == name

    assert Actions.direction_to_vector('East') == (1, 0)

    first_agent_moves = Actions.get_possible_actions(state.agent_states[0], state)
    assert first_agent_moves == ['East', 'West', 'Stop']

    # Shift the bomberman by half a unit in x: only its current direction
    # is expected to be offered.
    player = state.get_bomberman()
    x, y = player.get_position()
    heading = player.get_direction()
    player.configuration.pos = (x + 0.5, y)

    assert Actions.get_possible_actions(player, state) == [heading]
    state.reset()

    # Put enemy 1 at (x + 1, y); with the third argument set to True,
    # 'East' is no longer expected among the bomberman's actions.
    state.get_enemy(1).configuration.pos = (x + 1, y)
    restricted_moves = Actions.get_possible_actions(state.get_bomberman(), state, True)
    assert restricted_moves == ['West', 'Stop']
예제 #5
0
def test_pacman_rules(state: GameState):
    """
    Check BombermanRules: the legal actions after a reset, how applied
    actions affect the bomberman's direction, and the legal actions after a
    bomb has been placed.
    """
    def facing():
        # Always re-read the bomberman from the state.
        return state.get_bomberman().get_direction()

    state.reset()
    opening_moves = ['East', 'West', 'Stop', 'Bomb']
    assert BombermanRules.get_legal_actions(state) == opening_moves
    assert BombermanRules.get_legal_actions(state, True) == opening_moves

    # 'North' is not among the legal actions: the direction stays 'Stop'.
    BombermanRules.apply_action(state, 'North')
    assert facing() == 'Stop'

    # After the direction is forced to 'North', applying 'North' keeps it.
    state.get_bomberman().configuration.direction = 'North'
    BombermanRules.apply_action(state, 'North')
    assert facing() == 'North'

    # A legal move updates the direction.
    BombermanRules.apply_action(state, 'East')
    assert facing() == 'East'

    # Once a bomb is placed, only 'East' and 'West' are expected to remain.
    BombermanRules.apply_action(state, 'Bomb')
    assert BombermanRules.get_legal_actions(state) == ['East', 'West']

    state.reset()
예제 #6
0
def test_ghost_rules(state: GameState):
    """
    Check EnemyRules: legal actions after a reset, applying actions, the
    kill predicate, and death checks that end the game as a loss.
    """
    state.reset()
    assert EnemyRules.get_legal_actions(state, 1) == ['West']

    # Applying "Stop" has no assertion attached; the call only has to succeed.
    EnemyRules.apply_action(state, "Stop", 1)

    EnemyRules.apply_action(state, 'West', 1)
    assert state.get_enemy(1).get_direction() == 'West'

    # Identical positions are expected to count as a kill.
    assert EnemyRules.can_kill((0, 0), (0, 0))
    # Death checks for agent 0 and agent 1 before the enemy is given the
    # bomberman's configuration; no outcome is asserted for them.
    EnemyRules.check_death(state, 0)
    EnemyRules.check_death(state, 1)
    # Enemy 1 now shares the bomberman's configuration object.
    state.get_enemy(1).configuration = state.get_bomberman().configuration

    # kill bomberman (check triggered with the enemy's index)
    EnemyRules.check_death(state, 1)
    assert state.is_lose()
    state.reset()

    state.get_enemy(1).configuration = state.get_bomberman().configuration
    # kill bomberman (this time the check is triggered with index 0)
    EnemyRules.check_death(state, 0)
    assert state.is_lose()
예제 #7
0
def test_game_state(state: GameState):
    """
    Walk the GameState through a bomb scenario.

    Covers agent lookup, terminal-state handling, legal actions, both
    observation encodings and the textual rendering of the board while a
    bomb is placed, ticks and explodes.
    """
    board_shape = (5, 9, 1)
    terminal_error = "Can\'t generate a successor of a terminal state."

    def cells_with(code):
        # Number of cells in the flattened observation holding ``code``.
        flat = state.to_observation_(board_shape).reshape((-1)).astype(np.int8)
        return np.bincount(flat)[code]

    # Index 0 is rejected by get_enemy.
    with pytest.raises(Exception) as ex:
        state.get_enemy(0)
    assert 'Invalid index' in str(ex.value)

    assert not state.get_enemy(1).is_bomberman

    assert state.get_bomberman().is_bomberman

    # A won game cannot produce successors and offers no legal actions.
    with pytest.raises(Exception) as ex:
        state._win = True
        state.generate_successor(0, "East")

    assert terminal_error in str(ex.value)

    assert state.get_legal_actions(0) == []

    state.reset()
    # A lost game cannot produce successors either.
    with pytest.raises(Exception) as ex:
        state._lose = True
        state.generate_successor(0, "East")

    assert terminal_error in str(ex.value)

    state.reset()
    assert state.get_legal_actions(0, True) == ["East", "West", "Stop", "Bomb"]
    assert state.get_legal_actions(1) == ["West"]

    state.reset()
    state.generate_successor(0, 'Bomb')

    # The observation sums to the number of bombs, agents, walls and bricks.
    observed_total = np.sum(state.to_observation())
    expected_total = (len(state.get_bombs()) + len(state.agent_states)
                      + state.layout.walls.count()
                      + state.layout.bricks.count())
    assert observed_total == expected_total

    state.generate_successor(0, 'East')
    assert state.layout.shape == (5, 9)
    assert cells_with(2) == state.layout.walls.count()
    assert cells_with(3) == state.layout.bricks.count()
    assert cells_with(4) == 1
    assert cells_with(5) == 1

    state.generate_successor(0, 'Bomb')
    state.generate_successor(0, 'East')
    assert cells_with(5) == 2

    state.generate_successor(0, 'East')
    assert str(state) == """% % % % % % % % %
%       E #     %
%   % # % # %   %
%     0 o   B   %
% % % % % % % % %"""
    assert cells_with(5) == 1
    assert cells_with(6) == 1

    state.generate_successor(0, 'East')
    assert cells_with(7) == 1
    print(state)
    assert str(state) == """% % % % % % % % %
%       E #     %
%   % # % # %   %
%     O 0     B %
% % % % % % % % %"""

    state.generate_successor(0, 'East')

    assert str(state) == """% % % % % % % % %
%       E #     %
%   %   % # %   %
% * * * * * * B %
% % % % % % % % %"""

    assert cells_with(8) == 6
    assert cells_with(3) == state.layout.bricks.count()
예제 #8
0
class BaseEnv(gym.Env):
    """
    A bomberman environment.

    : param map_name: (str) the map to run, together with its related info
    : param end_step: (int) maximum number of steps allowed per episode
    """
    def __init__(self, map_name='default_map', end_step=1000):
        self.map = Map(map_name)
        self.state = GameState(self.map)
        self.end_step = end_step
        self.action_space = gym.spaces.Discrete(6)
        # The shape below matches to_observation_(shape). to_observation()
        # would instead need (self.map.width, self.map.height, 8).
        self.obs_shape = (self.map.shape[0], self.map.shape[1], 1)
        self.observation_space = gym.spaces.Box(low=0,
                                                high=8,
                                                shape=self.obs_shape,
                                                dtype=np.int8)
        self.reset()

    def reset(self):
        """
        Reset the game state and the step counter.

        : return: the observation of the freshly reset state
        """
        self.state.reset()
        self.current_step = 0

        return self.state.to_observation_(self.obs_shape)

    def step(self, action_index):
        """
        Apply one action and report the outcome.

        : param action_index: (int) index of the action to execute, resolved
            through ``Actions.get_action_with_index``
        : return: tuple ``(obs, reward, done, info)``; ``info`` is always an
            empty dict
        """
        action = Actions.get_action_with_index(action_index)
        self.apply_action(action)
        # Count this step before evaluating the step limit. Incrementing
        # after is_done() would report the limit one call late, and that
        # extra call applies no action but still returns get_reward().
        self.current_step += 1

        reward = self.get_reward()
        if action == 'Bomb':
            # NOTE(review): this replaces whatever get_reward() returned,
            # including rewards for events that happened on this step.
            # Confirm that this is intended.
            reward = 0.01
        done = self.is_done()

        obs = self.state.to_observation_(self.obs_shape)

        return obs, reward, done, {}

    def render(self, delay_time=0.5, pause=False):
        """
        Clear the terminal and print the environment.

        : param delay_time: (float) seconds to sleep after each print
        : param pause: (bool) print "Pause" instead of the score line
        """
        # 'cls' on Windows; 'clear' elsewhere (mac and linux, where os.name
        # is 'posix').
        os.system('cls' if os.name == 'nt' else 'clear')

        print(self.state)
        print('score: {}'.format(self.state.score) if not pause else "Pause")
        time.sleep(delay_time)

    def get_reward(self):
        """
        Turn the entries of ``state.score_item`` into a reward.

        Each recognised entry contributes a fixed amount. When the total
        is 0, a small time penalty is returned instead.

        : return: the reward computed from ``state.score_item``
        """
        _reward = 0

        for reward in self.state.score_item:
            if reward == 10:
                _reward += 10.  # Blow up brick

            elif reward == 200:
                _reward += 20.  # Kill enemy

            elif reward < -10:
                _reward += -50.  # Dead  (Ouch!) -500

            elif reward == 500:
                _reward += 50.  # Win kill all enemies

        if _reward == 0:
            _reward = -0.01  # Punish time (Pff..)

        return _reward

    def apply_action(self, action):
        """
        Advance the game by one action.

        Unless the action is 'Bomb', every enemy (agent indices 1 and up)
        moves first, each with the action chosen by
        ``EnemyAgents.get_action``. Agent 0 then performs ``action``. Each
        move is skipped once the round is over.

        : param action: the action for agent 0
        """
        if action != 'Bomb':
            for index in range(1, len(self.state.agent_states)):
                if not self.is_done():
                    self.state.generate_successor(
                        index, EnemyAgents.get_action(index, self.state))

        if not self.is_done():
            self.state.generate_successor(0, action)

    def is_done(self):
        """
        Check if this round is over.

        : return: True when the step limit has been reached or the state
            reports a win or a loss
        """
        return (self.current_step >= self.end_step
                or self.state.is_win()
                or self.state.is_lose())