def get_action(agent_index, state: GameState):
    """Choose the next move for the enemy at ``agent_index``.

    The enemy keeps moving in its current direction while that direction
    is legal.  Otherwise it turns around, provided ``Directions.REVERSE``
    maps its current direction to something other than
    ``Directions.STOP``.  Failing both, a random legal action is picked.

    :param agent_index: index of the enemy agent inside ``state``
    :param state: current game state
    :return: the chosen direction
    """
    heading = state.get_enemy(agent_index).get_direction()
    legal_moves = state.get_legal_actions(agent_index)

    # Prefer to keep going straight.
    if heading in legal_moves:
        return heading

    # Otherwise turn back; note the reverse is not checked against the
    # legal moves.
    opposite = Directions.REVERSE.get(heading, Directions.STOP)
    if opposite != Directions.STOP:
        return opposite

    return random.choice(legal_moves)
def __init__(self, map_name='default_map', end_step=1000):
    """Build the map, game state and gym spaces, then reset the episode.

    :param map_name: (str) name of the map to load
    :param end_step: (int) step limit stored on the environment
    """
    board = Map(map_name)
    self.map = board
    self.state = GameState(board)
    self.end_step = end_step
    self.action_space = gym.spaces.Discrete(6)

    # Shape passed to to_observation_(shape).  The alternative encoding,
    # to_observation(), would use (map.width, map.height, 8) instead.
    rows = board.shape[0]
    cols = board.shape[1]
    self.obs_shape = (rows, cols, 1)

    self.observation_space = gym.spaces.Box(
        low=0,
        high=8,
        shape=self.obs_shape,
        dtype=np.int8,
    )
    self.reset()
def test_enemy_agents(state: GameState):
    """Enemy 1 walks West three times, then turns around and walks East."""
    enemy_index = 1

    # Action the enemy is expected to choose before each successive move;
    # the chosen action is then applied to advance the state.
    for expected in ("West", "West", "West", "East", "East"):
        assert EnemyAgents.get_action(enemy_index, state) == expected
        state.generate_successor(enemy_index, expected)

    # After the last move the enemy still wants to continue East.
    assert EnemyAgents.get_action(enemy_index, state) == "East"
def test_actions(state: GameState):
    """Exercise the ``Actions`` helpers: index lookup, reversal, vector
    conversion and possible-action computation."""
    state.reset()

    # Index -> action name.
    for index, name in ((0, 'North'), (3, 'West')):
        assert Actions.get_action_with_index(index) == name

    with pytest.raises(Exception) as ex:
        Actions.get_action_with_index(-1)
    assert "Invalid action index!" in str(ex.value)

    # Direction reversal.
    reversal_cases = (
        ('North', 'South'),
        ('South', 'North'),
        ('East', 'West'),
        ('West', 'East'),
    )
    for heading, opposite in reversal_cases:
        assert Actions.reverse_direction(heading) == opposite

    # Vector -> direction.
    vector_cases = (
        ((0, 1), 'North'),
        ((0, -1), 'South'),
        ((1, 0), 'East'),
        ((-1, 0), 'West'),
        ((0, 0), 'Stop'),
    )
    for vector, heading in vector_cases:
        assert Actions.vector_to_direction(vector) == heading

    assert Actions.direction_to_vector('East') == (1, 0)

    first_agent = state.agent_states[0]
    assert Actions.get_possible_actions(first_agent, state) == ['East', 'West', 'Stop']

    # With the bomberman moved half a cell in x, only its current
    # direction is expected back.
    bomberman = state.get_bomberman()
    x, y = bomberman.get_position()
    direction = bomberman.get_direction()
    bomberman.configuration.pos = (x + 0.5, y)
    assert Actions.get_possible_actions(bomberman, state) == [direction]

    # With enemy 1 placed on the cell to the East and the third argument
    # set to True, 'East' is no longer offered.
    state.reset()
    state.get_enemy(1).configuration.pos = (x + 1, y)
    remaining = Actions.get_possible_actions(state.get_bomberman(), state, True)
    assert remaining == ['West', 'Stop']
def test_pacman_rules(state: GameState):
    """Check ``BombermanRules`` legal actions and action application."""

    def heading():
        # Always re-read the bomberman from the state.
        return state.get_bomberman().get_direction()

    state.reset()
    all_actions = ['East', 'West', 'Stop', 'Bomb']
    assert BombermanRules.get_legal_actions(state) == all_actions
    assert BombermanRules.get_legal_actions(state, True) == all_actions

    # An action that is not legal leaves the direction at 'Stop'.
    BombermanRules.apply_action(state, 'North')
    assert heading() == 'Stop'

    # Same illegal action, but with the direction pre-set to 'North'.
    state.get_bomberman().configuration.direction = 'North'
    BombermanRules.apply_action(state, 'North')
    assert heading() == 'North'

    # A legal action updates the direction.
    BombermanRules.apply_action(state, 'East')
    assert heading() == 'East'

    # After the 'Bomb' action only 'East' and 'West' remain legal.
    BombermanRules.apply_action(state, 'Bomb')
    assert BombermanRules.get_legal_actions(state) == ['East', 'West']

    state.reset()
def test_ghost_rules(state: GameState):
    """Check ``EnemyRules`` legal actions, movement and death detection."""

    def stack_enemy_on_bomberman():
        # Make enemy 1 share the bomberman's configuration (kill bomberman).
        shared = state.get_bomberman().configuration
        state.get_enemy(1).configuration = shared

    enemy_index = 1
    state.reset()
    assert EnemyRules.get_legal_actions(state, enemy_index) == ['West']

    EnemyRules.apply_action(state, "Stop", enemy_index)
    EnemyRules.apply_action(state, 'West', enemy_index)
    assert state.get_enemy(enemy_index).get_direction() == 'West'

    assert EnemyRules.can_kill((0, 0), (0, 0))

    # Death checks are called for both agent indices before any overlap.
    EnemyRules.check_death(state, 0)
    EnemyRules.check_death(state, 1)

    # Death check triggered with the enemy's index.
    stack_enemy_on_bomberman()
    EnemyRules.check_death(state, 1)
    assert state.is_lose()

    # Death check triggered with the bomberman's index.
    state.reset()
    stack_enemy_on_bomberman()
    EnemyRules.check_death(state, 0)
    assert state.is_lose()
# test_game_state: end-to-end checks on GameState.
#
# What the assertions below cover, in order:
#   * get_enemy(0) raises with a message containing 'Invalid index';
#     get_enemy(1) is not the bomberman, get_bomberman() is.
#   * With state._win or state._lose set to True, generate_successor raises
#     "Can't generate a successor of a terminal state.", and (win case)
#     get_legal_actions(0) is an empty list.
#   * After reset(): bomberman legal actions are East/West/Stop/Bomb and
#     enemy 1 can only go West.
#   * After placing a bomb, the sum of to_observation() equals
#     bombs + agents + walls + bricks, and the layout shape is (5, 9).
#   * to_observation_((5, 9, 1)) is histogrammed with np.bincount: code 2
#     is compared with the wall count and code 3 with the brick count;
#     codes 4-8 are compared with fixed counts as bombs are placed, the
#     bomberman moves East and the bomb goes off.
#     NOTE(review): presumably 5-7 are bomb-timer stages and 8 is the
#     blast -- confirm against to_observation_.
#   * str(state) is compared with the expected board rendering at three
#     points.
#     NOTE(review): the expected-board string literals below appear to have
#     lost their original line breaks / blank cells -- confirm them against
#     the real str(GameState) output before relying on this test.
def test_game_state(state: GameState): with pytest.raises(Exception) as ex: state.get_enemy(0) assert 'Invalid index' in str(ex.value) assert not state.get_enemy(1).is_bomberman assert state.get_bomberman().is_bomberman # test win and lose with pytest.raises(Exception) as ex: state._win = True state.generate_successor(0, "East") assert "Can\'t generate a successor of a terminal state." in str(ex.value) assert state.get_legal_actions(0) == [] state.reset() # test win and lose with pytest.raises(Exception) as ex: state._lose = True state.generate_successor(0, "East") assert "Can\'t generate a successor of a terminal state." in str(ex.value) state.reset() assert state.get_legal_actions(0, True) == ["East", "West", "Stop", "Bomb"] assert state.get_legal_actions(1) == ["West"] state.reset() state.generate_successor(0, 'Bomb') assert np.sum( state.to_observation() ) == len(state.get_bombs()) + len(state.agent_states) + state.layout.walls.count() + state.layout.bricks.count() state.generate_successor(0, 'East') assert state.layout.shape == (5, 9) assert np.bincount(state.to_observation_((5, 9, 1)).reshape((-1)).astype(np.int8))[2] == state.layout.walls.count() assert np.bincount(state.to_observation_((5, 9, 1)).reshape((-1)).astype(np.int8))[3] == state.layout.bricks.count() assert np.bincount(state.to_observation_((5, 9, 1)).reshape((-1)).astype(np.int8))[4] == 1 assert np.bincount(state.to_observation_((5, 9, 1)).reshape((-1)).astype(np.int8))[5] == 1 state.generate_successor(0, 'Bomb') state.generate_successor(0, 'East') assert np.bincount(state.to_observation_((5, 9, 1)).reshape((-1)).astype(np.int8))[5] == 2 state.generate_successor(0, 'East') assert str(state) == """% % % % % % % % % % E # % % % # % # % % % 0 o B % % % % % % % % % %""" assert np.bincount(state.to_observation_((5, 9, 1)).reshape((-1)).astype(np.int8))[5] == 1 assert np.bincount(state.to_observation_((5, 9, 1)).reshape((-1)).astype(np.int8))[6] == 1 state.generate_successor(0, 'East') assert 
np.bincount(state.to_observation_((5, 9, 1)).reshape((-1)).astype(np.int8))[7] == 1 print(state) assert str(state) == """% % % % % % % % % % E # % % % # % # % % % O 0 B % % % % % % % % % %""" state.generate_successor(0, 'East') assert str(state) == """% % % % % % % % % % E # % % % % # % % % * * * * * * B % % % % % % % % % %""" assert np.bincount(state.to_observation_((5, 9, 1)).reshape((-1)).astype(np.int8))[8] == 6 assert np.bincount(state.to_observation_((5, 9, 1)).reshape((-1)).astype(np.int8))[3] == state.layout.bricks.count()
class BaseEnv(gym.Env):
    """
    A bomberman environment.

    : param map_name: (str) the map to run, with its related information
    : param end_step: (int) maximum number of steps that can be run per episode
    """

    def __init__(self, map_name='default_map', end_step=1000):
        self.map = Map(map_name)
        self.state = GameState(self.map)
        self.end_step = end_step
        self.action_space = gym.spaces.Discrete(6)
        # Shape passed to to_observation_(shape).  The alternative encoding,
        # to_observation(), would use (map.width, map.height, 8) instead.
        self.obs_shape = (self.map.shape[0], self.map.shape[1], 1)
        self.observation_space = gym.spaces.Box(
            low=0, high=8, shape=self.obs_shape, dtype=np.int8)
        self.reset()

    def reset(self):
        """
        Reset environment.

        : return: the initial observation, shaped like ``self.obs_shape``
        """
        self.state.reset()
        self.current_step = 0
        return self.state.to_observation_(self.obs_shape)

    def step(self, action_index):
        """
        Tell the environment which action to do.

        : param action_index: (int) index of the action to execute
        : return: tuple ``(obs, reward, done, info)``; ``info`` is always
            an empty dict
        """
        action = Actions.get_action_with_index(action_index)
        self.apply_action(action)

        reward = self.get_reward()
        if action == 'Bomb':
            # Placing a bomb always yields this fixed reward, replacing
            # whatever get_reward() computed.
            reward = 0.01

        # Count this step BEFORE checking for termination.  Checking first
        # let the episode run for end_step + 1 calls; the extra call
        # applied no action but still computed a reward from the state.
        self.current_step += 1
        done = self.is_done()

        obs = self.state.to_observation_(self.obs_shape)
        return obs, reward, done, {}

    def render(self, delay_time=0.5, pause=False):
        """
        Print environment.

        : param delay_time: (float) how long to sleep after each print
        : param pause: (bool) print "Pause" instead of the score line
        """
        # for windows
        if os.name == 'nt':
            _ = os.system('cls')
        # for mac and linux(here, os.name is 'posix')
        else:
            _ = os.system('clear')
        print(self.state)
        print('score: {}'.format(self.state.score) if not pause else "Pause")
        time.sleep(delay_time)

    def get_reward(self):
        """
        Give rewards based on actions state.

        Each entry of ``self.state.score_item`` is mapped to a scaled
        reward and the results are summed.  A total of exactly zero is
        replaced by a small time penalty.

        : return: (float) the reward for the current state
        """
        _reward = 0
        for reward in self.state.score_item:
            if reward == 10:
                _reward += 10.  # Blow up brick
            elif reward == 200:
                _reward += 20.  # Kill enemy
            elif reward < -10:
                _reward += -50.  # Dead (Ouch!) -500
            elif reward == 500:
                _reward += 50.  # Win kill all enemies
        if _reward == 0:
            _reward = -0.01  # Punish time (Pff..)
        return _reward

    def apply_action(self, action):
        """
        Advance the game: enemies first, then the bomberman.

        Enemies (agent indices 1..n-1) do not move when the bomberman's
        action is 'Bomb'.  Nothing is applied once the round is over.

        : param action: (str) the bomberman's action name
        """
        if action != 'Bomb':
            for index in range(1, len(self.state.agent_states)):
                if not self.is_done():
                    self.state.generate_successor(
                        index, EnemyAgents.get_action(index, self.state))
        if not self.is_done():
            self.state.generate_successor(0, action)

    def is_done(self):
        """
        Check if this round is over.

        : return: (bool) True when the step limit is reached or the state
            reports a win or a loss
        """
        return (self.current_step >= self.end_step
                or self.state.is_win()
                or self.state.is_lose())