def __init__(self, config):
    """Set up a Hanabi environment from a game configuration dict.

    Args:
      config: dict of game parameters, forwarded unchanged to
        `pyhanabi.HanabiGame`. Documented keys:
          - colors: int, number of colors in [2,5].
          - ranks: int, number of ranks in [2,5].
          - players: int, number of players in [2,5].
          - hand_size: int, hand size in [4,5].
          - max_information_tokens: int, number of information tokens (>=0).
          - max_life_tokens: int, number of life tokens (>=1).
          - observation_type: int. 0: minimal observation.
            1: first-order common knowledge observation.
          - seed: int, random seed.
          - random_start_player: bool, random start player.

    Raises:
      AssertionError: if `config` is not a dict (only while assertions are
        enabled).
    """
    assert isinstance(config, dict), "Expected config to be of type dict."

    game = pyhanabi.HanabiGame(config)
    self.game = game
    self.observation_encoder = pyhanabi.ObservationEncoder(
        game, pyhanabi.ObservationEncoderType.CANONICAL)
    self.players = game.num_players()

    # init_hist() is defined elsewhere on the class; it runs last so that the
    # attributes assigned above already exist when it is called.
    self.hist = self.init_hist()
def __init__(self, config):
    """Set up a Hanabi environment plus reward metrics and input augmentation.

    Args:
      config: dict of game parameters, forwarded to `pyhanabi.HanabiGame`.
        Documented keys:
          - colors: int, number of colors in [2,5].
          - ranks: int, number of ranks in [2,5].
          - players: int, number of players in [2,5].
          - hand_size: int, hand size in [4,5].
          - max_information_tokens: int, number of information tokens (>=0).
          - max_life_tokens: int, number of life tokens (>=1).
          - observation_type: int. 0: minimal observation.
            1: first-order common knowledge observation.
          - seed: int, random seed.
          - random_start_player: bool, random start player.
        NOTE: this dict is modified in place. After the game is built, the
        keys `hand_size`, `num_players`, `num_colors` and `num_ranks` are
        overwritten with the values reported by the game object.

    Raises:
      AssertionError: if `config` is not a dict (only while assertions are
        enabled).
    """
    assert isinstance(config, dict), "Expected config to be of type dict."

    game = pyhanabi.HanabiGame(config)
    self.game = game
    self.observation_encoder = pyhanabi.ObservationEncoder(
        game, pyhanabi.ObservationEncoderType.CANONICAL)
    self.players = game.num_players()

    # The incoming config may omit keys that were meant to take their default
    # values, so write back what the game actually resolved them to before
    # handing the dict to the helpers below.
    resolved_settings = (
        ("hand_size", game.hand_size),
        ("num_players", game.num_players),
        ("num_colors", game.num_colors),
        ("num_ranks", game.num_ranks),
    )
    for key, getter in resolved_settings:
        config[key] = getter()

    self.reward_metrics = RewardMetrics(config)

    # Augmentation switches are read from module-level constants.
    self.augment_input = USE_AUGMENTED_NETWORK_INPUTS_WHEN_WRAPPING_ENV
    use_binary = USE_AUGMENTED_BINARY_INPUTS_WHEN_WRAPPING_ENV
    self.augment_input_using_binary = use_binary
    self.observation_augmenter = ObservationAugmenter(
        config, use_binary=use_binary)

    self.OPEN_HANDS = OPEN_HANDS
def run_game(game_parameters):
    """Play one game to completion with random legal moves, printing as it goes.

    Args:
      game_parameters: passed straight to `pyhanabi.HanabiGame`.

    For every non-chance turn this prints the full state, the acting player's
    observation, the encoded observation of every player, and the randomly
    chosen move. When the state becomes terminal it prints the final state
    and the score.
    """

    def show_state(state):
        """Print the state, then the same facts queried one by one."""
        print("")
        print("Current player: {}".format(state.cur_player()))
        print(state)

        # Example of more queries to provide more about this state. For
        # example, bots could use these methods to get information about
        # the state in order to act accordingly.
        print("### Information about the state retrieved separately ###")
        queries = (
            ("### Information tokens: {}", state.information_tokens),
            ("### Life tokens: {}", state.life_tokens),
            ("### Fireworks: {}", state.fireworks),
            ("### Deck size: {}", state.deck_size),
            ("### Discard pile: {}", lambda: str(state.discard_pile())),
            ("### Player hands: {}", lambda: str(state.player_hands())),
        )
        for template, query in queries:
            print(template.format(query()))
        print("")

    def show_observation(observation):
        """Print an agent observation, then its fields queried one by one."""
        print("--- Observation ---")
        print(observation)

        print("### Information about the observation retrieved separately ###")
        leading_fields = (
            ("### Current player, relative to self: {}",
             observation.cur_player_offset),
            ("### Observed hands: {}", observation.observed_hands),
            ("### Card knowledge: {}", observation.card_knowledge),
            ("### Discard pile: {}", observation.discard_pile),
            ("### Fireworks: {}", observation.fireworks),
            ("### Deck size: {}", observation.deck_size),
        )
        for template, query in leading_fields:
            print(template.format(query()))

        # All recent moves go on a single line, each preceded by a space.
        rendered_moves = "".join(
            " {}".format(move_tuple) for move_tuple in observation.last_moves())
        print("### Last moves:" + rendered_moves)

        trailing_fields = (
            ("### Information tokens: {}", observation.information_tokens),
            ("### Life tokens: {}", observation.life_tokens),
            ("### Legal moves: {}", observation.legal_moves),
        )
        for template, query in trailing_fields:
            print(template.format(query()))
        print("--- EndObservation ---")

    def show_encoded_observations(encoder, state, num_players):
        """Print the encoder's shape and the encoding for each player index."""
        print("--- EncodedObservations ---")
        print("Observation encoding shape: {}".format(encoder.shape()))
        print("Current actual player: {}".format(state.cur_player()))
        for player in range(num_players):
            encoded = encoder.encode(state.observation(player))
            print("Encoded observation for player {}: {}".format(
                player, encoded))
        print("--- EndEncodedObservations ---")

    game = pyhanabi.HanabiGame(game_parameters)
    print(game.parameter_string(), end="")
    obs_encoder = pyhanabi.ObservationEncoder(
        game, enc_type=pyhanabi.ObservationEncoderType.CANONICAL)

    state = game.new_initial_state()
    while not state.is_terminal():
        acting_player = state.cur_player()
        if acting_player == pyhanabi.CHANCE_PLAYER_ID:
            # Chance node: deal a card and re-check the state; nothing is
            # printed for these steps.
            state.deal_random_card()
            continue

        show_state(state)
        show_observation(state.observation(acting_player))
        show_encoded_observations(obs_encoder, state, game.num_players())

        legal_moves = state.legal_moves()
        print("")
        print("Number of legal moves: {}".format(len(legal_moves)))

        move = np.random.choice(legal_moves)
        print("Chose random legal move: {}".format(move))
        state.apply_move(move)

    for line in ("", "Game done. Terminal state:", "", state, ""):
        print(line)
    print("score: {}".format(state.score()))