def __init__(self, config, gamma=0.95, history_size=4, alternative_reward=False):
    r"""Build a Hanabi environment from a game configuration.

    Args:
      config: dict of game parameters. Recognized keys:
        - colors: int, Number of colors \in [2,5].
        - ranks: int, Number of ranks \in [2,5].
        - players: int, Number of players \in [2,5].
        - hand_size: int, Hand size \in [4,5].
        - max_information_tokens: int, Number of information tokens (>=0).
        - max_life_tokens: int, Number of life tokens (>=1).
        - observation_type: int. 0: Minimal observation.
          1: First-order common knowledge observation.
        - seed: int, Random seed.
        - random_start_player: bool, Random start player.
      gamma: float in [0,1], discount parameter for past rewards.
      history_size: int, number of past observations to concatenate
        (1 means only the current observation).
      alternative_reward: bool, when True the reward at the end of the game
        is achieved points - max_points + 1.
    """
    super().__init__()
    assert isinstance(config, dict), "Expected config to be of type dict."
    self.gamma = gamma
    self.alternative_reward = alternative_reward
    # Underlying C++ game plus the canonical per-player observation encoder.
    self.game = pyhanabi.HanabiGame(config)
    self.observation_encoder = pyhanabi.ObservationEncoder(
        self.game, pyhanabi.ObservationEncoderType.CANONICAL)
    self.players = self.game.num_players()
    # Stacks the last `history_size` encoded observations into one vector.
    self.obs_stacker = create_obs_stacker(self, history_size=history_size)
    num_colors = config['colors']
    self._observation_spec = {
        'observations': array_spec.ArraySpec(
            shape=(self.obs_stacker.observation_size(),), dtype=np.float64),
        'legal_moves': array_spec.ArraySpec(
            shape=(self.num_moves(),), dtype=np.bool_),
        'game_obs': array_spec.ArraySpec(
            shape=(5 + num_colors,), dtype=np.int64),
        'hand_obs': array_spec.ArraySpec(
            shape=(2, num_colors, 2), dtype=np.int64),
        'knowledge_obs': array_spec.ArraySpec(
            shape=(2, num_colors, 2), dtype=np.int64),
    }
    # Actions are flat integer move ids in [0, num_moves).
    self._action_spec = array_spec.BoundedArraySpec(
        shape=(), dtype=np.int_, minimum=0, maximum=self.num_moves() - 1)
def __init__(self, config):
    """Set up the card-identifier wrapper from a game configuration dict."""
    self.config = config
    # Fall back to the standard 8 information tokens when unspecified.
    self.max_information_tokens = config.get('information_tokens', 8)
    # Card identifier with rate 0.05 and relu activation;
    # self.feature_extractor is presumably provided by the class — TODO confirm.
    self.card_identifier = HanabiCardIdentifier(
        0.05, self.feature_extractor, config, activator='relu')
    # Observation encoder built on a fresh game instance from the same config.
    self.encoder = pyhanabi.ObservationEncoder(pyhanabi.HanabiGame(config))
    self.agent = BaselineAgent(config)
    # Enable the identifier's print flag when requested via the config.
    if config['print'] == 1:
        self.card_identifier.printt = 1
def mb_run_game(game_parameters):
    '''
    Run a game with Michael edits, choosing uniformly random legal moves.

    :param game_parameters: Dictionary: Possible parameters include
        "players": 2 <= number of players <= 5
        "colors": 1 <= number of different card colors in deck <= 5
        "rank": 1 <= number of different card ranks in deck <= 5
        "hand_size": 1 <= number of cards in player hand
        "max_information_tokens": 1 <= maximum (and initial) number of info tokens.
        "max_life_tokens": 1 <= maximum (and initial) number of life tokens.
        "seed": random number seed. -1 to use system random device to get seed.
        "random_start_player": boolean. If true, start with random player, not 0.
        "observation_type": int AgentObservationType.
    :return: None; moves and the terminal state are printed to stdout.
    '''
    # pyhanabi is the python C++ wrapper for the game.
    # Need to use it as an API into game interface.
    game = pyhanabi.HanabiGame(game_parameters)
    print(game.parameter_string(), end="")
    state = game.new_initial_state()
    while not state.is_terminal():
        # The chance "player" deals cards; resolve deals before real moves.
        if state.cur_player() == pyhanabi.CHANCE_PLAYER_ID:
            state.deal_random_card()
            continue
        legal_moves = state.legal_moves()
        print("")
        # print("Number of legal moves: {}".format(len(legal_moves)))
        move = np.random.choice(legal_moves)
        print("Chose random legal move: {}".format(move))
        state.apply_move(move)
    print("")
    print("Game done. Terminal state:")
    print("")
    print(state)
    print("")
def __init__(self, config):
    r"""Create a Hanabi TF environment with the given game configuration.

    Args:
      config: dict of game parameters. Recognized keys:
        - colors: int, Number of colors \in [2,5].
        - ranks: int, Number of ranks \in [2,5].
        - players: int, Number of players \in [2,5].
        - hand_size: int, Hand size \in [4,5].
        - max_information_tokens: int, Number of information tokens (>=0).
        - max_life_tokens: int, Number of life tokens (>=1).
        - observation_type: int. 0: Minimal observation.
          1: First-order common knowledge observation.
        - seed: int, Random seed.
        - random_start_player: bool, Random start player.
    """
    assert isinstance(config, dict), "Expected config to be of type dict."
    self.game = pyhanabi.HanabiGame(config)
    self.observation_encoder = pyhanabi.ObservationEncoder(
        self.game, pyhanabi.ObservationEncoderType.CANONICAL)
    self.players = self.game.num_players()
    # Actions: scalar move ids in [0, num_moves).
    max_action = self.num_moves() - 1
    self._action_spec = tensor_spec.BoundedTensorSpec(
        shape=(), dtype=tf.int32, minimum=0, maximum=max_action,
        name='action')
    # Observations: the flat 0/1 encoded observation vector.
    obs_length = self.vectorized_observation_shape()[0]
    self._observation_spec = tensor_spec.BoundedTensorSpec(
        shape=(obs_length,), dtype=tf.int32, minimum=0, maximum=1,
        name='observation')
def run_game(game_parameters):
    """Play a game, selecting random actions."""

    def print_state(s):
        """Dump the full state plus the individual state queries."""
        print("")
        print("Current player: {}".format(s.cur_player()))
        print(s)
        # These accessors show how a bot could query the state piecewise
        # instead of parsing the printed representation.
        print("### Information about the state retrieved separately ###")
        print("### Information tokens: {}".format(s.information_tokens()))
        print("### Life tokens: {}".format(s.life_tokens()))
        print("### Fireworks: {}".format(s.fireworks()))
        print("### Deck size: {}".format(s.deck_size()))
        print("### Discard pile: {}".format(str(s.discard_pile())))
        print("### Player hands: {}".format(str(s.player_hands())))
        print("")

    def print_observation(obs):
        """Dump an agent observation plus the individual accessors."""
        print("--- Observation ---")
        print(obs)
        print("### Information about the observation retrieved separately ###")
        print("### Current player, relative to self: {}".format(
            obs.cur_player_offset()))
        print("### Observed hands: {}".format(obs.observed_hands()))
        print("### Card knowledge: {}".format(obs.card_knowledge()))
        print("### Discard pile: {}".format(obs.discard_pile()))
        print("### Fireworks: {}".format(obs.fireworks()))
        print("### Deck size: {}".format(obs.deck_size()))
        print("### Last moves:" + "".join(
            " {}".format(mv) for mv in obs.last_moves()))
        print("### Information tokens: {}".format(obs.information_tokens()))
        print("### Life tokens: {}".format(obs.life_tokens()))
        print("### Legal moves: {}".format(obs.legal_moves()))
        print("--- EndObservation ---")

    def print_encoded_observations(enc, s, num_players):
        """Dump the canonical encoding of every player's observation."""
        print("--- EncodedObservations ---")
        print("Observation encoding shape: {}".format(enc.shape()))
        print("Current actual player: {}".format(s.cur_player()))
        for seat in range(num_players):
            print("Encoded observation for player {}: {}".format(
                seat, enc.encode(s.observation(seat))))
        print("--- EndEncodedObservations ---")

    game = pyhanabi.HanabiGame(game_parameters)
    print(game.parameter_string(), end="")
    obs_encoder = pyhanabi.ObservationEncoder(
        game, enc_type=pyhanabi.ObservationEncoderType.CANONICAL)

    state = game.new_initial_state()
    while not state.is_terminal():
        # Chance player deals cards; no agent decision yet.
        if state.cur_player() == pyhanabi.CHANCE_PLAYER_ID:
            state.deal_random_card()
            continue

        print_state(state)
        observation = state.observation(state.cur_player())
        print_observation(observation)
        print_encoded_observations(obs_encoder, state, game.num_players())

        legal_moves = state.legal_moves()
        print("")
        print("Number of legal moves: {}".format(len(legal_moves)))
        move = np.random.choice(legal_moves)
        print("Chose random legal move: {}".format(move))
        state.apply_move(move)

    print("")
    print("Game done. Terminal state:")
    print("")
    print(state)
    print("")
    print("score: {}".format(state.score()))
def run_game(config):
    """Play one Hanabi game driven by RedRanger agents, printing each round.

    Args:
      config: dict of game parameters; must contain a 'players' key giving
        the number of seats.

    Returns:
      None. Per-round state, each chosen move, and the terminal state and
      score are printed to stdout.
    """
    # Creating an instance of the Hanabi game.
    game = pyhanabi.HanabiGame(config)

    # Initialize one agent per seat.
    players_number = config['players']
    players = [
        RedRanger(config, game, playerIndex)
        for playerIndex in range(players_number)
    ]

    # Initialize the state of the Hanabi Game.
    state = game.new_initial_state()

    # Main loop of the game.
    nb_round = 1
    while not state.is_terminal():
        # The chance "player" deals cards; resolve deals before agent turns.
        if state.cur_player() == pyhanabi.CHANCE_PLAYER_ID:
            state.deal_random_card()
            continue
        active_player = players[state.cur_player()]

        print(bcolors.BACKGROUNDGREEN + "------------------------------ ROUND "
              + str(nb_round) + " --------------------------------"
              + bcolors.WHITE)
        nb_round += 1
        print("")
        print(bcolors.LIGHTGREEN + "ACTIVE PLAYER: " + str(state.cur_player())
              + bcolors.WHITE)
        print("")

        # The acting agent only sees its own (partial) observation.
        observation = state.observation(state.cur_player())
        move = active_player.act(observation)

        print(bcolors.LIGHTGREEN + "PLAYER " + str(state.cur_player())
              + " HAND" + bcolors.WHITE)
        print(state.player_hands()[state.cur_player()])
        print("")
        print(bcolors.LIGHTGREEN + "GAME STATE" + bcolors.WHITE)
        print(bcolors.LIGHTRED + "Fireworks: " + bcolors.WHITE
              + str(state.fireworks()) + " (number of cards in each stack)")
        print(bcolors.LIGHTRED + "Player Hands: " + bcolors.WHITE
              + str(state.player_hands()))
        print(bcolors.LIGHTRED + "Discard Pile: " + bcolors.WHITE
              + str(state.discard_pile()))
        print(bcolors.LIGHTRED + "Info Tokens: " + bcolors.WHITE
              + str(state.information_tokens()))
        print(bcolors.LIGHTRED + "Life Tokens: " + bcolors.WHITE
              + str(state.life_tokens()))
        print("")
        print(bcolors.LIGHTGREEN + "MOVE PLAYED" + bcolors.WHITE)
        print("Chose random legal move: {}".format(move))
        print("")

        state.apply_move(move)

    print("")
    print("Game done. Terminal state:")
    print("")
    print(state)
    print("")
    print("score: {}".format(state.score()))
def run_rlgame(config):
    """Play a game, selecting random actions."""

    def print_state(s):
        """Dump the full state plus the individual state queries."""
        print("")
        print("Current player: {}".format(s.cur_player()))
        print(s)
        # These accessors show how a bot could query the state piecewise
        # instead of parsing the printed representation.
        print("### Information about the state retrieved separately ###")
        print("### Information tokens: {}".format(s.information_tokens()))
        print("### Life tokens: {}".format(s.life_tokens()))
        print("### Fireworks: {}".format(s.fireworks()))
        print("### Deck size: {}".format(s.deck_size()))
        print("### Discard pile: {}".format(str(s.discard_pile())))
        print("### Player hands: {}".format(str(s.player_hands())))
        print("")

    def print_observation(obs):
        """Dump an agent observation plus the individual accessors."""
        print("--- Observation ---")
        print(obs)
        print("### Information about the observation retrieved separately ###")
        print("### Current player, relative to self: {}".format(
            obs.cur_player_offset()))
        print("### Observed hands: {}".format(obs.observed_hands()))
        print("### Card knowledge: {}".format(obs.card_knowledge()))
        print("### Discard pile: {}".format(obs.discard_pile()))
        print("### Fireworks: {}".format(obs.fireworks()))
        print("### Deck size: {}".format(obs.deck_size()))
        print("### Last moves:" + "".join(
            " {}".format(mv) for mv in obs.last_moves()))
        print("### Information tokens: {}".format(obs.information_tokens()))
        print("### Life tokens: {}".format(obs.life_tokens()))
        print("### Legal moves: {}".format(obs.legal_moves()))
        print("--- EndObservation ---")

    def print_encoded_observations(enc, s, num_players):
        """Dump the canonical encoding of every player's observation."""
        print("--- EncodedObservations ---")
        print("Observation encoding shape: {}".format(enc.shape()))
        print("Current actual player: {}".format(s.cur_player()))
        for seat in range(num_players):
            print("Encoded observation for player {}: {}".format(
                seat, enc.encode(s.observation(seat))))
        print("--- EndEncodedObservations ---")

    # Sketch of the eventual rl_env-based loop this function is named for:
    #   environment = rl_env.make()
    #   observation = environment.reset(config)
    #   while not done:
    #       action = ...  # agent takes action
    #       observation, reward, done, info = environment.step(action)

    # Create an instance of the Hanabi game.
    game = pyhanabi.HanabiGame(config)
    # print(game.parameter_string(), end="")
    obs_encoder = pyhanabi.ObservationEncoder(
        game, enc_type=pyhanabi.ObservationEncoderType.CANONICAL)

    # One RedRanger agent per seat.
    n_players = config['players']
    players = [RedRanger(config, game, seat) for seat in range(n_players)]

    # Initialize the state of the Hanabi game.
    state = game.new_initial_state()

    # Main loop: chance deals resolve first, then the active agent moves.
    while not state.is_terminal():
        if state.cur_player() == pyhanabi.CHANCE_PLAYER_ID:
            state.deal_random_card()
            continue
        current_agent = players[state.cur_player()]
        # print_state(state)
        obs = state.observation(state.cur_player())
        # print_observation(obs)
        # print_encoded_observations(obs_encoder, state, game.num_players())
        legal_moves = state.legal_moves()
        # print("Number of legal moves: {}".format(len(legal_moves)))
        chosen_move = current_agent.act(obs)
        print("\nPlayer: {}".format(state.cur_player()))
        print("Chose random legal move: {}".format(chosen_move))
        state.apply_move(chosen_move)

    print("")
    print("Game done. Terminal state:")
    print("")
    print(state)
    print("")
    print("score: {}".format(state.score()))