def __init__(self,
                 config,
                 gamma=0.95,
                 history_size=4,
                 alternative_reward=False):
        r"""Creates an environment with the given game configuration.

        Args:
            config: dict, With parameters for the game. Config takes the following
                keys and values.
                    - colors: int, Number of colors \in [2,5].
                    - ranks: int, Number of ranks \in [2,5].
                    - players: int, Number of players \in [2,5].
                    - hand_size: int, Hand size \in [4,5].
                    - max_information_tokens: int, Number of information tokens (>=0).
                    - max_life_tokens: int, Number of life tokens (>=1).
                    - observation_type: int.
                        0: Minimal observation.
                        1: First-order common knowledge observation.
                    - seed: int, Random seed.
                    - random_start_player: bool, Random start player.
            gamma: float in [0,1], discount factor applied to past rewards.
            history_size: int, number of past observations to concatenate
                    (1 means only the current observation).
            alternative_reward: bool, flag for the alternative reward scheme,
                    i.e. reward at end of game = achieved points - max_points + 1.
        """
        super().__init__()
        assert isinstance(config, dict), "Expected config to be of type dict."
        self.game = pyhanabi.HanabiGame(config)
        n_colors = config['colors']
        self.gamma = gamma
        self.alternative_reward = alternative_reward
        self.observation_encoder = pyhanabi.ObservationEncoder(
            self.game, pyhanabi.ObservationEncoderType.CANONICAL)
        self.players = self.game.num_players()
        self.obs_stacker = create_obs_stacker(self, history_size=history_size)
        self._observation_spec = {
            'observations':
            array_spec.ArraySpec(shape=(self.obs_stacker.observation_size(), ),
                                 dtype=np.float64),
            'legal_moves':
            array_spec.ArraySpec(shape=(self.num_moves(), ), dtype=np.bool_),
            'game_obs':
            array_spec.ArraySpec(shape=(5 + n_colors, ), dtype=np.int64),
            'hand_obs':
            array_spec.ArraySpec(shape=(2, n_colors, 2), dtype=np.int64),
            'knowledge_obs':
            array_spec.ArraySpec(shape=(2, n_colors, 2), dtype=np.int64)
        }
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype=np.int_, minimum=0, maximum=self.num_moves() - 1)
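
# Example usage (a minimal sketch): this __init__ is torn from an
# environment class, so the class name HanabiEnv below is an assumption;
# the config keys follow the docstring above.
#
#     config = {
#         'colors': 5,
#         'ranks': 5,
#         'players': 2,
#         'hand_size': 5,
#         'max_information_tokens': 8,
#         'max_life_tokens': 3,
#         'observation_type': 1,  # first-order common knowledge
#         'seed': 42,
#         'random_start_player': False,
#     }
#     env = HanabiEnv(config, gamma=0.95, history_size=4)
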
def __init__(self, config):
    # Initialize
    self.config = config
    # Extract max info tokens or set default to 8.
    self.max_information_tokens = config.get('information_tokens', 8)
    # Set up card identifier
    self.card_identifier = HanabiCardIdentifier(0.05,
                                                self.feature_extractor,
                                                config,
                                                activator='relu')
    # Set up encoder
    self.encoder = pyhanabi.ObservationEncoder(pyhanabi.HanabiGame(config))
    self.agent = BaselineAgent(config)
    if config['print'] == 1:
        self.card_identifier.printt = 1
def mb_run_game(game_parameters):
  ''' Run a game with Michael's edits.
  :param game_parameters: Dictionary:
  Possible parameters include
    "players": 2 <= number of players <= 5
    "colors": 1 <= number of different card colors in deck <= 5
    "rank": 1 <= number of different card ranks in deck <= 5
    "hand_size": 1 <= number of cards in player hand
    "max_information_tokens": 1 <= maximum (and initial) number of info tokens.
    "max_life_tokens": 1 <= maximum (and initial) number of life tokens.
    "seed": random number seed. -1 to use system random device to get seed.
    "random_start_player": boolean. If true, start with random player, not 0.
    "observation_type": int AgentObservationType.
    """
  :return: null
  '''
  # pyhanabi is the python C++ wrapper for the game.
  # Need to use it as an API into game interface.
  game = pyhanabi.HanabiGame(game_parameters)
  print(game.parameter_string(), end="")
  obs_encoder = pyhanabi.ObservationEncoder(
    game, enc_type=pyhanabi.ObservationEncoderType.CANONICAL)
  state = game.new_initial_state()
  while not state.is_terminal():
    if state.cur_player() == pyhanabi.CHANCE_PLAYER_ID:
      state.deal_random_card()
      continue

    observation = state.observation(state.cur_player())
    legal_moves = state.legal_moves()
    print("")
    # print("Number of legal moves: {}".format(len(legal_moves)))
    move = np.random.choice(legal_moves)
    print("Chose random legal move: {}".format(move))
    state.apply_move(move)

  print("")
  print("Game done. Terminal state:")
  print("")
  print(state)
  print("")
    def __init__(self, config):
        r"""Creates an environment with the given game configuration.

        Args:
            config: dict, With parameters for the game. Config takes the following
                keys and values.
                    - colors: int, Number of colors \in [2,5].
                    - ranks: int, Number of ranks \in [2,5].
                    - players: int, Number of players \in [2,5].
                    - hand_size: int, Hand size \in [4,5].
                    - max_information_tokens: int, Number of information tokens (>=0).
                    - max_life_tokens: int, Number of life tokens (>=1).
                    - observation_type: int.
                        0: Minimal observation.
                        1: First-order common knowledge observation.
                    - seed: int, Random seed.
                    - random_start_player: bool, Random start player.
        """
        assert isinstance(config, dict), "Expected config to be of type dict."
        self.game = pyhanabi.HanabiGame(config)

        self.observation_encoder = pyhanabi.ObservationEncoder(
            self.game, pyhanabi.ObservationEncoderType.CANONICAL)
        self.players = self.game.num_players()

        self._action_spec = tensor_spec.BoundedTensorSpec(
            shape=(),
            dtype=tf.int32,
            minimum=0,
            maximum=self.num_moves() - 1,
            name='action')
        self._observation_spec = tensor_spec.BoundedTensorSpec(
            shape=(self.vectorized_observation_shape()[0], ),
            dtype=tf.int32,
            minimum=0,
            maximum=1,
            name='observation')
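
# A brief sketch of what these specs admit (sample_spec_nest is tf_agents'
# uniform spec sampler; the 20-move bound below is only an illustration):
#
#     from tf_agents.specs import tensor_spec
#     spec = tensor_spec.BoundedTensorSpec(
#         shape=(), dtype=tf.int32, minimum=0, maximum=20 - 1, name='action')
#     action = tensor_spec.sample_spec_nest(spec, seed=0)  # scalar in [0, 19]
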
def run_game(game_parameters):
  """Play a game, selecting random actions."""

  def print_state(state):
    """Print some basic information about the state."""
    print("")
    print("Current player: {}".format(state.cur_player()))
    print(state)

    # Example of more queries that provide more information about this
    # state. For example, bots could use these methods to get information
    # about the state in order to act accordingly.
    print("### Information about the state retrieved separately ###")
    print("### Information tokens: {}".format(state.information_tokens()))
    print("### Life tokens: {}".format(state.life_tokens()))
    print("### Fireworks: {}".format(state.fireworks()))
    print("### Deck size: {}".format(state.deck_size()))
    print("### Discard pile: {}".format(str(state.discard_pile())))
    print("### Player hands: {}".format(str(state.player_hands())))
    print("")

  def print_observation(observation):
    """Print some basic information about an agent observation."""
    print("--- Observation ---")
    print(observation)

    print("### Information about the observation retrieved separately ###")
    print("### Current player, relative to self: {}".format(
        observation.cur_player_offset()))
    print("### Observed hands: {}".format(observation.observed_hands()))
    print("### Card knowledge: {}".format(observation.card_knowledge()))
    print("### Discard pile: {}".format(observation.discard_pile()))
    print("### Fireworks: {}".format(observation.fireworks()))
    print("### Deck size: {}".format(observation.deck_size()))
    move_string = "### Last moves:"
    for move_tuple in observation.last_moves():
      move_string += " {}".format(move_tuple)
    print(move_string)
    print("### Information tokens: {}".format(observation.information_tokens()))
    print("### Life tokens: {}".format(observation.life_tokens()))
    print("### Legal moves: {}".format(observation.legal_moves()))
    print("--- EndObservation ---")

  def print_encoded_observations(encoder, state, num_players):
    print("--- EncodedObservations ---")
    print("Observation encoding shape: {}".format(encoder.shape()))
    print("Current actual player: {}".format(state.cur_player()))
    for i in range(num_players):
      print("Encoded observation for player {}: {}".format(
          i, encoder.encode(state.observation(i))))
    print("--- EndEncodedObservations ---")

  game = pyhanabi.HanabiGame(game_parameters)
  print(game.parameter_string(), end="")
  obs_encoder = pyhanabi.ObservationEncoder(
      game, enc_type=pyhanabi.ObservationEncoderType.CANONICAL)

  state = game.new_initial_state()
  while not state.is_terminal():
    if state.cur_player() == pyhanabi.CHANCE_PLAYER_ID:
      state.deal_random_card()
      continue

    print_state(state)

    observation = state.observation(state.cur_player())
    print_observation(observation)
    print_encoded_observations(obs_encoder, state, game.num_players())

    legal_moves = state.legal_moves()
    print("")
    print("Number of legal moves: {}".format(len(legal_moves)))

    move = np.random.choice(legal_moves)
    print("Chose random legal move: {}".format(move))

    state.apply_move(move)

  print("")
  print("Game done. Terminal state:")
  print("")
  print(state)
  print("")
  print("score: {}".format(state.score()))
def run_game(config):
    """Play a game, selecting random actions."""
    def print_state(state):
        """Print some basic information about the state."""
        print("")
        print("Current player: {}".format(state.cur_player()))
        print(state)

        # Example of more queries that provide more information about this
        # state. For example, bots could use these methods to get information
        # about the state in order to act accordingly.
        print("### Information about the state retrieved separately ###")
        print("### Information tokens: {}".format(state.information_tokens()))
        print("### Life tokens: {}".format(state.life_tokens()))
        print("### Fireworks: {}".format(state.fireworks()))
        print("### Deck size: {}".format(state.deck_size()))
        print("### Discard pile: {}".format(str(state.discard_pile())))
        print("### Player hands: {}".format(str(state.player_hands())))
        print("")

    def print_observation(observation):
        """Print some basic information about an agent observation."""
        print("--- Observation ---")
        print(observation)

        print("### Information about the observation retrieved separately ###")
        print("### Current player, relative to self: {}".format(
            observation.cur_player_offset()))
        print("### Observed hands: {}".format(observation.observed_hands()))
        print("### Card knowledge: {}".format(observation.card_knowledge()))
        print("### Discard pile: {}".format(observation.discard_pile()))
        print("### Fireworks: {}".format(observation.fireworks()))
        print("### Deck size: {}".format(observation.deck_size()))
        move_string = "### Last moves:"
        for move_tuple in observation.last_moves():
            move_string += " {}".format(move_tuple)
        print(move_string)
        print("### Information tokens: {}".format(
            observation.information_tokens()))
        print("### Life tokens: {}".format(observation.life_tokens()))
        print("### Legal moves: {}".format(observation.legal_moves()))
        print("--- EndObservation ---")

    def print_encoded_observations(encoder, state, num_players):
        print("--- EncodedObservations ---")
        print("Observation encoding shape: {}".format(encoder.shape()))
        print("Current actual player: {}".format(state.cur_player()))
        for i in range(num_players):
            print("Encoded observation for player {}: {}".format(
                i, encoder.encode(state.observation(i))))
        print("--- EndEncodedObservations ---")

    ### Creating an instance of the Hanabi game
    game = pyhanabi.HanabiGame(config)
    #print(game.parameter_string(), end="")
    obs_encoder = pyhanabi.ObservationEncoder(
        game, enc_type=pyhanabi.ObservationEncoderType.CANONICAL)

    ### Initialize players
    players_number = config['players']
    players = [
        RedRanger(config, game, playerIndex)
        for playerIndex in range(players_number)
    ]

    ### Initialize the state of the Hanabi Game
    state = game.new_initial_state()

    ### Main loop of the game
    nb_round = 1
    while not state.is_terminal():
        if state.cur_player() == pyhanabi.CHANCE_PLAYER_ID:
            state.deal_random_card()
            continue

        active_player = players[state.cur_player()]

        ##print
        print(bcolors.BACKGROUNDGREEN +
              "------------------------------ ROUND " + str(nb_round) +
              " --------------------------------" + bcolors.WHITE)
        nb_round += 1
        print("")
        print(bcolors.LIGHTGREEN + "ACTIVE PLAYER: " +
              str(state.cur_player()) + bcolors.WHITE)
        print("")
        ##

        observation = state.observation(state.cur_player())
        legal_moves = state.legal_moves()
        move = active_player.act(observation)

        ##print
        print(bcolors.LIGHTGREEN + "PLAYER " + str(state.cur_player()) +
              " HAND" + bcolors.WHITE)
        print(state.player_hands()[state.cur_player()])
        print("")
        print(bcolors.LIGHTGREEN + "GAME STATE" + bcolors.WHITE)
        print(bcolors.LIGHTRED + "Fireworks: " + bcolors.WHITE +
              str(state.fireworks()) + "   (number of cards in each stack)")
        print(bcolors.LIGHTRED + "Player Hands: " + bcolors.WHITE +
              str(state.player_hands()))
        print(bcolors.LIGHTRED + "Discard Pile: " + bcolors.WHITE +
              str(state.discard_pile()))
        print(bcolors.LIGHTRED + "Info Tokens: " + bcolors.WHITE +
              str(state.information_tokens()))
        print(bcolors.LIGHTRED + "Life Tokens: " + bcolors.WHITE +
              str(state.life_tokens()))
        print("")
        print(bcolors.LIGHTGREEN + "MOVE PLAYED" + bcolors.WHITE)
        print("Chose random legal move: {}".format(move))
        print("")
        ##

        state.apply_move(move)

    print("")
    print("Game done. Terminal state:")
    print("")
    print(state)
    print("")
    print("score: {}".format(state.score()))
def run_rlgame(config):
    """Play a game, selecting random actions."""

    def print_state(state):
        """Print some basic information about the state."""
        print("")
        print("Current player: {}".format(state.cur_player()))
        print(state)

        # Example of more queries that provide more information about this
        # state. For example, bots could use these methods to get information
        # about the state in order to act accordingly.
        print("### Information about the state retrieved separately ###")
        print("### Information tokens: {}".format(state.information_tokens()))
        print("### Life tokens: {}".format(state.life_tokens()))
        print("### Fireworks: {}".format(state.fireworks()))
        print("### Deck size: {}".format(state.deck_size()))
        print("### Discard pile: {}".format(str(state.discard_pile())))
        print("### Player hands: {}".format(str(state.player_hands())))
        print("")

    def print_observation(observation):
        """Print some basic information about an agent observation."""
        print("--- Observation ---")
        print(observation)

        print("### Information about the observation retrieved separately ###")
        print("### Current player, relative to self: {}".format(
            observation.cur_player_offset()))
        print("### Observed hands: {}".format(observation.observed_hands()))
        print("### Card knowledge: {}".format(observation.card_knowledge()))
        print("### Discard pile: {}".format(observation.discard_pile()))
        print("### Fireworks: {}".format(observation.fireworks()))
        print("### Deck size: {}".format(observation.deck_size()))
        move_string = "### Last moves:"
        for move_tuple in observation.last_moves():
            move_string += " {}".format(move_tuple)
        print(move_string)
        print("### Information tokens: {}".format(observation.information_tokens()))
        print("### Life tokens: {}".format(observation.life_tokens()))
        print("### Legal moves: {}".format(observation.legal_moves()))
        print("--- EndObservation ---")

    def print_encoded_observations(encoder, state, num_players):
        print("--- EncodedObservations ---")
        print("Observation encoding shape: {}".format(encoder.shape()))
        print("Current actual player: {}".format(state.cur_player()))
        for i in range(num_players):
          print("Encoded observation for player {}: {}".format(
              i, encoder.encode(state.observation(i))))
        print("--- EndEncodedObservations ---")


    # environment = rl_env.make()
    # observation = environment.reset(config)
    # done = False
    # while not done:
    #     # Agent takes an action
    #     action = ...
    #     # Environment takes a step
    #     observation, reward, done, info = environment.step(action)
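    #
    # A completed version of the sketch above (a hedged guess at the
    # standard hanabi_learning_environment rl_env API; 'Hanabi-Full' is
    # the usual registered environment name):
    #
    #     environment = rl_env.make('Hanabi-Full', num_players=config['players'])
    #     observations = environment.reset()
    #     done = False
    #     while not done:
    #         current = observations['current_player']
    #         obs = observations['player_observations'][current]
    #         action = np.random.choice(obs['legal_moves'])
    #         observations, reward, done, info = environment.step(action)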



    ### Creating an instance of the Hanabi game
    game = pyhanabi.HanabiGame(config)
    #print(game.parameter_string(), end="")
    obs_encoder = pyhanabi.ObservationEncoder(
        game, enc_type=pyhanabi.ObservationEncoderType.CANONICAL)


    ### Initialize players
    players_number = config['players']
    players = [
        RedRanger(config, game, playerIndex)
        for playerIndex in range(players_number)
    ]


    ### Initialize the state of the Hanabi Game
    state = game.new_initial_state()


    ### Main loop of the game
    while not state.is_terminal():
        if state.cur_player() == pyhanabi.CHANCE_PLAYER_ID:
            state.deal_random_card()
            continue

        active_player = players[state.cur_player()]
        #print_state(state)
        observation = state.observation(state.cur_player())
        #print_observation(observation)
        #print_encoded_observations(obs_encoder, state, game.num_players())
        legal_moves = state.legal_moves()
        # print("Number of legal moves: {}".format(len(legal_moves)))
        move = active_player.act(observation)
        print("\nPlayer: {}".format(state.cur_player()))
        print("Chose random legal move: {}".format(move))
        state.apply_move(move)

    print("")
    print("Game done. Terminal state:")
    print("")
    print(state)
    print("")
    print("score: {}".format(state.score()))