Example #1
import json_to_pyhanabi  # project-local module (assumed importable as-is)
import vectorizer        # project-local module (assumed importable as-is)


def get_observation_size(game_config):
    """ Returns the length of the vectorized observation. """
    num_players = game_config['num_total_players']  # number of players in game
    num_colors = game_config['colors']
    num_ranks = game_config['ranks']
    hand_size = game_config['hand_size']
    max_information_tokens = game_config['info_tokens']
    max_life_tokens = game_config['life_tokens']
    max_moves = game_config['max_moves']
    variant = game_config['variant']
    env = json_to_pyhanabi.create_env_mock(
        num_players=num_players,
        num_colors=num_colors,
        num_ranks=num_ranks,
        hand_size=hand_size,
        max_information_tokens=max_information_tokens,
        max_life_tokens=max_life_tokens,
        max_moves=max_moves,
        variant=variant)

    vec = vectorizer.ObservationVectorizer(env)
    legal_moves_vectorizer = vectorizer.LegalMovesVectorizer(env)  # note: constructed but unused in this function
    return vec.total_state_length
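
A minimal usage sketch for the function above. Only the key names come from the snippet; the concrete values (3 players, standard 5-color/5-rank deck, the 'No Variant' string) are illustrative assumptions:

# all values below are assumptions for illustration; only the key names appear in the snippet above
game_config = {
    'num_total_players': 3,
    'colors': 5,
    'ranks': 5,
    'hand_size': 5,
    'info_tokens': 8,         # assumed standard Hanabi token counts
    'life_tokens': 3,
    'max_moves': 30,          # assumed: 2*hand_size plays/discards + 2*(colors+ranks) hints for 3 players
    'variant': 'No Variant',  # assumed variant name
}
print(get_observation_size(game_config))  # length of the flat observation vector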
Example #2
    def __init__(self, game_config):
        """
        # ################################################ #
        # -------------------- CONFIG -------------------- #
        # ################################################ #
        """
        self.agent_name = game_config['username']  # used to identify absolute position at the table
        self.num_players = game_config['num_total_players']  # number of players in game
        self.max_life_tokens = game_config['life_tokens']
        self.max_info_tokens = game_config['info_tokens']
        self.max_deck_size = game_config['deck_size']
        self.deck_size = self.max_deck_size
        self.life_tokens = self.max_life_tokens
        self.information_tokens = self.max_info_tokens

        self.players = None  # list of names of players currently in game
        self.player_position = None  # agent's absolute position at the table
        self.agents_turn = False  # flag that is True whenever it's our turn
        self.hand_size = 4 if self.num_players > 3 else 5  # deal 5 cards when playing with 2 or 3 players
        """
        # ################################################ #
        # ------- Observed Cards and Card knowledge ------ #
        # ################################################ #
        """
        """ New cards are prepended, that means agent 1s inital draw looks like [4,3,2,1] """
        # list of all players hands as _seen_ by calling agent [excluding clues]
        self.observed_hands = list(
        )  # is refreshed in self.update() on each notify message

        # list of clues given
        self.clues = list()  # refreshed in self.update() on each notify message

        # unfortunately, the server references clued cards not by index but by an id between 0 and deck size,
        # so we need to store card_numbers to map the card ids to indices
        self.card_numbers = list()
        """
        # ################################################ #
        # ----------------- GAME STATS ------------------- #
        # ################################################ #
        """
        # is refreshed in self.update() on each notify message
        self.fireworks = {'R': 0, 'Y': 0, 'G': 0, 'W': 0, 'B': 0}

        # list of discarded cards as returned by self.card(suit, rank)
        self.discard_pile = list()

        # not actually contained in the dict returned by the rl_env.HanabiEnv
        # observation._extract_from_dict() method, but we need a history, so we add it here.
        # Alternatively, it could be added by appending obs_dict['last_moves'] = observation.last_moves() in said method.
        self.last_moves = list()
        self.variant = game_config['variant']
        self.num_colors = game_config['colors']
        self.num_ranks = game_config['ranks']
        self.max_moves = game_config['max_moves']
        self.order = 0  # number of the next card in the deck; incremented when a card is drawn/dealt
        """
        # ################################################ #
        # -------------- USE PYHANABI MOCKS -------------- #
        # ################################################ #
        """

        self.env = json_to_pyhanabi.create_env_mock(
            num_players=self.num_players,
            num_colors=self.num_colors,
            num_ranks=self.num_ranks,
            hand_size=self.hand_size,
            max_information_tokens=self.max_info_tokens,
            max_life_tokens=self.max_life_tokens,
            max_moves=self.max_moves,
            variant=self.variant)

        # flag used to designate one RL agent that keeps track of the human player's
        # environment observations, in order to keep the vectorized observations synchronized
        self.caller_is_admin = False
        if self.agent_name[-2:] == '00':
            self.caller_is_admin = True  # the admin is the first instance of the client class
        # used to determine when the human is the target of a card hint, and thus when
        # hints will be out of sync with the vectorizer environment state
        self.idx_human_player = -1
        self.vectorizer_is_synced = False  # if the human player got card hints, the other vectorizer instances must know
        self.vectorizer = vectorizer.ObservationVectorizer(self.env)
        self.legal_moves_vectorizer = vectorizer.LegalMovesVectorizer(self.env)
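
A hedged construction sketch for the class this __init__ belongs to. The class name GameStateWrapper is a hypothetical placeholder, and all config values are assumptions for illustration; only the key names and the '00' admin-suffix convention come from the snippet:

# GameStateWrapper is a hypothetical name; the snippet above only shows its __init__
state = GameStateWrapper({
    'username': 'rl_agent_00',  # the '00' suffix makes this instance the admin (see caller_is_admin)
    'num_total_players': 4,     # more than 3 players, so hand_size becomes 4
    'life_tokens': 3,           # assumed standard token counts
    'info_tokens': 8,
    'deck_size': 50,            # assumed: 5 colors x 10 cards
    'colors': 5,
    'ranks': 5,
    'max_moves': 38,            # assumed count for 4 players
    'variant': 'No Variant',    # assumed variant name
})
assert state.hand_size == 4
assert state.caller_is_admin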