Example #1
    def deal_hands(self) -> List[Iterable[Card]]:
        deck = new_deck()

        # Reshuffle until the declaring player's hand is suitable for the selected game mode.
        while True:
            np.random.shuffle(deck)
            player_hands = [set(deck[i * 8:(i + 1) * 8]) for i in range(4)]
            if self._are_cards_suitable(
                    player_hands[self._game_mode.declaring_player_id],
                    self._game_mode):
                return player_hands
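
All of the examples call new_deck() and build Card(Suit, Pip) values. For reference, here is a minimal stand-in for those definitions, inferred from the snippets themselves (4 suits x 8 pips = 32 hashable cards); the actual classes in ttbrunner/alphasheep may differ.

from enum import Enum
from itertools import product
from typing import List, NamedTuple

class Suit(Enum):
    eichel = 0
    gras = 1
    herz = 2
    schellen = 3

class Pip(Enum):
    sieben = 0
    acht = 1
    neun = 2
    zehn = 3
    unter = 4
    ober = 5
    koenig = 6
    sau = 7

class Card(NamedTuple):
    # NamedTuple keeps cards hashable, so they can be used in sets (deal_hands)
    # and as dict keys (the DQN agent's _card2id lookup).
    suit: Suit
    pip: Pip

def new_deck() -> List[Card]:
    # Full deck: 4 suits x 8 pips = 32 cards.
    return [Card(suit, pip) for suit, pip in product(Suit, Pip)]
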
Example #2
    def __init__(self, player_id: int):
        super().__init__(player_id)

        self.static_policy = [
            Card(Suit.eichel, Pip.ober),
            Card(Suit.eichel, Pip.unter),
            Card(Suit.gras, Pip.neun),
            Card(Suit.gras, Pip.ober),
            Card(Suit.herz, Pip.neun),
            Card(Suit.eichel, Pip.acht),
            Card(Suit.herz, Pip.acht),
            Card(Suit.herz, Pip.koenig),
            Card(Suit.gras, Pip.sau),
            Card(Suit.gras, Pip.unter),
            Card(Suit.schellen, Pip.ober),
            Card(Suit.herz, Pip.unter),
            Card(Suit.schellen, Pip.unter),
            Card(Suit.eichel, Pip.sau),
            Card(Suit.schellen, Pip.sau),
            Card(Suit.schellen, Pip.koenig),
            Card(Suit.herz, Pip.ober),
            Card(Suit.herz, Pip.sieben),
            Card(Suit.gras, Pip.zehn),
            Card(Suit.eichel, Pip.neun),
            Card(Suit.schellen, Pip.zehn),
            Card(Suit.gras, Pip.sieben),
            Card(Suit.herz, Pip.sau),
            Card(Suit.schellen, Pip.sieben),
            Card(Suit.eichel, Pip.sieben),
            Card(Suit.schellen, Pip.neun),
            Card(Suit.herz, Pip.zehn),
            Card(Suit.eichel, Pip.koenig),
            Card(Suit.schellen, Pip.acht),
            Card(Suit.gras, Pip.koenig),
            Card(Suit.eichel, Pip.zehn),
            Card(Suit.gras, Pip.acht),
        ]
        assert len(set(self.static_policy)) == len(
            new_deck()), "Need to include all cards!"
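
The ordering above presumably serves as a fixed play priority. A hypothetical sketch of how such a static policy could pick a card; the method name, signature, and the valid_cards argument are assumptions, not taken from the project:

    def play_card(self, cards_in_hand, valid_cards):
        # Hypothetical: play the first card in the priority list that is
        # currently in hand and legal to play right now.
        for card in self.static_policy:
            if card in cards_in_hand and card in valid_cards:
                return card
        raise ValueError("No playable card found in static_policy.")
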
Example #3
    def __init__(self, player_id: int, config: Dict, training: bool):
        """
        Creates a new DQNAgent.
        :param player_id: The unique id of the player (0-3).
        :param config: Config dict containing an agent_config node.
        :param training: If True, will train during play. This usually means worse performance (because of exploration).
                         If False, then the agent will always pick the highest-ranking valid action.
        """
        super().__init__(player_id)
        self.logger = get_class_logger(self)

        config = config["agent_config"]["dqn_agent"]
        self.config = config
        self.training = training

        # We encode cards as one-hot vectors of size 32.
        # Providing indices to perform quick lookups.
        self._id2card = new_deck()
        self._card2id = {card: i for i, card in enumerate(self._id2card)}

        # Determine length of state vector.
        state_lens = {
            "cards_in_hand": 32,
            "cards_in_trick": 3 * 32,
            "cards_already_played": 32
        }
        self._state_size = sum(state_lens[x] for x in config["state_contents"])

        # Action space: One action for every card.
        # Naturally, most actions will be invalid because the agent doesn't have the card or is not allowed to play it.
        self._action_size = 32

        # If True, then all unavailable actions are zeroed in the q-vector during learning. I thought this might improve training
        # speed, but it turned out to provide only a slight benefit. Incompatible with (and superseded by) allow_invalid_actions.
        self._zero_q_for_invalid_actions = config["zero_q_for_invalid_actions"]

        # If allowed, then the agent can choose an invalid card and get punished for it, while staying
        # in the same state. If not allowed, invalid actions are automatically skipped when playing.
        # See discussion in experiment_log.md
        self._allow_invalid_actions = config["allow_invalid_actions"]
        self._invalid_action_reward = config["invalid_action_reward"]
        if self._allow_invalid_actions and self._zero_q_for_invalid_actions:
            raise ValueError(
                "allow_invalid_actions and zero_q_for_invalid_actions are mutually exclusive."
            )

        # Discount and exploration rate
        self._gamma = config["gamma"]
        self._epsilon = config["epsilon"]

        # Experience replay buffer for minibatch learning
        self.experience_buffer = deque(maxlen=config["experience_buffer_len"])

        # Remember the state and action (card) played in the previous trick, so we can judge it once we receive feedback.
        # Also remember which actions were valid at that time.
        self._prev_state = None
        self._prev_action = None
        self._prev_available_actions = None
        self._in_terminal_state = False

        # Create Q network (current state) and Target network (successor state). The networks are synced after every episode (game).
        self.q_network = self._build_model()
        self.target_network = self._build_model()
        self._align_target_model()
        self._batch_size = config["batch_size"]

        # Don't retrain after every single experience.
        # Retraining every time is expensive and doesn't add much information (rewards are received only at the end of the game).
        # If we wait for more experiences to accumulate before retraining, we get more fresh data before doing expensive training.
        # NOTE: This kind of breaks the "sync networks after every game" idea, but it nevertheless works very well to speed up training.
        self._retrain_every_n = config["retrain_every"]
        self._experiences_since_last_retrain = 0

        # Memory: here are some things the agent remembers between moves. This is basically feature engineering;
        # it would be more interesting to have the agent learn these with an RNN or similar!
        self._mem_cards_already_played = set()

        # For display in the GUI
        self._current_q_vals = None
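
The comments above describe cards as one-hot vectors of size 32, concatenated into a state vector according to state_contents. A minimal sketch, assuming numpy is imported as np and a helper method named _encode_cards (not part of the original snippet), of how _card2id could be used for that encoding:

    def _encode_cards(self, cards) -> np.ndarray:
        # Multi-hot vector of size 32: one slot per card id in the deck.
        vec = np.zeros(len(self._id2card), dtype=np.float32)
        for card in cards:
            vec[self._card2id[card]] = 1.0
        return vec
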
Example #4
    def deal_hands(self) -> List[Iterable[Card]]:
        deck = new_deck()
        np.random.shuffle(deck)

        player_hands = [set(deck[i * 8:(i + 1) * 8]) for i in range(4)]
        return player_hands
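
A quick usage check, assuming the stand-in new_deck() sketched after Example #1 and some dealer object exposing deal_hands(): four disjoint hands of eight cards that together cover the whole deck.

hands = dealer.deal_hands()                     # `dealer` is a placeholder for the object defining deal_hands()
assert len(hands) == 4
assert all(len(hand) == 8 for hand in hands)
assert set().union(*hands) == set(new_deck())   # every card is dealt exactly once
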
Example #5
File: gui.py Project: ttbrunner/alphasheep
    def __enter__(self):
        # Show PyGame window. Assets can only be loaded after this.
        self._screen = pygame.display.set_mode(self.resolution)
        self._card_assets = {card: pygame.image.load(get_card_img_path(card)).convert() for card in new_deck()}
        pygame.display.set_caption("Interactive AlphaSheep")

        # Subscribe to events of the controller
        self.game_state.ev_changed.subscribe(self.on_game_state_changed)

        # If a player agent is the GUIAgent, register a callback that blocks until the user selects a card.
        assert not any(isinstance(p.agent, GUIAgent) for p in self.game_state.players[1:]), "Only Player 0 can have a GUIAgent."
        if isinstance(self.game_state.players[0].agent, GUIAgent):

            def select_card_callback(reset_clicks=False):
                if reset_clicks:
                    self._clicked_pos = None
                self._clicked_card = None
                self.wait_and_draw_until(lambda: self._clicked_card is not None)
                return self._clicked_card

            self.game_state.players[0].agent.register_gui_callback(select_card_callback)
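
select_card_callback blocks until self._clicked_card is set by the click-handling code. A hypothetical sketch of what the wait_and_draw_until loop might look like; the event handling and the _draw_frame helper are assumptions, not the project's actual implementation:

    def wait_and_draw_until(self, predicate):
        # Hypothetical: pump PyGame events and keep redrawing until the predicate holds.
        clock = pygame.time.Clock()
        while not predicate():
            for event in pygame.event.get():
                if event.type == pygame.MOUSEBUTTONUP:
                    self._clicked_pos = event.pos    # later hit-tested against the card sprites
                elif event.type == pygame.QUIT:
                    raise KeyboardInterrupt("Window closed by user.")
            self._draw_frame()                       # assumed helper that redraws the table
            clock.tick(30)                           # cap the loop at ~30 FPS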