Beispiel #1
0
    def play_card(self, cards_in_hand: Iterable[Card],
                  cards_in_trick: List[Card], game_mode: GameMode):
        """Plays a uniformly random legal card.

        Shuffles a copy of the hand and returns the first card the game mode
        permits. Raises StopIteration if no card is allowed (should not happen).
        """
        shuffled_hand = list(cards_in_hand)
        np.random.shuffle(shuffled_hand)

        legal_cards = (card for card in shuffled_hand
                       if game_mode.is_play_allowed(card,
                                                    cards_in_hand=shuffled_hand,
                                                    cards_in_trick=cards_in_trick))
        return next(legal_cards)
Beispiel #2
0
    def play_card(self, cards_in_hand: Iterable[Card],
                  cards_in_trick: List[Card], game_mode: GameMode):
        """Plays the first card of the fixed preference order (static_policy)
        that is both in hand and legal to play.

        Raises:
            ValueError: if no card in hand is allowed (should never happen).
        """
        def is_playable(card):
            # A candidate must actually be held AND be permitted by the game mode.
            return card in cards_in_hand and game_mode.is_play_allowed(
                card, cards_in_hand=cards_in_hand, cards_in_trick=cards_in_trick)

        chosen = next((card for card in self.static_policy if is_playable(card)),
                      None)
        if chosen is not None:
            return chosen

        raise ValueError(
            "None of the Player's cards seem to be allowed! This should never happen! Player has cards: {}"
            .format(",".join(str(c) for c in cards_in_hand)))
Beispiel #3
0
def sort_for_gui(cards: Iterable[Card], game_mode: GameMode) -> List[Card]:
    """Sorts cards for convenient display: descending order, trumps first.

    Among trumps, Obers rank above Unters, which rank above the remaining
    trumps; within each group, cards are ordered by suit and then by pip.
    """
    # If no game has been selected (yet), sort as if expecting a Herz-solo (same as any Rufspiel).
    if game_mode is None:
        game_mode = GameMode(GameContract.suit_solo,
                             trump_suit=Suit.herz,
                             declaring_player_id=None)

    # Lookup tables replace the if/elif cascades. Missing keys score 0
    # (Suit.schellen, and Pip.sieben which is explicitly 0 anyway).
    suit_rank = {Suit.eichel: 30, Suit.gras: 20, Suit.herz: 10}
    pip_rank = {Pip.sau: 7, Pip.zehn: 6, Pip.koenig: 5,
                Pip.ober: 4,    # Non-trump ober, for example during Wenz
                Pip.unter: 3,   # Non-trump unter
                Pip.neun: 2, Pip.acht: 1, Pip.sieben: 0}

    def sort_key(card: Card):
        key = suit_rank.get(card.suit, 0) + pip_rank.get(card.pip, 0)
        # Trumps sort above everything else; trump Obers above trump Unters.
        if game_mode.is_trump(card):
            key += 1000
            if card.pip == Pip.ober:
                key += 500
            elif card.pip == Pip.unter:
                key += 400
        return key

    return sorted(cards, key=sort_key, reverse=True)
    def _are_cards_suitable(self, cards_in_hand, game_mode: GameMode):
        """Quick-and-dirty heuristic: is this hand strong enough for a solo?"""
        if game_mode.contract != GameContract.suit_solo:
            raise NotImplementedError(
                "Only Suit-solo is implemented at this time.")

        # Hard requirement: at least 6 trumps.
        if sum(1 for c in cards_in_hand if game_mode.is_trump(c)) < 6:
            return False
        # On top of that: either good Obers, lots of Unters, or the Eichel-Ober.
        if sum(1 for c in cards_in_hand if c.pip == Pip.ober) >= 2:
            return True
        if sum(1 for c in cards_in_hand if c.pip == Pip.unter) >= 3:
            return True
        return Card(Suit.eichel, Pip.ober) in cards_in_hand
Beispiel #5
0
    def play_card(self, cards_in_hand: Iterable[Card], cards_in_trick: List[Card], game_mode: GameMode) -> Card:
        """Asks the human user (via the attached GUI callback) to select cards
        until they pick one that is legal to play, and returns that card."""
        assert self._select_card_callback is not None, "Must first attach to a Gui!"

        # Have the user select cards until they hit something that is actually allowed :)
        reset_click = False
        while True:
            card = self._select_card_callback(reset_click)
            if game_mode.is_play_allowed(card, cards_in_hand=cards_in_hand, cards_in_trick=cards_in_trick):
                return card

            # Usually, the GUI caches the previous click.
            # So when a "choose" event follows a "click to continue", it's theoretically 2 clicks.
            # However, if the user clicked on a card on the FIRST click, the SECOND click will automatically choose that card.
            # That is intended behaviour, but in this case we need to reset the click cache.
            reset_click = True
            # FIX: Logger.warn() is a deprecated alias (since Python 3.3); use warning().
            self.logger.warning(f"Cannot play selected card {card} - not allowed!")
Beispiel #6
0
def main() -> None:
    """Trains agents in rigged Herz-Solo games, logging a moving-average win
    rate and periodically saving checkpoints to the experiment directory."""
    # Game Setup:
    # - In every game, Player 0 will play a Herz-Solo
    # - The cards are rigged so that Player 0 always receives a pretty good hand, most of them should be winnable.

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", help="An experiment config file. Must always be specified.", required=True)
    args = parser.parse_args()

    # Init logging and adjust log levels for some classes.
    init_logging()
    logger = get_named_logger("{}.main".format(os.path.splitext(os.path.basename(__file__))[0]))
    get_class_logger(GameController).setLevel(logging.INFO)     # Don't log specifics of a single game

    # Load config.
    # Create experiment dir and prepend it to all paths.
    # If it already exists, then training will simply resume from existing checkpoints in that dir.
    logger.info(f'Loading config from "{args.config}"...')
    config = load_config(args.config)
    experiment_dir = config["experiment_dir"]
    os.makedirs(config["experiment_dir"], exist_ok=True)
    # Map: player id -> absolute checkpoint path inside the experiment dir.
    agent_checkpoint_paths = {i: os.path.join(experiment_dir, name) for i, name in config["training"]["agent_checkpoint_names"].items()}

    # Create agents: one of three known types per seat, as specified in the config.
    agents = []
    for i in range(4):
        x = config["training"]["player_agents"][i]
        if x == "DQNAgent":
            agent = DQNAgent(i, config=config, training=True)
        elif x == "RandomCardAgent":
            agent = RandomCardAgent(i)
        elif x == "RuleBasedAgent":
            agent = RuleBasedAgent(i)
        else:
            raise ValueError(f'Unknown agent type: "{x}"')
        agents.append(agent)

    # Load weights for agents. A missing file is fine: training starts fresh
    # and the file is created at the first checkpoint save.
    for i, weights_path in agent_checkpoint_paths.items():
        if not os.path.exists(weights_path):
            logger.info('Weights file "{}" does not exist. Will create new file.'.format(weights_path))
        else:
            agents[i].load_weights(weights_path)

    players = [Player(f"Player {i} ({a.__class__.__name__})", agent=a) for i, a in enumerate(agents)]

    # Rig the game so Player 0 has the cards to play a Herz-Solo. Force them to play it.
    game_mode = GameMode(GameContract.suit_solo, trump_suit=Suit.herz, declaring_player_id=0)
    controller = GameController(players, dealing_behavior=DealWinnableHand(game_mode), forced_game_mode=game_mode)

    n_episodes = config["training"]["n_episodes"]
    logger.info(f"Will train for {n_episodes} episodes.")

    # Calculate win% as simple moving average (just for display in the logfile).
    # The real evaluation is done in eval_rl_agent.py, with training=False.
    win_rate = float('nan')
    n_won = 0
    sma_window_len = 1000
    won_deque = deque()

    save_every_s = config["training"]["save_checkpoints_every_s"]

    time_start = timer()
    time_last_save = timer()
    for i_episode in range(n_episodes):
        if i_episode > 0:
            # Calculate avg win%.
            # Until the window is full, average over all episodes so far; afterwards,
            # drop the oldest result each episode so the deque stays at sma_window_len.
            if i_episode < sma_window_len:
                win_rate = n_won / i_episode
            else:
                if won_deque.popleft() is True:
                    n_won -= 1
                win_rate = n_won / sma_window_len

            # Log progress every 100 episodes.
            if i_episode % 100 == 0:
                s_elapsed = timer() - time_start
                logger.info("Ran {} Episodes. Win rate (last {} episodes) is {:.1%}. Speed is {:.0f} episodes/second.".format(
                    i_episode, sma_window_len, win_rate, i_episode/s_elapsed))

            # Save model checkpoint (time-based, not episode-based).
            # Also make a copy for evaluation - the eval jobs will sync on this file and later remove it.
            if timer() - time_last_save > save_every_s:
                for i, weights_path in agent_checkpoint_paths.items():
                    agents[i].save_weights(weights_path, overwrite=True)
                    shutil.copyfile(weights_path, f"{os.path.splitext(weights_path)[0]}.for_eval.h5")
                time_last_save = timer()

        winners = controller.run_game()
        won = winners[0]        # We only track Player 0 (the solo player).
        won_deque.append(won)
        if won:
            n_won += 1

    logger.info("Finished playing.")
    logger.info("Final win rate: {:.1%}".format(win_rate))
Beispiel #7
0
def eval_agent(agent: PlayerAgent) -> float:
    """
    Evaluates an agent by playing a large number of games against 3 RuleBasedAgents.

    :param agent: The agent to evaluate.
    :return: The mean win rate of the agent.
    """

    logger = get_named_logger("{}.eval_agent".format(os.path.splitext(os.path.basename(__file__))[0]))
    # logger.setLevel(logging.DEBUG)

    # Main set of players: the evaluated agent at seat 0, rule-based baselines elsewhere.
    players = [
        Player("0-agent", agent=agent),
        Player("1-Zenzi", agent=RuleBasedAgent(1)),
        Player("2-Franz", agent=RuleBasedAgent(2)),
        Player("3-Andal", agent=RuleBasedAgent(3))
    ]

    # Rig the game so Player 0 has the cards to play a Herz-Solo.
    game_mode = GameMode(GameContract.suit_solo, trump_suit=Suit.herz, declaring_player_id=0)
    rng_dealer = DealWinnableHand(game_mode)

    # Run 20k different games. Each game can be replicated (via DealExactly) and sampled multiple times.
    # Right now, our baseline (RuleBasedAgent) is almost deterministic, so it's ok to sample each game only once.
    n_games = 20000
    n_agent_samples = 1
    perf_record = np.empty(n_games, dtype=np.float32)

    def sample_games(sample_players, n_samples, dealing_behavior, i_player_dealer):
        # Plays n_samples games of the same deal; returns the fraction won by seat 0.
        # Defined once (not re-created per game iteration) and takes all per-game
        # state as explicit parameters rather than capturing loop variables.
        n_samples_won = 0
        for _ in range(n_samples):
            controller = GameController(sample_players, i_player_dealer=i_player_dealer,
                                        dealing_behavior=dealing_behavior, forced_game_mode=game_mode)
            winners = controller.run_game()
            if winners[0]:
                n_samples_won += 1
        return n_samples_won / n_samples

    time_start = timer()
    for i_game in range(n_games):
        # Periodic progress logging.
        if i_game > 0 and i_game % 100 == 0:
            s_elapsed = timer() - time_start
            mean_perf = np.mean(perf_record[:i_game])
            logger.info("Ran {} games. Mean agent winrate={:.3f}. "
                        "Speed is {:.1f} games/second.".format(i_game, mean_perf, i_game/s_elapsed))

        # Deal a single random hand and then create a dealer that will replicate this hand,
        # so we can take multiple samples of this game.
        player_hands = rng_dealer.deal_hands()
        replicating_dealer = DealExactly(player_hands)
        i_player_dealer = i_game % 4    # Rotate the dealer seat each game.

        agent_win_rate = sample_games(players, n_agent_samples, replicating_dealer, i_player_dealer)
        logger.debug("Agent win rate: {:.1%}.".format(agent_win_rate))

        perf_record[i_game] = agent_win_rate

    s_elapsed = timer() - time_start
    mean_perf = np.mean(perf_record).item()
    logger.info("Finished evaluation. Took {:.0f} seconds.".format(s_elapsed))
    logger.info("Mean agent winrate={:.3f}.".format(mean_perf))

    return mean_perf
    def run_game(self) -> List[bool]:
        """
        Runs a single game (and shifts the dealing player clockwise). Can be called multiple times.
        :returns a list of 4 bools, indicating which player(s) won the game.
        """
        def log_phase():
            # Small helper: logs the phase that was just entered.
            self.logger.debug("===== Entering Phase: {} =====".format(
                self.game_state.game_phase))

        # A game may only start from pre_deal, i.e. the previous game was fully cleaned up.
        assert self.game_state.game_phase == GamePhase.pre_deal

        for p in self.game_state.players:
            p.agent.notify_new_game()

        # DEALING PHASE
        self.game_state.game_phase = GamePhase.dealing
        log_phase()
        self.logger.debug("Player {} is dealing.".format(
            self.game_state.players[self.game_state.i_player_dealer]))
        hands = self.dealing_behavior.deal_hands()
        for i, p in enumerate(self.game_state.players):
            p.cards_in_hand = hands[i]
        # Notify observers (e.g. a GUI) after every state change.
        self.game_state.ev_changed.notify()

        # BIDDING PHASE
        # Choose the game mode and declaring player.
        self.game_state.game_phase = GamePhase.bidding
        log_phase()
        if self.forced_game_mode is not None:
            # We have been instructed to only play this game.
            game_mode = self.forced_game_mode
        else:
            # Free choice - for now, randomly select somebody to play a Herz Solo.
            # TODO: allow agents to bid & declare on their own
            game_mode = GameMode(GameContract.suit_solo,
                                 trump_suit=Suit.herz,
                                 declaring_player_id=np.random.randint(4))
        i_decl = game_mode.declaring_player_id
        self.logger.debug("Game Variant: Player {} is declaring a {}!".format(
            self.game_state.players[i_decl], game_mode))
        self.game_state.game_mode = game_mode
        self.game_state.ev_changed.notify()

        # PLAYING PHASE
        self.game_state.game_phase = GamePhase.playing
        log_phase()
        self._playing_phase()

        # POST-GAME PHASE
        # Count score and determine winner.
        # TODO: For now, always scoring a solo.
        self.game_state.game_phase = GamePhase.post_play
        log_phase()

        # Each player's score: sum of the pip values of all cards in their scored tricks.
        player_scores = [
            sum(pip_scores[c.pip] for c in p.cards_in_scored_tricks)
            for p in self.game_state.players
        ]
        for i, p in enumerate(self.game_state.players):
            self.logger.debug("Player {} has score {}.".format(
                p, player_scores[i]))
        # Solo scoring: the declaring player wins alone with more than 60 points;
        # otherwise the other three players win together.
        if player_scores[i_decl] > 60:
            player_win = [i == i_decl for i in range(4)]
        else:
            player_win = [i != i_decl for i in range(4)]
        self.logger.debug("=> Player {} {} the {}!".format(
            self.game_state.players[i_decl],
            "wins" if player_win[i_decl] else "loses", game_mode))

        self.logger.debug("Summary:")
        for i, p in enumerate(self.game_state.players):
            self.logger.debug("Player {} {}.".format(
                p, "wins" if player_win[i] else "loses"))
            p.agent.notify_game_result(player_win[i],
                                       own_score=player_scores[i])
        self.game_state.ev_changed.notify()

        # Reset to PRE-DEAL PHASE.
        self.game_state.game_phase = GamePhase.pre_deal
        log_phase()
        self.game_state.clear_after_game()
        # The dealer seat rotates for the next game.
        self.game_state.i_player_dealer = (self.game_state.i_player_dealer +
                                           1) % 4
        self.game_state.ev_changed.notify()

        return player_win
    def _play_card_solo_declaring(self, cards_in_hand: Iterable[Card], cards_in_trick: List[Card], game_mode: GameMode) -> Card:
        """Chooses a card when a solo is being played and we are the declaring player.

        Picks a high-level action (lead with the highest trump, play a color Sau,
        beat the trick high/low, or dump a low-value "Spatz") and then a concrete
        card that implements it.
        """
        # We have a set of high-level actions, such as "play highest trump", "play low color" etc.
        # Right now, this is only for logging purposes, but ultimately we would want some sort of hierarchical planning:
        # First, choose an action, and later find a card that fits.
        # These action definitions could also be shared across behaviors, so this could remove some of the redundancy
        #  we get when duplicating behavior for different game modes.

        # All cards that are legal to play right now.
        valid_cards = [c for c in cards_in_hand if game_mode.is_play_allowed(c, cards_in_hand, cards_in_trick)]
        # Our legal trumps, sorted ascending by power (weakest first, strongest last).
        own_trumps = self._trumps_by_power(in_cards=valid_cards, game_mode=game_mode)

        if len(cards_in_trick) == 0:
            # We are leading.
            if any(own_trumps):
                # As a general rule, we'd like to play our trumps high to low.
                action = "play_highest_trump"
                selected_card = own_trumps[-1]
            else:
                # No trump, play color.
                saus = [c for c in cards_in_hand if c.pip == Pip.sau]
                if any(saus):
                    # Play a (uniformly random) color sau.
                    action = "play_color_sau"
                    selected_card = saus[np.random.randint(len(saus))]
                else:
                    # Play a Spatz (low value).
                    # Depending on what happened in the game, it might be very important which color is played.
                    # But this goes beyond this simple baseline :)
                    # NOTE(review): _cards_by_value appears to sort ascending (index 0 = cheapest) - confirm.
                    action = "play_spatz"
                    selected_card = self._cards_by_value(valid_cards)[0]

        else:
            # Not leading.
            # Do we need to match? Get all valid options.
            c_lead = cards_in_trick[0]

            if any(c for c in valid_cards if game_mode.is_trump(c)):
                # We can play trump (in fact, any trump we have).  Which one will we pick?

                # Find out if we can beat the preceding cards.
                if any(game_mode.is_trump(c) for c in cards_in_trick):
                    # Trump(s) already in the trick: only strictly stronger trumps beat it.
                    beating_cards = [c for c in own_trumps
                                     if self._trump_power(c) > max(self._trump_power(c2) for c2 in cards_in_trick
                                                                   if game_mode.is_trump(c2))]
                else:
                    # No trump in the trick yet: any of our trumps beats it.
                    beating_cards = own_trumps

                if any(beating_cards):
                    # We can beat the preceding cards.
                    # a) pick lowest trump that will beat the prev players. Do this if 0-1 players come after us.
                    # b) pick a high trump to prevent following players to beat. Do this if 2 players come after us.
                    beat_low = len(cards_in_trick) > 1
                    beating_cards_by_power = self._trumps_by_power(beating_cards, game_mode)
                    if beat_low:
                        action = "beat_trump_low"
                        selected_card = beating_cards_by_power[0]
                    else:
                        action = "beat_trump_high"
                        selected_card = beating_cards_by_power[-1]
                else:
                    # We actually cannot beat them. Play a spatz (low-value card).
                    action = "play_spatz"
                    selected_card = self._cards_by_value(valid_cards)[0]

            elif not game_mode.is_trump(c_lead):
                # We can't play trump, but the leading card also is not a trump.
                # Therefore we might beat it with a higher card of the same suit.
                beating_cards = [c for c in valid_cards
                                 if c.suit == c_lead.suit
                                 and self._pip_power[c.pip] > max(self._pip_power[c2.pip] for c2 in cards_in_trick
                                                                  if c2.suit == c_lead.suit)]
                if any(beating_cards):
                    # In the case of suit, always beat high (hopefully with a sau).
                    action = "beat_suit_high"
                    selected_card = sorted(beating_cards, key=lambda c: self._pip_power[c.pip], reverse=True)[0]
                else:
                    action = "play_spatz"
                    selected_card = self._cards_by_value(valid_cards)[0]

            else:
                # We cannot beat it with or without trump. Play a spatz (low-value card).
                action = "play_spatz"
                selected_card = self._cards_by_value(valid_cards)[0]

        self.logger.debug(f'Executing action "{action}".')
        # Sanity check: whatever branch was taken must have produced a legal card.
        assert game_mode.is_play_allowed(selected_card, cards_in_hand=cards_in_hand, cards_in_trick=cards_in_trick)
        return selected_card
 def _trumps_by_power(self, in_cards: Iterable[Card], game_mode: GameMode) -> List[Card]:
     # Filters in_cards down to trumps and returns them sorted by power,
     # ascending (weakest trump first; ordering defined by self._trump_power).
     return sorted([c for c in in_cards if game_mode.is_trump(c)], key=self._trump_power)
    def _play_card_solo_not_declaring(self, cards_in_hand: Iterable[Card], cards_in_trick: List[Card], game_mode: GameMode) -> Card:
        """Chooses a card when a solo is being played and the declaring player is the enemy.

        General plan: lead with non-trump Saus, "schmier" points to partners who
        already beat the enemy, beat the enemy's card when possible, and
        otherwise dump a low-value card ("Spatz").
        """
        # All cards that are legal to play right now.
        valid_cards = [c for c in cards_in_hand if game_mode.is_play_allowed(c, cards_in_hand, cards_in_trick)]
        own_trumps = self._trumps_by_power(in_cards=valid_cards, game_mode=game_mode)
        non_trumps = set(valid_cards).difference(own_trumps)

        if len(cards_in_trick) == 0:
            # We are leading.
            # As a general rule, we want to play any non-trump Sau we have (IF this suit has not been played before).
            # We hope that the enemy has a card of this suit and is forced to match.
            # TODO: create memory, and check if a) the suit has been played before, b) the enemy is known to have it
            # TODO: in this case, do NOT play it. There are exceptions, but well.
            saus = [c for c in non_trumps if c.pip == Pip.sau]
            if any(saus):
                action = "play_color_sau"
                selected_card = saus[np.random.randint(len(saus))]
            else:
                # No sau: don't play 10 etc., rather play a small card and hope our partners have the sau
                action = "play_spatz"
                selected_card = self._cards_by_value(non_trumps if any(non_trumps) else own_trumps)[0]

        else:
            # Not leading.
            # In this situation, we want to:
            # - minimize damage as the enemy will probably take the trick
            # - maximize score whenever it looks like we (or our partners) might take it.

            # Has the enemy already played their card? Walk backwards from our own
            # seat through the trick to find the enemy's card, if present.
            # TODO: This is HORRIBLE and I'm tired. Make a nice helper function and abstract this modulo crap away for all eternity, PLEASE!
            enemy_id = game_mode.declaring_player_id
            enemy_card_id = None
            i_p = self.player_id
            for i in range(len(cards_in_trick)):
                i_p = (i_p - 1) % 4
                if i_p == enemy_id:
                    enemy_card_id = len(cards_in_trick) - 1 - i
                    break
            enemy_card = cards_in_trick[enemy_card_id] if enemy_card_id is not None else None

            if enemy_card_id is not None:
                # The enemy has already made their move.
                if self._winning_card(cards_in_trick, game_mode) != enemy_card:
                    # A partner has already beaten the enemy.
                    # Give them as many points as possible.
                    non_trump_by_value = self._cards_by_value(non_trumps)
                    if any(non_trump_by_value):
                        # Put the most expensive non-trump
                        action = "schmier_points"
                        selected_card = non_trump_by_value[-1]
                    else:
                        # We are not allowed to schmier a non-trump. Put the most expensive trump.
                        # TODO: don't schmier an ober!
                        #       DQNAgent learns to exploit this!
                        action = "schmier_trump"
                        selected_card = self._cards_by_value(own_trumps)[-1]

                else:
                    # The enemy has already played, but it's not clear who will take the trick.
                    # Can we beat it?
                    beating_cards = [c for c in valid_cards if c == self._winning_card(cards_in_trick + [c], game_mode)]
                    if any(beating_cards):
                        # We can actually beat the enemy. Use the most expensive option.
                        # TODO: don't schmier-stech with ober if not necessary!
                        #       DQNAgent learns to exploit this!
                        action = "beat_expensive"
                        selected_card = self._cards_by_value(beating_cards)[-1]
                    else:
                        # Can't beat them. Can we expect a partner to beat them?
                        # TODO: create memory of cards that are still in the game. Recognize if the enemy played the currently highest trump.
                        # Right now, we don't think a partner could ever beat the enemy. With that memory, if the enemy played a low trump, we could
                        # wager on our partners in some cases.
                        # Play a Spatz.
                        action = "play_spatz"
                        selected_card = self._cards_by_value(valid_cards)[0]
            else:
                # The enemy has not yet played.
                if game_mode.is_trump(cards_in_trick[0]):
                    # WTF, our partner played trump. Idiot! We expect the enemy will surely take it.
                    action = "play_spatz;insult_leader"
                    selected_card = self._cards_by_value(valid_cards)[0]
                else:
                    # It's a suit card. As a general rule, we hope that the enemy has to match and we might take it.
                    # This might not be the case depending on what suits were already played, but we are not that smart right now :)
                    beating_cards = [c for c in valid_cards if c == self._winning_card(cards_in_trick + [c], game_mode)]
                    if any(beating_cards):
                        # We can beat our partners.
                        if game_mode.is_trump(beating_cards[0]):
                            # We don't have that suit and can beat it with a trump.
                            # There is a lot of options here, but we will stick to the golden rule: "Mi'm Unter gehst net unter".
                            beating_unter = [c for c in valid_cards if c.pip == Pip.unter]
                            if any(beating_unter):
                                # Play the lowest unter, which makes the trick "safe" (the enemy can't beat with an expensive sau).
                                action = "beat_with_unter"
                                selected_card = self._trumps_by_power(beating_unter, game_mode)[0]
                            else:
                                # It's also fine to be risky and beat with sau/zehn. We expect the enemy to take those away soon anyway.
                                action = "beat_expensive"
                                selected_card = self._cards_by_value(beating_cards)[-1]
                        else:
                            # We must match the suit and can beat our partner.
                            # Do it, since this probably means playing the sau, which is good.
                            action = "match_expensive"
                            selected_card = self._cards_by_value(valid_cards)[-1]
                    else:
                        # The partner lead is non-trump and we can't beat the partners.
                        # TODO: special situation: did the 2nd partner beat with a trump,
                        #  so high that the enemy won't be able to take it? then schmier.
                        # BUGFIX: was `c.suit == cards_in_trick[0]`, comparing a Suit to a Card -
                        # that is never true, so the schmier branch below was unreachable.
                        if any(c for c in cards_in_trick if c.suit == cards_in_trick[0].suit and c.pip == Pip.sau):
                            # One of the partners played the suit-sau. We hope the enemy needs to match!
                            # TODO: don't schmier if it's clear from memory that the enemy can beat it.
                            action = "schmier_points"
                            selected_card = self._cards_by_value(valid_cards)[-1]
                        else:
                            # It's non-trump, low, we need to match, looks bad man.
                            action = "play_spatz"
                            selected_card = self._cards_by_value(valid_cards)[0]

        self.logger.debug(f'Executing action "{action}".')
        # Sanity check: whatever branch was taken must have produced a legal card.
        assert game_mode.is_play_allowed(selected_card, cards_in_hand=cards_in_hand, cards_in_trick=cards_in_trick)
        return selected_card
Beispiel #12
0
def main():
    """Entry point: run an endless loop of rigged Herz-Solo games in the GUI.

    Parses command-line options to pick the agent controlling Player 0
    (static / rule / random / alphasheep / user), configures logging, rigs
    the deal so Player 0 always holds a winnable Herz-Solo hand, and then
    runs single games until the user closes the window or presses [Esc].

    Raises:
        ValueError: If --p0-agent=alphasheep is chosen without also
            providing --alphasheep-checkpoint and --agent-config.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--p0-agent",
        type=str,
        choices=['static', 'rule', 'random', 'alphasheep', 'user'],
        required=True)
    parser.add_argument(
        "--alphasheep-checkpoint",
        help="Checkpoint for AlphaSheep, if --p0-agent=alphasheep.",
        required=False)
    parser.add_argument(
        "--agent-config",
        help="YAML file, containing agent specifications for AlphaSheep.",
        required=False)
    args = parser.parse_args()

    agent_choice = args.p0_agent
    as_checkpoint_path = args.alphasheep_checkpoint
    as_config_path = args.agent_config
    if agent_choice == "alphasheep" and (not as_checkpoint_path
                                         or not as_config_path):
        # Bugfix: the message previously said "--p0_agent", but the actual
        # CLI flag (defined above) is spelled "--p0-agent".
        raise ValueError(
            "Need to specify --alphasheep-checkpoint and --agent-config if --p0-agent=alphasheep."
        )

    # Init logging and adjust log levels for some classes.
    init_logging()
    logger = get_named_logger("{}.main".format(
        os.path.splitext(os.path.basename(__file__))[0]))
    get_class_logger(GameController).setLevel(
        logging.DEBUG)  # Log every single card.
    get_class_logger(Gui).setLevel(logging.DEBUG)  # Log mouse clicks.
    get_class_logger(RuleBasedAgent).setLevel(
        logging.DEBUG)  # Log decisions by the rule-based players.

    # Create the agent for Player 0.
    if agent_choice == "alphasheep":
        # Load config. We ignore the "training" and "experiment" sections, but we need "agent_config".
        logger.info(f'Loading config from "{as_config_path}"...')
        config = load_config(as_config_path)
        get_class_logger(DQNAgent).setLevel(logging.DEBUG)  # Log Q-values.
        alphasheep_agent = DQNAgent(0, config=config, training=False)
        alphasheep_agent.load_weights(as_checkpoint_path)
        p0 = Player("0-AlphaSheep", agent=alphasheep_agent)
    elif agent_choice == "user":
        p0 = Player("0-User", agent=GUIAgent(0))
    elif agent_choice == "rule":
        p0 = Player("0-Hans", agent=RuleBasedAgent(0))
    elif agent_choice == "static":
        p0 = Player("0-Static", agent=StaticPolicyAgent(0))
    else:
        p0 = Player("0-RandomGuy", agent=RandomCardAgent(0))

    # Players 1-3 are RuleBasedAgents.
    players = [
        p0,
        Player("1-Zenzi", agent=RuleBasedAgent(1)),
        Player("2-Franz", agent=RuleBasedAgent(2)),
        Player("3-Andal", agent=RuleBasedAgent(3))
    ]

    # Rig the game so Player 0 has the cards to play a Herz-Solo.
    # Also, force them to play it.
    game_mode = GameMode(GameContract.suit_solo,
                         trump_suit=Suit.herz,
                         declaring_player_id=0)
    controller = GameController(players,
                                dealing_behavior=DealWinnableHand(game_mode),
                                forced_game_mode=game_mode)

    # The GUI initializes PyGame and registers on events provided by the controller. Everything single-threaded.
    #
    # The controller runs the game as usual. Whenever the GUI receives an event, it can block execution, so the controller must wait
    # for the GUI to return control. Until then, it can draw stuff and wait for user input (mouse clicks, card choices, ...).
    logger.info("Starting GUI.")
    with Gui(controller.game_state):  # The bound name was unused; only the context manager is needed.
        # Run an endless loop of single games.
        logger.info("Starting game loop...")
        try:
            while True:
                controller.run_game()
        except UserQuitGameException:  # Closing the window or pressing [Esc]
            logger.info("User quit game.")

    logger.info("Shutdown.")
Beispiel #13
0
    def play_card(self, cards_in_hand: Iterable[Card],
                  cards_in_trick: List[Card], game_mode: GameMode):
        """Select a card to play via epsilon-greedy action selection over the Q-network.

        When training, also stores the experience tuple from the previous
        decision (reward 0: only terminal states are rewarded) and, if an
        invalid action is picked, punishes it with a negative reward and
        retries. Finally updates the agent's memory of already-played cards.

        Args:
            cards_in_hand: Cards currently held by this player.
            cards_in_trick: Cards already lying in the current trick.
            game_mode: Used to determine which plays are legal.

        Returns:
            The selected Card (always a legal play).

        Raises:
            ValueError: If the agent is in a terminal state; notify_new_game()
                must be called before playing again.
        """
        if self._in_terminal_state:
            raise ValueError(
                "Agent is in terminal state. Did you start a new game? Need to call notify_new_game() first."
            )

        # Encode the current state.
        state = self._encode_state(cards_in_hand=cards_in_hand,
                                   cards_in_trick=cards_in_trick)

        # Did a previous action lead to this state? Save experience for training.
        if self.training and self._prev_action is not None:
            # Reward=0: We reward only the terminal state.
            self._receive_experience(
                state=self._prev_state,
                action=self._prev_action,
                reward=0,
                next_state=state,
                terminated=False,
                available_actions=self._prev_available_actions)

        # Create a mask of available actions.
        # Bugfix: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin bool is the documented replacement for this alias.
        available_actions = np.zeros(self._action_size, dtype=bool)
        for card in cards_in_hand:
            if game_mode.is_play_allowed(card,
                                         cards_in_hand=cards_in_hand,
                                         cards_in_trick=cards_in_trick):
                available_actions[self._card2id[card]] = True

        # Pick an action (a card).
        selected_card = None
        while selected_card is None:
            # We run this in a loop, because the agent can select an invalid action and is then asked to learn and try again.

            if self.training and np.random.rand() <= self._epsilon:
                # Explore: Select a random card. For faster training, exploration only targets valid actions.
                # Q-values are reported as uniform since no prediction was made.
                self._current_q_vals = np.ones(
                    self._action_size, dtype=np.float32) / self._action_size
                tmp_cards = list(cards_in_hand)
                np.random.shuffle(tmp_cards)
                selected_card = next(c for c in tmp_cards
                                     if available_actions[self._card2id[c]])
            else:
                # Exploit: Predict q-values for the current state and select the best action.
                q_values = np.array(
                    self.q_network.predict_on_batch(state[np.newaxis, :]))[0]
                self._current_q_vals = q_values
                best_action_ids = np.argsort(q_values)[::-1]
                self.logger.debug("Q values:\n" + "\n".join(
                    f"{q_values[a]}: {self._id2card[a]}"
                    for a in best_action_ids))

                if self._allow_invalid_actions and self.training:
                    # If invalid is allowed (only during training): select the "best" action.
                    selected_card = self._id2card[best_action_ids[0]]
                    if not available_actions[best_action_ids[0]]:
                        # Did we pick an invalid move? Time for punishment!
                        # Experience: we stay in the same state, but get a negative reward.
                        self._receive_experience(
                            state=state,
                            action=self._encode_action(selected_card),
                            reward=self._invalid_action_reward,
                            next_state=state,
                            terminated=False,
                            available_actions=available_actions)
                        selected_card = None  # Retry on the next loop iteration.
                else:
                    # Invalid is not allowed: pick the "best" action that is allowed.
                    selected_card = next(self._id2card[a]
                                         for a in best_action_ids
                                         if available_actions[a])

        # Store the state and chosen action until the next call (in which we will receive feedback)
        self._prev_state = state
        self._prev_action = self._encode_action(selected_card)
        self._prev_available_actions = available_actions

        # Memory: remember cards that were played.
        self._mem_cards_already_played.update(cards_in_trick)
        self._mem_cards_already_played.add(selected_card)

        return selected_card