Example 1
    def __init__(self, game_state: GameState):
        self.game_state = game_state
        self.logger = get_class_logger(self)

        pygame.init()
        self.resolution = (1280, 800)
        self._screen = None
        self._card_assets = None
        self._fps_clock = pygame.time.Clock()

        # Every player has a "Card surface" onto which their cards are drawn.
        # This card surface is then rotated and translated into position.
        p_card_surf_dims = (310, 170)
        p_text_surf_dims = (200, 140)
        self._player_card_surfs = [pygame.Surface(p_card_surf_dims) for _ in range(4)]
        self._player_text_surfs = [pygame.Surface(p_text_surf_dims) for _ in range(4)]

        # Surface in the middle, containing the "cards on the table".
        self._middle_trick_surf = pygame.Surface((300, 270))

        pygame.freetype.init()
        self._font = pygame.freetype.SysFont(None, 11)
        self._font.antialiased = True

        # Latest click - these values survive only for one draw call: they are set during the draw call and read afterwards.
        self._clicked_pos = None
        self._clicked_card = None        # For now, these are only Player 0's cards.
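
    # Sketch (not the project's actual draw code, which is not part of this snippet): one way a per-player
    # card surface could be rotated into place and blitted onto the screen with pygame. The angle step and
    # target position are assumptions for illustration only.
    def _blit_player_surface_sketch(self, i_player: int):
        angle = 90 * i_player  # assumed: players sit at 90-degree steps around the table
        rotated = pygame.transform.rotate(self._player_card_surfs[i_player], angle)
        self._screen.blit(rotated, (0, 0))  # placeholder position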
Example 2
    def __init__(self, player_id: int):
        super().__init__(player_id)

        self.logger = get_class_logger(self)

        # "Power" values for quickly determining which card can beat which.
        # Defining this here because we don't want to be dependent on the enum int values.
        self._suit_power = {Suit.eichel: 40, Suit.gras: 30, Suit.herz: 20, Suit.schellen: 10}
        self._pip_power = {Pip.sau: 8, Pip.zehn: 7, Pip.koenig: 6, Pip.ober: 5, Pip.unter: 4, Pip.neun: 3, Pip.acht: 2, Pip.sieben: 1}
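
    # Illustrative sketch only (not the agent's actual decision logic, and it ignores trump handling):
    # if two non-trump cards share a suit, the one with the higher pip power wins. Assumes cards expose
    # .suit and .pip attributes matching the enums used above.
    def _beats_same_suit_sketch(self, card_a, card_b) -> bool:
        assert card_a.suit == card_b.suit
        return self._pip_power[card_a.pip] > self._pip_power[card_b.pip]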
Example 3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--p0-agent", type=str, choices=['static', 'rule', 'random'], required=True)
    args = parser.parse_args()
    agent_choice = args.p0_agent

    # Init logging and adjust log levels for some classes.
    init_logging()
    logger = get_named_logger("{}.main".format(os.path.splitext(os.path.basename(__file__))[0]))
    get_class_logger(GameController).setLevel(logging.INFO)     # Don't log specifics of a single game

    # Create the agent for Player 0.
    if agent_choice == "rule":
        agent = RuleBasedAgent(0)
    elif agent_choice == "static":
        agent = StaticPolicyAgent(0)
    else:
        agent = RandomCardAgent(0)

    logger.info(f'Evaluating agent "{agent.__class__.__name__}"')
    perf = eval_agent(agent)
Example 4
    def __init__(self,
                 players: List[Player],
                 i_player_dealer=0,
                 dealing_behavior: DealingBehavior = DealFairly(),
                 forced_game_mode: GameMode = None):
        """
        Creates a GameController and, together with it, a GameState. The controller can be reused: call run_game() to simulate a single game.
        :param players: the players, along with their agents.
        :param i_player_dealer: The player who deals at the start (player i+1 then leads the first game).
        :param dealing_behavior: Optional - the dealing behavior. Default: deal fairly.
        :param forced_game_mode: Optional - if not None, players cannot bid; every game is played in the provided mode.
        """
        assert len(players) == 4

        self.logger = get_class_logger(self)
        self.logger.debug("Initializing game.")
        self.logger.debug("Players:")
        for p in players:
            self.logger.debug("Player {} with behavior {}.".format(p, p.agent))

        self.game_state = GameState(players, i_player_dealer=i_player_dealer)
        self.dealing_behavior = dealing_behavior
        self.forced_game_mode = forced_game_mode
        assert forced_game_mode is None or forced_game_mode.declaring_player_id is not None, "Must provide a specific player."
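
# Usage sketch (not part of the original snippet): the controller is constructed once and then reused,
# exactly as the training and GUI scripts below do. Player/agent names mirror those scripts.
players = [Player(f"Player {i}", agent=RandomCardAgent(i)) for i in range(4)]
controller = GameController(players)
for _ in range(10):
    winners = controller.run_game()  # per-player win results, as consumed by the training script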
Example 5
def main():
    # Game Setup:
    # - In every game, Player 0 will play a Herz-Solo.
    # - The cards are rigged so that Player 0 always receives a pretty good hand; most games should be winnable.

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", help="An experiment config file. Must always be specified.", required=True)
    args = parser.parse_args()

    # Init logging and adjust log levels for some classes.
    init_logging()
    logger = get_named_logger("{}.main".format(os.path.splitext(os.path.basename(__file__))[0]))
    get_class_logger(GameController).setLevel(logging.INFO)     # Don't log specifics of a single game

    # Load config.
    # Create experiment dir and prepend it to all paths.
    # If it already exists, then training will simply resume from existing checkpoints in that dir.
    logger.info(f'Loading config from "{args.config}"...')
    config = load_config(args.config)
    experiment_dir = config["experiment_dir"]
    os.makedirs(config["experiment_dir"], exist_ok=True)
    agent_checkpoint_paths = {i: os.path.join(experiment_dir, name) for i, name in config["training"]["agent_checkpoint_names"].items()}

    # Create agents.
    agents = []
    for i in range(4):
        x = config["training"]["player_agents"][i]
        if x == "DQNAgent":
            agent = DQNAgent(i, config=config, training=True)
        elif x == "RandomCardAgent":
            agent = RandomCardAgent(i)
        elif x == "RuleBasedAgent":
            agent = RuleBasedAgent(i)
        else:
            raise ValueError(f'Unknown agent type: "{x}"')
        agents.append(agent)

    # Load weights for agents.
    for i, weights_path in agent_checkpoint_paths.items():
        if not os.path.exists(weights_path):
            logger.info('Weights file "{}" does not exist. Will create new file.'.format(weights_path))
        else:
            agents[i].load_weights(weights_path)

    players = [Player(f"Player {i} ({a.__class__.__name__})", agent=a) for i, a in enumerate(agents)]

    # Rig the game so Player 0 has the cards to play a Herz-Solo. Force them to play it.
    game_mode = GameMode(GameContract.suit_solo, trump_suit=Suit.herz, declaring_player_id=0)
    controller = GameController(players, dealing_behavior=DealWinnableHand(game_mode), forced_game_mode=game_mode)

    n_episodes = config["training"]["n_episodes"]
    logger.info(f"Will train for {n_episodes} episodes.")

    # Calculate win% as simple moving average (just for display in the logfile).
    # The real evaluation is done in eval_rl_agent.py, with training=False.
    win_rate = float('nan')
    n_won = 0
    sma_window_len = 1000
    won_deque = deque()

    save_every_s = config["training"]["save_checkpoints_every_s"]

    time_start = timer()
    time_last_save = timer()
    for i_episode in range(n_episodes):
        if i_episode > 0:
            # Calculate avg win%
            if i_episode < sma_window_len:
                win_rate = n_won / i_episode
            else:
                if won_deque.popleft():
                    n_won -= 1
                win_rate = n_won / sma_window_len

            # Log
            if i_episode % 100 == 0:
                s_elapsed = timer() - time_start
                logger.info("Ran {} Episodes. Win rate (last {} episodes) is {:.1%}. Speed is {:.0f} episodes/second.".format(
                    i_episode, sma_window_len, win_rate, i_episode/s_elapsed))

            # Save model checkpoint.
            # Also make a copy for evaluation - the eval jobs will sync on this file and later remove it.
            if timer() - time_last_save > save_every_s:
                for i, weights_path in agent_checkpoint_paths.items():
                    agents[i].save_weights(weights_path, overwrite=True)
                    shutil.copyfile(weights_path, f"{os.path.splitext(weights_path)[0]}.for_eval.h5")
                time_last_save = timer()

        winners = controller.run_game()
        won = winners[0]
        won_deque.append(won)
        if won:
            n_won += 1

    logger.info("Finished playing.")
    logger.info("Final win rate: {:.1%}".format(win_rate))
Example 6
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument("--config",
                        help="A yaml config file. Must always be specified.",
                        required=True)
    parser.add_argument("--loop",
                        help="If set, then runs in an endless loop.",
                        required=False,
                        action="store_true")
    args = parser.parse_args()
    do_loop = args.loop

    # Init logging and adjust log levels for some classes.
    init_logging()
    logger = get_named_logger("{}.main".format(
        os.path.splitext(os.path.basename(__file__))[0]))
    get_class_logger(GameController).setLevel(
        logging.INFO)  # Don't log specifics of a single game

    # Load config and check experiment dir.
    logger.info(f'Loading config from "{args.config}"...')
    config = load_config(args.config)
    experiment_dir = config["experiment_dir"]
    while not os.path.exists(experiment_dir):
        logger.warning(
            f'The experiment dir specified in the config does not yet exist: "{experiment_dir}" - waiting...'
        )
        sleep(10)

    agent_checkpoint_paths = {
        i: os.path.join(experiment_dir, name)
        for i, name in config["training"]["agent_checkpoint_names"].items()
    }

    while True:
        # Wait until a ".for_eval" checkpoint exists (for any of possibly multiple agents). Then rename it to ".in_eval.[uniqueid]".
        # In this way, multiple eval scripts can run in parallel.
        # When the evaluation is done, we will rename it to ".{score}".

        for i_agent, cp_path in agent_checkpoint_paths.items():
            # If multiple agents are specified in the config, evaluate all of them.

            checkpoint_path_in = f"{os.path.splitext(cp_path)[0]}.for_eval.h5"
            checkpoint_path_tmp = f"{os.path.splitext(cp_path)[0]}.in_eval.pid{os.getpid()}.h5"
            if os.path.exists(checkpoint_path_in):
                # Load the latest checkpoint and evaluate it

                try:
                    os.rename(checkpoint_path_in, checkpoint_path_tmp)
                    logger.info('Found a new checkpoint, evaluating...')

                    # Create agent
                    agent_type = config["training"]["player_agents"][i_agent]
                    if agent_type == "DQNAgent":
                        alphasheep_agent = DQNAgent(0,
                                                    config=config,
                                                    training=False)
                    else:
                        raise ValueError(
                            f"Unknown agent type specified: {agent_type}")
                    alphasheep_agent.load_weights(checkpoint_path_tmp)

                    # Eval agent
                    current_perf = eval_agent(alphasheep_agent)

                    # Now we know the performance. Find best-performing previous checkpoint that exists on disk
                    logger.info(
                        "Comparing performance to previous checkpoints...")
                    splitext = os.path.splitext(cp_path)
                    checkpoints = glob.glob("{}-*{}".format(
                        splitext[0], splitext[1]))
                    best_perf = 0.
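                    # Checkpoint filenames encode their eval score (see the rename below), e.g. "name-0.85.h5";
                    # parse the score out of each filename and keep the best one.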
                    for cp in checkpoints:
                        perf_str = re.findall(
                            r"{}-(.*){}".format(os.path.basename(splitext[0]),
                                                splitext[1]), cp)
                        if len(perf_str) > 0:
                            p = float(perf_str[0])
                            if p > best_perf:
                                best_perf = p

                    if best_perf > 0:
                        logger.info(
                            "Previously best checkpoint has performance {}".
                            format(best_perf))
                    else:
                        logger.info("Did not find any previous results.")

                    if current_perf > best_perf:
                        best_perf = current_perf
                        logger.info("Found new best-performing checkpoint!")
                        cp_best = "{}-{}{}".format(splitext[0], str(best_perf),
                                                   splitext[1])

                        os.rename(checkpoint_path_tmp, cp_best)

                except OSError:
                    # Probably a concurrent rename by another worker; continue and try again.
                    logger.exception("Could not rename checkpoint!")

        logger.info("Waiting...")
        sleep(10)

        if not do_loop:
            # Run only once.
            return
Example 7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--p0-agent",
        type=str,
        choices=['static', 'rule', 'random', 'alphasheep', 'user'],
        required=True)
    parser.add_argument(
        "--alphasheep-checkpoint",
        help="Checkpoint for AlphaSheep, if --p0-agent=alphasheep.",
        required=False)
    parser.add_argument(
        "--agent-config",
        help="YAML file, containing agent specifications for AlphaSheep.",
        required=False)
    args = parser.parse_args()
    agent_choice = args.p0_agent
    as_checkpoint_path = args.alphasheep_checkpoint
    as_config_path = args.agent_config
    if agent_choice == "alphasheep" and (not as_checkpoint_path
                                         or not as_config_path):
        raise ValueError(
            "Need to specify --alphasheep-checkpoint and --agent-config if --p0_agent=alphasheep."
        )

    # Init logging and adjust log levels for some classes.
    init_logging()
    logger = get_named_logger("{}.main".format(
        os.path.splitext(os.path.basename(__file__))[0]))
    get_class_logger(GameController).setLevel(
        logging.DEBUG)  # Log every single card.
    get_class_logger(Gui).setLevel(logging.DEBUG)  # Log mouse clicks.
    get_class_logger(RuleBasedAgent).setLevel(
        logging.DEBUG)  # Log decisions by the rule-based players.

    # Create the agent for Player 0.
    if agent_choice == "alphasheep":

        # Load config. We ignore the "training" and "experiment" sections, but we need "agent_config".
        logger.info(f'Loading config from "{as_config_path}"...')
        config = load_config(as_config_path)
        get_class_logger(DQNAgent).setLevel(logging.DEBUG)  # Log Q-values.
        alphasheep_agent = DQNAgent(0, config=config, training=False)
        alphasheep_agent.load_weights(as_checkpoint_path)
        p0 = Player("0-AlphaSheep", agent=alphasheep_agent)
    elif agent_choice == "user":
        p0 = Player("0-User", agent=GUIAgent(0))
    elif agent_choice == "rule":
        p0 = Player("0-Hans", agent=RuleBasedAgent(0))
    elif agent_choice == "static":
        p0 = Player("0-Static", agent=StaticPolicyAgent(0))
    else:
        p0 = Player("0-RandomGuy", agent=RandomCardAgent(0))

    # Players 1-3 are RuleBasedAgents.
    players = [
        p0,
        Player("1-Zenzi", agent=RuleBasedAgent(1)),
        Player("2-Franz", agent=RuleBasedAgent(2)),
        Player("3-Andal", agent=RuleBasedAgent(3))
    ]

    # Rig the game so Player 0 has the cards to play a Herz-Solo.
    # Also, force them to play it.
    game_mode = GameMode(GameContract.suit_solo,
                         trump_suit=Suit.herz,
                         declaring_player_id=0)
    controller = GameController(players,
                                dealing_behavior=DealWinnableHand(game_mode),
                                forced_game_mode=game_mode)

    # The GUI initializes PyGame and registers for events provided by the controller. Everything runs single-threaded.
    #
    # The controller runs the game as usual. Whenever the GUI receives an event, it may block execution, so the controller
    # has to wait for the GUI to return control. In the meantime, the GUI can draw and wait for user input (mouse clicks, card choices, ...).
    logger.info("Starting GUI.")
    with Gui(controller.game_state) as gui:
        # Run an endless loop of single games.
        logger.info("Starting game loop...")
        try:
            while True:
                controller.run_game()
        except UserQuitGameException:  # Closing the window or pressing [Esc]
            logger.info("User quit game.")

    logger.info("Shutdown.")
Example 8
    def __init__(self, player_id: int, config: Dict, training: bool):
        """
        Creates a new DQNAgent.
        :param player_id: The unique id of the player (0-3).
        :param config: config dict containing an agent_config node.
        :param training: If True, will train during play. This usually means worse performance (because of exploration).
                         If False, then the agent will always pick the highest-ranking valid action.
        """
        super().__init__(player_id)
        self.logger = get_class_logger(self)

        config = config["agent_config"]["dqn_agent"]
        self.config = config
        self.training = training

        # We encode cards as one-hot vectors of size 32.
        # Providing indices to perform quick lookups.
        self._id2card = new_deck()
        self._card2id = {card: i for i, card in enumerate(self._id2card)}

        # Determine length of state vector.
        state_lens = {
            "cards_in_hand": 32,
            "cards_in_trick": 3 * 32,
            "cards_already_played": 32
        }
        self._state_size = sum(state_lens[x] for x in config["state_contents"])

        # Action space: One action for every card.
        # Naturally, most actions will be invalid because the agent doesn't have the card or is not allowed to play it.
        self._action_size = 32

        # If True, then all unavailable actions are zeroed in the q-vector during learning. I thought this might improve training
        # speed, but it turned out to provide only a slight benefit. Incompatible with (and superseded by) allow_invalid_actions.
        self._zero_q_for_invalid_actions = config["zero_q_for_invalid_actions"]

        # If allowed, then the agent can choose an invalid card and get punished for it, while staying
        # in the same state. If not allowed, invalid actions are automatically skipped when playing.
        # See discussion in experiment_log.md
        self._allow_invalid_actions = config["allow_invalid_actions"]
        self._invalid_action_reward = config["invalid_action_reward"]
        if self._allow_invalid_actions and self._zero_q_for_invalid_actions:
            raise ValueError(
                "allow_invalid_actions and zero_q_for_invalid_actions are mutually exclusive."
            )

        # Discount and exploration rate
        self._gamma = config["gamma"]
        self._epsilon = config["epsilon"]

        # Experience replay buffer for minibatch learning
        self.experience_buffer = deque(maxlen=config["experience_buffer_len"])

        # Remember the state and action (card) played in the previous trick, so we can judge it once we receive feedback.
        # Also remember which actions were valid at that time.
        self._prev_state = None
        self._prev_action = None
        self._prev_available_actions = None
        self._in_terminal_state = False

        # Create Q network (current state) and Target network (successor state). The networks are synced after every episode (game).
        self.q_network = self._build_model()
        self.target_network = self._build_model()
        self._align_target_model()
        self._batch_size = config["batch_size"]

        # Don't retrain after every single experience.
        # Retraining every time is expensive and doesn't add much information (rewards are received only at the end of the game).
        # If we wait for more experiences to accumulate before retraining, we get more fresh data before doing expensive training.
        # NOTE: This kind of breaks the "sync networks after every game" idea, but nevertheless works very well to speed up training.
        self._retrain_every_n = config["retrain_every"]
        self._experiences_since_last_retrain = 0

        # Memory: here are some things the agent remembers between moves. This is basically feature engineering;
        # it would be more interesting to have the agent learn these with an RNN or similar!
        self._mem_cards_already_played = set()

        # For display in the GUI
        self._current_q_vals = None
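
    # Sketch (assumed implementation - the real state encoding lives elsewhere in the agent):
    # the "cards_in_hand" part of the state vector is a 32-dim multi-hot vector, indexed via
    # the _card2id lookup built above.
    def _encode_hand_sketch(self, cards_in_hand):
        import numpy as np  # local import to keep the sketch self-contained
        vec = np.zeros(32, dtype=np.float32)
        for card in cards_in_hand:
            vec[self._card2id[card]] = 1.0
        return vec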
Example 9
    def __init__(self, player_id: int):
        super().__init__(player_id)

        self.logger = get_class_logger(self)
        self._select_card_callback = None
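
    # Hypothetical sketch - the GUI/agent wiring is not shown in this snippet. Presumably the Gui
    # registers a callback here and the agent defers card selection to it when asked to play.
    def register_select_card_callback_sketch(self, callback):
        self._select_card_callback = callback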