Ejemplo n.º 1
0
def log_strategy(writer: SummaryWriter, policy: NnPolicyWrapper,
                 global_step: int):
    """Record the policy's aggressive-action probability at each decision point.

    For every card (0, 1, 2) the policy is queried at four bet sequences and
    each result is written with ``writer.add_scalar`` under a
    ``strategy/<card>/...`` tag, where ``<card>`` is ``card_to_str(card)``.

    Args:
        writer: Receives one ``add_scalar`` call per (card, decision point).
        policy: Queried through ``aggressive_action_prob(infoset)``.
        global_step: Forwarded unchanged as the ``global_step`` of every
            scalar.
    """
    # (bet sequence, tag template) pairs in logging order. Judging by the tag
    # names, a 0 in the bet sequence is a check and a 1 is a bet.
    decision_points = (
        ((), "strategy/%s/p0_open"),
        ((0, ), "strategy/%s/p0_check/p1"),
        ((0, 1), "strategy/%s/p0_check/p1_bet/p0"),
        ((1, ), "strategy/%s/p0_bet/p1"),
    )

    # One infoset object is reused and mutated in place for every query.
    infoset = KuhnPokerGame.KuhnInfoset(0, ())

    for card in range(3):
        infoset.card = card
        for bet_sequence, tag_template in decision_points:
            infoset.bet_sequence = bet_sequence
            prob = policy.aggressive_action_prob(infoset)
            tag = tag_template % card_to_str(card)
            writer.add_scalar(tag, prob, global_step=global_step)
Ejemplo n.º 2
0
    def get_game_value(self) -> float:
        """Return this player's game value averaged over the three cards.

        For each card the player could hold, the opponent is assigned
        probability 0.5 for each of the two other cards, the root state
        (empty bet sequence) is evaluated with ``_get_game_state_value``, and
        the entry at index ``self.player_num`` is collected. The three
        collected values are averaged with equal weight.

        Returns:
            The mean of the three per-card values.
        """
        game_values = []

        for my_card in range(3):
            # The opponent cannot hold our card, so its probability is zeroed;
            # the two remaining cards keep 0.5 each.
            opponent_card_probs = np.full(3, 0.5)
            opponent_card_probs[my_card] = 0

            root_infoset = KuhnPoker.KuhnInfoset(card=my_card,
                                                 bet_sequence=())
            root_value = self._get_game_state_value(root_infoset,
                                                    opponent_card_probs)
            game_values.append(root_value[self.player_num])

        return sum(game_values) / len(game_values)
Ejemplo n.º 3
0
    def _get_opponent_game_value(
            self, my_infoset: KuhnPoker.KuhnInfoset,
            opponent_card_probs: np.ndarray) -> np.ndarray:
        """Average the successor state values over the opponent's next action.

        For every card the opponent could hold (any card other than
        ``my_infoset.card``), ``self.opponent_policy`` is asked for the
        probability of the aggressive action at the current bet sequence; the
        passive probability is its complement. Each action's overall weight is
        the sum over those cards of card probability times action probability.

        For each action with non-zero weight, the card probabilities are
        scaled by that action's per-card probability, passed through
        ``self._normalize``, and used to evaluate the state reached by
        appending the action to the bet sequence.

        Args:
            my_infoset: Infoset of this player at the state being evaluated.
            opponent_card_probs: Probabilities indexed by opponent card
                (0, 1, 2).

        Returns:
            A length-2 array: the weighted sum of the successor state values.
        """
        candidate_cards = [
            card for card in range(3) if card != my_infoset.card
        ]

        # card -> [passive probability, aggressive probability]
        action_probs_by_card = {}
        action_weights = np.zeros(2)
        for card in candidate_cards:
            infoset = KuhnPoker.KuhnInfoset(
                card=card, bet_sequence=my_infoset.bet_sequence)
            p_aggressive = self.opponent_policy.aggressive_action_prob(
                infoset)
            p_passive = 1.0 - p_aggressive
            action_probs_by_card[card] = [p_passive, p_aggressive]
            action_weights += opponent_card_probs[card] * np.array(
                [p_passive, p_aggressive])

        expected_value = np.zeros(2)
        for action, weight in enumerate(action_weights):
            # An action that is never taken contributes nothing; skipping it
            # also avoids handing an all-zero vector to _normalize.
            if weight == 0:
                continue

            card_probs_after = opponent_card_probs.copy()
            for card, probs in action_probs_by_card.items():
                card_probs_after[card] *= probs[action]
            card_probs_after = self._normalize(card_probs_after)

            next_infoset = KuhnPoker.KuhnInfoset(
                my_infoset.card, my_infoset.bet_sequence + (action, ))
            next_value = self._get_game_state_value(next_infoset,
                                                    card_probs_after)
            expected_value += next_value * weight

        return expected_value
Ejemplo n.º 4
0
    def _get_game_state_value(self, my_infoset: KuhnPoker.KuhnInfoset,
                              opponent_card_probs: np.ndarray) -> np.ndarray:
        """Return the value array of the state described by ``my_infoset``.

        Three cases are handled:

        * terminal infoset: delegate to ``_get_terminal_game_state_value``;
        * the other player is to act: delegate to
          ``_get_opponent_game_value``;
        * this player is to act: evaluate both actions (0, then 1)
          recursively and keep the result whose entry at ``self.player_num``
          is larger. Ties keep action 0, because the comparison is strict.

        Args:
            my_infoset: Infoset of this player at the state being evaluated.
            opponent_card_probs: Probabilities indexed by opponent card,
                passed through unchanged to whichever case applies.

        Returns:
            A value array indexed by player number.
        """
        if my_infoset.is_terminal:
            return self._get_terminal_game_state_value(my_infoset,
                                                       my_infoset.card,
                                                       opponent_card_probs)

        if my_infoset.player_to_act != self.player_num:
            return self._get_opponent_game_value(my_infoset,
                                                 opponent_card_probs)

        me = self.player_num
        # Start below any real value so the first action always replaces it.
        best_value = np.full(2, float('-inf'))
        for action in (0, 1):
            successor = KuhnPoker.KuhnInfoset(
                my_infoset.card, my_infoset.bet_sequence + (action, ))
            candidate = self._get_game_state_value(successor,
                                                   opponent_card_probs)
            if candidate[me] > best_value[me]:
                best_value = candidate

        return best_value