Esempio n. 1
0
        def add_terminals_to_utilities(
                pot_commitment, players_folded,
                sampling_strategy_reach_probabilities,
                evaluated_strategies_reach_probabilities):
            nonlocal utilities
            nonlocal player

            sampling_strategy_reach_probability_sum = np.sum(
                sampling_strategy_reach_probabilities)
            if sampling_strategy_reach_probability_sum == 0:
                return

            for i in range(num_nodes):
                utility = None
                if players_folded[player]:
                    utility = -pot_commitment[player]
                else:
                    hole_cards = [
                        possible_player_hole_cards[i]
                        if p == player else opponent_hole_cards
                        for p in range(num_players)
                    ]
                    utility = get_utility(hole_cards, all_board_cards,
                                          players_folded,
                                          pot_commitment)[player]
                utilities += utility * (
                    evaluated_strategies_reach_probabilities[:, i] /
                    sampling_strategy_reach_probability_sum)
Esempio n. 2
0
    def get_utility_estimations(self, state, player, sampling_strategy, evaluated_strategies=None):
        """Estimate ``player``'s utility for one played-out ``state``.

        The actions and cards recorded in ``state`` are replayed through the
        sampling strategy tree and through every evaluated strategy tree in
        lockstep. Along the way the probabilities that each tree assigns to
        ``player``'s own actions are multiplied together. At the terminal
        node the realised utility is scaled by
        ``evaluated reach probability / sampling reach probability``.

        Args:
            state: Finished game state; queried for hole cards, board
                cards, actions and fold flags.
            player: Index of the player whose utility is estimated.
            sampling_strategy: Root node of the strategy tree that is
                treated as the one that generated ``state``.
            evaluated_strategies: Optional list of strategy tree roots to
                evaluate. Defaults to ``[sampling_strategy]``.

        Returns:
            Array with one utility estimate per evaluated strategy.

        Raises:
            TypeError: If the replay reaches a node that is not a
                ``BoardCardsNode``, ``ActionNode`` or ``TerminalNode``
                (previously this case looped forever).
        """
        if evaluated_strategies is None:
            evaluated_strategies = [sampling_strategy]

        num_players = self.game.get_num_players()
        num_hole_cards = self.game.get_num_hole_cards()
        # NOTE(review): hard-codes a two-player game -- confirm with callers.
        opponent_player = (player + 1) % 2

        num_evaluated_strategies = len(evaluated_strategies)

        # Hole cards are looked up in the trees as a sorted tuple.
        player_hole_cards = tuple(sorted(
            state.get_hole_card(player, c) for c in range(num_hole_cards)))

        all_board_cards = get_all_board_cards(self.game, state)

        evaluated_strategies_nodes = [
            root.children[player_hole_cards] for root in evaluated_strategies]
        sampling_strategy_node = sampling_strategy.children[player_hole_cards]

        evaluated_strategies_reach_probabilities = np.ones(num_evaluated_strategies)
        sampling_strategy_reach_probability = 1

        round_index = 0
        action_index = 0
        while True:
            # The first evaluated tree decides the node type for all trees;
            # this presumes every tree shares the same structure.
            node = evaluated_strategies_nodes[0]
            if isinstance(node, BoardCardsNode):
                new_board_cards = get_board_cards(self.game, state, round_index)
                evaluated_strategies_nodes = [
                    current.children[new_board_cards]
                    for current in evaluated_strategies_nodes]
                sampling_strategy_node = sampling_strategy_node.children[new_board_cards]
            elif isinstance(node, ActionNode):
                action = convert_action_to_int(state.get_action_type(round_index, action_index))
                if node.player == player:
                    # Only the player's own action probabilities enter the
                    # reach probabilities.
                    sampling_strategy_reach_probability *= sampling_strategy_node.strategy[action]
                    for i, current in enumerate(evaluated_strategies_nodes):
                        evaluated_strategies_reach_probabilities[i] *= current.strategy[action]

                # Advance to the next recorded action, rolling over to the
                # next round once this round's actions are exhausted.
                action_index += 1
                if action_index == state.get_num_actions(round_index):
                    round_index += 1
                    action_index = 0
                evaluated_strategies_nodes = [
                    current.children[action]
                    for current in evaluated_strategies_nodes]
                sampling_strategy_node = sampling_strategy_node.children[action]
            elif isinstance(node, TerminalNode):
                players_folded = [state.get_player_folded(p) for p in range(num_players)]
                if players_folded[player]:
                    utility = -node.pot_commitment[player]
                else:
                    opponent_hole_cards = [
                        state.get_hole_card(opponent_player, c)
                        for c in range(num_hole_cards)]
                    hole_cards = [
                        player_hole_cards if p == player else opponent_hole_cards
                        for p in range(num_players)]
                    utility = get_utility(
                        hole_cards,
                        all_board_cards,
                        players_folded,
                        node.pot_commitment)[player]
                return utility * (evaluated_strategies_reach_probabilities / sampling_strategy_reach_probability)
            else:
                # Without this guard an unknown node type would never
                # advance the walk and the loop would spin forever.
                raise TypeError(
                    'Unexpected node type %s while replaying state'
                    % type(node).__name__)
    def _internal_get_player_utilities(self, nodes, hole_cards, board_cards,
                                       players_folded, callback):
        """Compute per-player utilities below ``nodes``.

        ``nodes`` holds one tree node per player; ``nodes[0]`` decides which
        kind of node is being processed. Recursion goes through
        ``self.get_player_utilities`` and ``callback`` is only forwarded.

        Args:
            nodes: One strategy tree node per player, all at the same
                position in their respective trees.
            hole_cards: Hole cards chosen so far (replaced at a
                ``HoleCardsNode``).
            board_cards: Board cards dealt so far.
            players_folded: Per-player fold flags.
            callback: Passed through unchanged to the recursive calls.

        Returns:
            Terminal nodes return ``get_utility(...)`` directly. Card nodes
            return the mean over the explored children. Action nodes return
            the children's utilities weighted by the acting player's
            strategy.
        """
        head = nodes[0]

        if isinstance(head, TerminalNode):
            return get_utility(hole_cards, board_cards, players_folded,
                               head.pot_commitment)

        if isinstance(head, HoleCardsNode):
            rows = np.zeros([0, self.game.get_num_players()])

            # Every player's possible hands; combinations sharing a card
            # between players are skipped.
            candidate_hands = [tree.children for tree in nodes]
            for combination in itertools.product(*candidate_hands):
                if not is_unique(*combination):
                    continue
                child_nodes = [
                    tree.children[combination[seat]]
                    for seat, tree in enumerate(nodes)
                ]
                combination_utilities = self.get_player_utilities(
                    child_nodes, combination, board_cards,
                    players_folded, callback)
                rows = np.append(rows, [combination_utilities], axis=0)
            return np.mean(rows, 0)

        if isinstance(head, BoardCardsNode):
            # Only board cards present in every player's tree are explored.
            shared_boards = intersection(
                *(tree.children for tree in nodes))
            rows = np.zeros(
                [len(shared_boards),
                 self.game.get_num_players()])
            for row, dealt in enumerate(shared_boards):
                child_nodes = [tree.children[dealt] for tree in nodes]
                extended_board = flatten(board_cards, dealt)
                rows[row, :] = self.get_player_utilities(
                    child_nodes, hole_cards, extended_board, players_folded,
                    callback)
            return np.mean(rows, 0)

        # Remaining case: an action node. The strategy of the player to act
        # comes from that player's own tree.
        acting = nodes[head.player]
        totals = np.zeros(self.game.get_num_players())
        for action in acting.children:
            child_nodes = [tree.children[action] for tree in nodes]
            if action == 0:
                # Action 0 marks the acting player as folded; copy so the
                # caller's list is not modified.
                folded_after = list(players_folded)
                folded_after[acting.player] = True
            else:
                folded_after = players_folded
            child_utilities = self.get_player_utilities(
                child_nodes, hole_cards, board_cards, folded_after,
                callback)
            totals += child_utilities * acting.strategy[action]
        return totals
Esempio n. 4
0
    def _solve(
            self,
            player_position,
            best_response_node,
            player_states,
            best_response_cards,
            board_cards):
        """Recursively evaluate the tree and fill in the responder's strategy.

        Walks ``best_response_node`` together with the matching nodes of the
        fixed player's strategy (carried in ``player_states``). At nodes
        where the responder acts, the action(s) with the lowest value are
        selected and written into ``best_response_node.strategy``.

        Child state tables are built as plain lists of tuples. The previous
        ``np.append`` accumulation copied the whole table on every append
        and pushed mixed ``(node, float, cards)`` rows through NumPy's
        array conversion.

        Args:
            player_position: Tree position (0 or 1) of the player whose
                strategy is fixed.
            best_response_node: Current node of the responder's tree.
            player_states: Iterable of entries indexed as ``[0]`` the fixed
                player's node, ``[1]`` its accumulated weight and ``[2]``
                the fixed player's cards.
            best_response_cards: Cards held by the responder so far.
            board_cards: Board cards dealt so far.

        Returns:
            The value of this subtree from the fixed player's point of view
            (index 0 of ``get_utility`` with the fixed player's hand first),
            weighted by the state weights.
        """
        if isinstance(best_response_node, TerminalNode):
            parent_action = get_parent_action(best_response_node)
            # Index 0 is the fixed player, index 1 the responder.
            players_folded = [False] * 2
            if parent_action == 0:
                player_folded = 0 if best_response_node.parent.player == player_position else 1
                players_folded[player_folded] = True

            # The pot commitment does not depend on the state, so orient it
            # once: flipped when the fixed player sits in position 1 so that
            # index 0 always refers to the fixed player.
            pot_commitment = best_response_node.pot_commitment
            if player_position == 1:
                pot_commitment = np.flip(pot_commitment, axis=0)

            player_value_sum = 0
            for state in player_states:
                hands = [state[2], best_response_cards]
                player_utilities = get_utility(hands, board_cards, players_folded, pot_commitment)
                player_value_sum += player_utilities[0] * state[1]
            return player_value_sum

        elif isinstance(best_response_node, HoleCardsNode):
            player_values_sum = 0
            for cards in best_response_node.children:
                new_best_response_cards = flatten(best_response_cards, cards)
                new_player_states = []
                # The board check is independent of the opponent's cards,
                # so it is evaluated once per responder hand. When it fails
                # the recursion still runs, with no states.
                if len(intersection(cards, board_cards)) == 0:
                    for other_cards in best_response_node.children:
                        # Skip hands that share a card with the responder.
                        if len(intersection(cards, other_cards)) == 0:
                            for state in player_states:
                                new_player_states.append(
                                    (state[0].children[other_cards], state[1], other_cards))

                player_values_sum += self._solve(
                    player_position,
                    best_response_node.children[cards],
                    new_player_states,
                    new_best_response_cards,
                    board_cards)
            return player_values_sum / len(best_response_node.children)

        elif isinstance(best_response_node, BoardCardsNode):
            player_values_sum = 0
            for cards in best_response_node.children:
                new_board_cards = flatten(board_cards, cards)

                # Keep only states whose tree contains these board cards.
                new_player_states = [
                    (state[0].children[cards], state[1], state[2])
                    for state in player_states
                    if cards in state[0].children
                ]

                player_values_sum += self._solve(
                    player_position,
                    best_response_node.children[cards],
                    new_player_states,
                    best_response_cards,
                    new_board_cards)
            return player_values_sum / len(best_response_node.children)

        elif best_response_node.player == player_position:
            # Fixed player acts: every action is followed, with the state
            # weight multiplied by the probability of that action.
            values_sum = 0
            for a in best_response_node.children:
                new_player_states = [
                    (state[0].children[a], state[1] * state[0].strategy[a], state[2])
                    for state in player_states
                ]

                values_sum += self._solve(
                    player_position,
                    best_response_node.children[a],
                    new_player_states,
                    best_response_cards,
                    board_cards)
            return values_sum

        else:
            # Responder acts: pick the action(s) that minimise the fixed
            # player's value.
            best_value = None
            best_value_actions = None
            for a in best_response_node.children:
                new_player_states = [
                    (state[0].children[a], state[1], state[2])
                    for state in player_states
                ]

                player_value = self._solve(
                    player_position,
                    best_response_node.children[a],
                    new_player_states,
                    best_response_cards,
                    board_cards)

                if (best_value is None) or (player_value < best_value):
                    best_value = player_value
                    best_value_actions = [a]
                elif player_value == best_value:
                    best_value_actions.append(a)
            # Ties share the probability uniformly; strategy entries of the
            # other actions are left untouched.
            best_value_action_probability = 1 / len(best_value_actions)
            for a in best_value_actions:
                best_response_node.strategy[a] = best_value_action_probability
            return best_value
Esempio n. 5
0
 def _cfr_terminal(self, player, nodes, hole_cards, board_cards,
                   players_folded, opponent_reach_prob):
     """Return ``player``'s terminal utility scaled by ``opponent_reach_prob``.

     The pot commitment is read from ``nodes[0]``.
     """
     terminal = nodes[0]
     payoffs = get_utility(
         hole_cards, board_cards, players_folded, terminal.pot_commitment)
     return payoffs[player] * opponent_reach_prob
 def test_get_utility_board_cards(self):
     """With board card 46, hands (51,) vs (47,) are expected to give [-1, 1]."""
     hole_cards = [(51, ), (47, )]
     utilities = get_utility(hole_cards, [46], [False, False], [1, 1])
     self.assertEqual(utilities.tolist(), [-1, 1])
 def test_get_utility_uneven_pot(self):
     """With pot commitments [5, 1] and no board, [1, -1] is expected."""
     hole_cards = [(51, ), (47, )]
     utilities = get_utility(hole_cards, [], [False, False], [5, 1])
     self.assertEqual(utilities.tolist(), [1, -1])
 def test_get_utility_folded_player(self):
     """When the first player has folded, [-1, 1] is expected."""
     hole_cards = [(51, ), (47, )]
     utilities = get_utility(hole_cards, [], [True, False], [1, 1])
     self.assertEqual(utilities.tolist(), [-1, 1])