def _internal_get_player_utilities(self, nodes, hole_cards, board_cards,
                                       players_folded, callback):
        node = nodes[0]
        if isinstance(node, TerminalNode):
            return get_utility(hole_cards, board_cards, players_folded,
                               node.pot_commitment)
        elif isinstance(node, HoleCardsNode):
            # Average utilities over every valid (collision-free) deal of
            # hole cards to the players.
            values = np.zeros([0, self.game.get_num_players()])

            hole_card_sets = [n.children for n in nodes]
            for hole_cards_combination in itertools.product(*hole_card_sets):
                if is_unique(*hole_cards_combination):
                    new_nodes = [
                        n.children[hole_cards_combination[i]]
                        for i, n in enumerate(nodes)
                    ]
                    player_values = self.get_player_utilities(
                        new_nodes, hole_cards_combination, board_cards,
                        players_folded, callback)
                    values = np.append(values, [player_values], 0)
            return np.mean(values, 0)
        elif isinstance(node, BoardCardsNode):
            # Average utilities over the board cards available in every
            # player's tree.
            possible_board_cards = intersection(
                *(n.children for n in nodes))
            values = np.zeros(
                [len(possible_board_cards),
                 self.game.get_num_players()])
            for i, next_board_cards in enumerate(possible_board_cards):
                new_nodes = [n.children[next_board_cards] for n in nodes]
                new_board_cards = flatten(board_cards, next_board_cards)
                values[i, :] = self.get_player_utilities(
                    new_nodes, hole_cards, new_board_cards, players_folded,
                    callback)
            return np.mean(values, 0)
        else:
            # Action node: mix each action's utilities by the acting
            # player's strategy probabilities.
            current_player_node = nodes[node.player]
            utilities = np.zeros(self.game.get_num_players())
            for a in current_player_node.children:
                new_nodes = [n.children[a] for n in nodes]
                new_players_folded = players_folded
                if a == 0:
                    # Action 0 is a fold, so mark the acting player folded.
                    new_players_folded = list(players_folded)
                    new_players_folded[current_player_node.player] = True
                action_utilities = self.get_player_utilities(
                    new_nodes, hole_cards, board_cards, new_players_folded,
                    callback)
                utilities += action_utilities * current_player_node.strategy[a]
            return utilities
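The traversal above relies on several helpers whose definitions are not part of this listing. A minimal sketch of two of them, consistent with the is_unique tests at the bottom of this page (the intersection body is an assumption inferred from how it is called on the nodes' children):

from itertools import chain

def is_unique(*card_collections):
    # True iff no card appears more than once across all given collections;
    # matches is_unique([1, 2], [3], [3, 4, 5]) == False in the tests below.
    cards = list(chain.from_iterable(card_collections))
    return len(cards) == len(set(cards))

def intersection(*collections):
    # Keys present in every collection, e.g. board cards available in every
    # player's tree.
    common = set(collections[0])
    for collection in collections[1:]:
        common &= set(collection)
    return common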
Example #2
    def _cfr_hole_cards(self, player, nodes, hole_cards, board_cards,
                        players_folded, opponent_reach_prob):
        hole_card_combination_probability = 1 / get_num_hole_card_combinations(
            self.game)
        hole_card_sets = [node.children for node in nodes]
        hole_card_combinations = filter(lambda comb: is_unique(*comb),
                                        itertools.product(*hole_card_sets))

        value_sum = 0
        for hole_cards_combination in hole_card_combinations:
            next_nodes = [
                node.children[hole_cards_combination[i]]
                for i, node in enumerate(nodes)
            ]
            player_utility = self._cfr(player, next_nodes,
                                       hole_cards_combination, board_cards,
                                       players_folded, opponent_reach_prob)
            value_sum += player_utility * hole_card_combination_probability
        return value_sum
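The 1 / get_num_hole_card_combinations(self.game) factor weights every valid deal equally. A hedged sketch of that helper, assuming the game object exposes the deck size through get_num_suits() and get_num_ranks() (an assumption; only get_num_players() and get_num_hole_cards() appear in this listing):

import math

def get_num_hole_card_combinations(game):
    # Sketch under the assumptions above: count the ways to deal each player
    # an unordered hand of hole cards from the cards still in the deck.
    deck_size = game.get_num_suits() * game.get_num_ranks()  # assumed API
    cards_per_player = game.get_num_hole_cards()
    combinations = 1
    for p in range(game.get_num_players()):
        combinations *= math.comb(deck_size - p * cards_per_player,
                                  cards_per_player)
    return combinations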
Example #3
    def get_utility_estimations(self,
                                state,
                                player,
                                sampling_strategy,
                                evaluated_strategies=None):
        if evaluated_strategies is None:
            evaluated_strategies = [sampling_strategy]

        num_players = self.game.get_num_players()
        opponent_player = (player + 1) % 2

        num_evaluated_strategies = len(evaluated_strategies)

        utilities = np.zeros(num_evaluated_strategies)

        any_player_folded = any(
            state.get_player_folded(p) for p in range(num_players))

        all_board_cards = get_all_board_cards(self.game, state)

        all_info_available = not (any_player_folded and self.mucking_enabled)

        opponent_hole_cards = None
        possible_player_hole_cards = None
        if not all_info_available:
            possible_player_hole_cards = list(
                filter(
                    lambda hole_cards: is_unique(hole_cards, all_board_cards),
                    sampling_strategy.children))
            opponent_nodes = [[
                self.equilibirum_strategy.children[c]
                for c in self.equilibirum_strategy.children
                if is_unique(hole_cards, c, all_board_cards)
            ] for hole_cards in possible_player_hole_cards]
        else:
            opponent_hole_cards = [
                state.get_hole_card(opponent_player, c)
                for c in range(self.game.get_num_hole_cards())
            ]
            possible_player_hole_cards = list(
                filter(
                    lambda hole_cards: is_unique(
                        hole_cards, opponent_hole_cards, all_board_cards),
                    sampling_strategy.children))
            opponent_nodes = [[
                self.equilibirum_strategy.children[tuple(
                    sorted(opponent_hole_cards))]
            ] for _ in range(len(possible_player_hole_cards))]
        nodes = [[
            expert_node.children[hole_cards]
            for hole_cards in possible_player_hole_cards
        ] for expert_node in evaluated_strategies]
        sampling_strategy_nodes = [
            sampling_strategy.children[hole_cards]
            for hole_cards in possible_player_hole_cards
        ]

        num_nodes = len(possible_player_hole_cards)
        evaluated_strategies_reach_probabilities = np.ones(
            [num_evaluated_strategies, num_nodes])
        sampling_strategy_reach_probabilities = np.ones(num_nodes)
        opponent_nodes_reach_probabilities = np.ones(
            [len(opponent_nodes), len(opponent_nodes[0])])

        # Calculate correction term for hole cards
        if all_info_available:
            histories_actions_utilities = {}
            for i in range(num_nodes):
                history_actions_utilities = {}
                histories_actions_utilities[i] = history_actions_utilities
                for a in filter(lambda a: a != possible_player_hole_cards[i],
                                sampling_strategy.children):
                    nodes_tmp = [None, None]
                    nodes_tmp[player] = sampling_strategy_nodes[i]
                    nodes_tmp[opponent_player] = sampling_strategy.children[a]
                    key = ';'.join(map(str, nodes_tmp))
                    history_actions_utilities[a] = self.utilities_dict[key][
                        player]

            history_sampling_strategy_reach_probabilities_sum = np.sum(
                sampling_strategy_reach_probabilities)
            current_history_expected_value = 0
            for i in range(num_nodes):
                for a in filter(lambda a: a != possible_player_hole_cards[i],
                                sampling_strategy.children):
                    current_history_expected_value += histories_actions_utilities[i][a] \
                        * sampling_strategy_reach_probabilities[i] / num_nodes

            for i in range(num_nodes):
                sampling_strategy_reach_probabilities[i] /= num_nodes
                for j in range(num_evaluated_strategies):
                    evaluated_strategies_reach_probabilities[j, i] /= num_nodes

            next_history_sampling_strategy_reach_probabilities_sum = np.sum(
                sampling_strategy_reach_probabilities)
            next_history_expected_value = 0
            for i in range(num_nodes):
                next_history_expected_value += histories_actions_utilities[i][tuple(sorted(opponent_hole_cards))] \
                    * sampling_strategy_reach_probabilities[i]
            utilities += \
                (current_history_expected_value / history_sampling_strategy_reach_probabilities_sum) \
                - (next_history_expected_value / next_history_sampling_strategy_reach_probabilities_sum)

        def add_terminals_to_utilities(
                pot_commitment, players_folded,
                sampling_strategy_reach_probabilities,
                evaluated_strategies_reach_probabilities):
            nonlocal utilities
            nonlocal player

            sampling_strategy_reach_probability_sum = np.sum(
                sampling_strategy_reach_probabilities)
            if sampling_strategy_reach_probability_sum == 0:
                return

            for i in range(num_nodes):
                utility = None
                if players_folded[player]:
                    utility = -pot_commitment[player]
                else:
                    hole_cards = [
                        possible_player_hole_cards[i]
                        if p == player else opponent_hole_cards
                        for p in range(num_players)
                    ]
                    utility = get_utility(hole_cards, all_board_cards,
                                          players_folded,
                                          pot_commitment)[player]
                utilities += utility * (
                    evaluated_strategies_reach_probabilities[:, i] /
                    sampling_strategy_reach_probability_sum)

        def update_reach_probabilities(
                action, sampling_strategy_nodes, nodes,
                sampling_strategy_reach_probabilities,
                evaluated_strategies_reach_probabilities):
            for i in range(num_nodes):
                sampling_strategy_reach_probabilities[
                    i] *= sampling_strategy_nodes[i].strategy[action]
                for j in range(num_evaluated_strategies):
                    evaluated_strategies_reach_probabilities[
                        j, i] *= nodes[j][i].strategy[action]

        round_index = 0
        action_index = 0
        while True:
            node = nodes[0][0]
            if isinstance(node, BoardCardsNode):
                new_board_cards = get_board_cards(self.game, state,
                                                  round_index)

                # Calculate correction term for board cards.
                # TODO: not OK yet; this correction should likely be guarded
                # by all_info_available, like the hole-card correction above.
                num_board_cards = len(opponent_nodes[0][0].children)

                histories_actions_utilities = {}
                for i in range(num_nodes):
                    history_actions_utilities = {}
                    histories_actions_utilities[i] = history_actions_utilities
                    for a in filter(
                            lambda a: (a in opponent_nodes[i][0].children
                                       if len(opponent_nodes[i]) == 1
                                       else True),
                            sampling_strategy_nodes[i].children):
                        opponent_nodes_utilities = np.zeros(
                            len(opponent_nodes[i]))
                        for j, opponent_node in enumerate(opponent_nodes[i]):
                            if a in opponent_node.children:
                                nodes_tmp = [None, None]
                                nodes_tmp[player] = sampling_strategy_nodes[
                                    i].children[a]
                                nodes_tmp[
                                    opponent_player] = opponent_node.children[
                                        a]
                                key = ';'.join(map(str, nodes_tmp))
                                opponent_nodes_utilities[
                                    j] = self.utilities_dict[key][player]
                        if np.sum(opponent_nodes_reach_probabilities[i]) != 0:
                            opponent_reach_ratios = opponent_nodes_reach_probabilities[
                                i] / np.sum(
                                    opponent_nodes_reach_probabilities[i])
                        else:
                            # No reach probability left; fall back to a
                            # uniform distribution over opponent hands.
                            opponent_reach_ratios = np.ones(
                                len(opponent_nodes_reach_probabilities[i])
                            ) / len(opponent_nodes_reach_probabilities[i])
                        history_actions_utilities[a] = np.sum(
                            opponent_nodes_utilities * opponent_reach_ratios)

                history_sampling_strategy_reach_probabilities_sum = np.sum(
                    sampling_strategy_reach_probabilities)
                importance_sampling_ratio = np.sum(
                    evaluated_strategies_reach_probabilities,
                    axis=1) / history_sampling_strategy_reach_probabilities_sum

                current_history_expected_value = 0
                for i in range(num_nodes):
                    for a in filter(
                            lambda a: (a in opponent_nodes[i][0].children
                                       if len(opponent_nodes[i]) == 1
                                       else True),
                            sampling_strategy_nodes[i].children):
                        current_history_expected_value += histories_actions_utilities[i][a] \
                            * sampling_strategy_reach_probabilities[i] / num_board_cards

                for i in range(num_nodes):
                    sampling_strategy_reach_probabilities[i] /= num_board_cards
                    for j in range(num_evaluated_strategies):
                        evaluated_strategies_reach_probabilities[
                            j, i] /= num_board_cards

                next_history_sampling_strategy_reach_probabilities_sum = np.sum(
                    sampling_strategy_reach_probabilities)
                next_history_expected_value = 0
                for i in range(num_nodes):
                    next_history_expected_value += histories_actions_utilities[i][new_board_cards] \
                        * sampling_strategy_reach_probabilities[i]
                utilities += \
                    ((current_history_expected_value / history_sampling_strategy_reach_probabilities_sum) \
                    - (next_history_expected_value / next_history_sampling_strategy_reach_probabilities_sum)) * importance_sampling_ratio

                nodes = [[
                    expert_node.children[new_board_cards]
                    for expert_node in expert_nodes
                ] for expert_nodes in nodes]
                sampling_strategy_nodes = [
                    node.children[new_board_cards]
                    for node in sampling_strategy_nodes
                ]
                opponent_nodes = [[
                    opponent_nodes[i][j].children[new_board_cards]
                    for j in range(len(opponent_nodes[i]))
                ] for i in range(num_nodes)]
            elif isinstance(node, ActionNode):
                action = convert_action_to_int(
                    state.get_action_type(round_index, action_index))
                if node.player == player:
                    # Calculate correction term for player actions
                    histories_actions_utilities = {}
                    for i in range(num_nodes):
                        history_actions_utilities = {}
                        histories_actions_utilities[
                            i] = history_actions_utilities
                        for a in filter(
                                lambda a: (a in opponent_nodes[i][0].children
                                           if len(opponent_nodes[i]) == 1
                                           else True),
                                sampling_strategy_nodes[i].children):
                            opponent_nodes_utilities = np.zeros(
                                len(opponent_nodes[i]))
                            for j, opponent_node in enumerate(
                                    opponent_nodes[i]):
                                nodes_tmp = [None, None]
                                nodes_tmp[player] = sampling_strategy_nodes[
                                    i].children[a]
                                nodes_tmp[
                                    opponent_player] = opponent_node.children[
                                        a]
                                key = ';'.join(map(str, nodes_tmp))
                                opponent_nodes_utilities[
                                    j] = self.utilities_dict[key][player]
                            if np.sum(opponent_nodes_reach_probabilities[i]
                                      ) != 0:
                                opponent_reach_ratios = opponent_nodes_reach_probabilities[
                                    i] / np.sum(
                                        opponent_nodes_reach_probabilities[i])
                            else:
                                # Uniform fallback when no opponent reach
                                # probability remains.
                                opponent_reach_ratios = np.ones(
                                    len(opponent_nodes_reach_probabilities[i])
                                ) / len(opponent_nodes_reach_probabilities[i])
                            history_actions_utilities[a] = np.sum(
                                opponent_nodes_utilities *
                                opponent_reach_ratios)

                    history_sampling_strategy_reach_probabilities_sum = np.sum(
                        sampling_strategy_reach_probabilities)
                    importance_sampling_ratio = np.sum(
                        evaluated_strategies_reach_probabilities, axis=1
                    ) / history_sampling_strategy_reach_probabilities_sum

                    current_history_expected_value = 0
                    for i in range(num_nodes):
                        for a in sampling_strategy_nodes[i].children:
                            current_history_expected_value += histories_actions_utilities[i][a] \
                                * sampling_strategy_reach_probabilities[i] * sampling_strategy_nodes[i].strategy[a]

                    update_reach_probabilities(
                        action, sampling_strategy_nodes, nodes,
                        sampling_strategy_reach_probabilities,
                        evaluated_strategies_reach_probabilities)

                    next_history_sampling_strategy_reach_probabilities_sum = np.sum(
                        sampling_strategy_reach_probabilities)
                    next_history_expected_value = 0
                    for i in range(num_nodes):
                        next_history_expected_value += histories_actions_utilities[i][action] \
                            * sampling_strategy_reach_probabilities[i]
                    # Skip the correction when either reach-probability sum
                    # is zero to avoid dividing by zero.
                    if history_sampling_strategy_reach_probabilities_sum != 0 \
                            and next_history_sampling_strategy_reach_probabilities_sum != 0:
                        utilities += \
                            ((current_history_expected_value / history_sampling_strategy_reach_probabilities_sum)
                             - (next_history_expected_value / next_history_sampling_strategy_reach_probabilities_sum)) \
                            * importance_sampling_ratio
                else:
                    for i in range(num_nodes):
                        for j in range(len(opponent_nodes[0])):
                            opponent_nodes_reach_probabilities[
                                i, j] *= opponent_nodes[i][j].strategy[action]

                action_index += 1
                if action_index == state.get_num_actions(round_index):
                    round_index += 1
                    action_index = 0
                nodes = [[
                    expert_node.children[action]
                    for expert_node in expert_nodes
                ] for expert_nodes in nodes]
                sampling_strategy_nodes = [
                    node.children[action] for node in sampling_strategy_nodes
                ]
                opponent_nodes = [[
                    opponent_nodes[i][j].children[action]
                    for j in range(len(opponent_nodes[i]))
                ] for i in range(num_nodes)]
            elif isinstance(node, TerminalNode):
                players_folded = [
                    state.get_player_folded(p) for p in range(num_players)
                ]
                add_terminals_to_utilities(
                    node.pot_commitment, players_folded,
                    sampling_strategy_reach_probabilities,
                    evaluated_strategies_reach_probabilities)
                break

        return utilities
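The correction terms accumulated above follow a control-variate pattern: at each chance or decision point, the expected utility over all alternatives under the sampling strategy is added and the utility of the branch actually sampled is subtracted. That difference has expectation zero, so it reduces variance without biasing the estimate. A self-contained toy illustration of the identity (all names hypothetical):

import numpy as np

def control_variate_correction(action_values, action_probs, sampled_action):
    # E[values] - values[sampled] has mean zero when sampled_action is drawn
    # from action_probs, so adding it to an estimator leaves the expected
    # value unchanged while reducing variance.
    expected_value = float(np.dot(action_values, action_probs))
    return expected_value - action_values[sampled_action]

rng = np.random.default_rng(0)
values = np.array([1.0, -2.0, 0.5])
probs = np.array([0.2, 0.5, 0.3])
samples = rng.choice(3, size=100_000, p=probs)
corrections = [control_variate_correction(values, probs, a) for a in samples]
print(np.mean(corrections))  # close to 0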
Example #4
    def get_utility_estimations(self,
                                state,
                                player,
                                sampling_strategy,
                                evaluated_strategies=None):
        if evaluated_strategies is None:
            evaluated_strategies = [sampling_strategy]

        num_players = self.game.get_num_players()
        opponent_player = (player + 1) % 2

        num_evaluated_strategies = len(evaluated_strategies)

        utilities = np.zeros(num_evaluated_strategies)

        any_player_folded = any(
            state.get_player_folded(p) for p in range(num_players))

        all_board_cards = get_all_board_cards(self.game, state)

        opponent_hole_cards = None
        possible_player_hole_cards = None
        if any_player_folded and self.mucking_enabled:
            possible_player_hole_cards = list(
                filter(
                    lambda hole_cards: is_unique(hole_cards, all_board_cards),
                    sampling_strategy.children))
        else:
            opponent_hole_cards = [
                state.get_hole_card(opponent_player, c)
                for c in range(self.game.get_num_hole_cards())
            ]
            possible_player_hole_cards = list(
                filter(
                    lambda hole_cards: is_unique(
                        hole_cards, opponent_hole_cards, all_board_cards),
                    sampling_strategy.children))
        nodes = [[
            expert_node.children[hole_cards]
            for hole_cards in possible_player_hole_cards
        ] for expert_node in evaluated_strategies]
        sampling_strategy_nodes = [
            sampling_strategy.children[hole_cards]
            for hole_cards in possible_player_hole_cards
        ]

        num_nodes = len(possible_player_hole_cards)
        evaluated_strategies_reach_probabilities = np.ones(
            [num_evaluated_strategies, num_nodes])
        sampling_strategy_reach_probabilities = np.ones(num_nodes)

        def add_terminals_to_utilities(
                pot_commitment, players_folded,
                sampling_strategy_reach_probabilities,
                evaluated_strategies_reach_probabilities):
            nonlocal utilities
            nonlocal player

            sampling_strategy_reach_probability_sum = np.sum(
                sampling_strategy_reach_probabilities)
            if sampling_strategy_reach_probability_sum == 0:
                return

            for i in range(num_nodes):
                utility = None
                if players_folded[player]:
                    utility = -pot_commitment[player]
                else:
                    hole_cards = [
                        possible_player_hole_cards[i]
                        if p == player else opponent_hole_cards
                        for p in range(num_players)
                    ]
                    utility = get_utility(hole_cards, all_board_cards,
                                          players_folded,
                                          pot_commitment)[player]
                utilities += utility * (
                    evaluated_strategies_reach_probabilities[:, i] /
                    sampling_strategy_reach_probability_sum)

        def update_reach_probabilities(
                action, sampling_strategy_nodes, nodes,
                sampling_strategy_reach_probabilities,
                evaluated_strategies_reach_probabilities):
            for i in range(num_nodes):
                if sampling_strategy_reach_probabilities is not None:
                    sampling_strategy_reach_probabilities[
                        i] *= sampling_strategy_nodes[i].strategy[action]

                for j in range(num_evaluated_strategies):
                    evaluated_strategies_reach_probabilities[
                        j, i] *= nodes[j][i].strategy[action]

        round_index = 0
        action_index = 0
        while True:
            node = nodes[0][0]
            if isinstance(node, BoardCardsNode):
                new_board_cards = get_board_cards(self.game, state,
                                                  round_index)
                nodes = [[
                    expert_node.children[new_board_cards]
                    for expert_node in expert_nodes
                ] for expert_nodes in nodes]
                sampling_strategy_nodes = [
                    node.children[new_board_cards]
                    for node in sampling_strategy_nodes
                ]
            elif isinstance(node, ActionNode):
                action = convert_action_to_int(
                    state.get_action_type(round_index, action_index))
                if node.player == player:
                    # for other_action in filter(lambda a: a != action and isinstance(node.children[a], TerminalNode), node.children):
                    #     sampling_strategy_reach_probabilities_copy = np.copy(sampling_strategy_reach_probabilities)
                    #     evaluated_strategies_reach_probabilities_copy = np.copy(evaluated_strategies_reach_probabilities)
                    #     update_reach_probabilities(
                    #         other_action,
                    #         sampling_strategy_nodes,
                    #         nodes,
                    #         sampling_strategy_reach_probabilities_copy,
                    #         evaluated_strategies_reach_probabilities_copy)

                    #     players_folded = [True if p == player and other_action == 0 else False for p in range(2)]

                    #     add_terminals_to_utilities(
                    #         node.children[other_action].pot_commitment,
                    #         players_folded,
                    #         sampling_strategy_reach_probabilities,
                    #         evaluated_strategies_reach_probabilities_copy)

                    update_reach_probabilities(
                        action, sampling_strategy_nodes, nodes,
                        sampling_strategy_reach_probabilities,
                        evaluated_strategies_reach_probabilities)

                action_index += 1
                if action_index == state.get_num_actions(round_index):
                    round_index += 1
                    action_index = 0
                nodes = [[
                    expert_node.children[action]
                    for expert_node in expert_nodes
                ] for expert_nodes in nodes]
                sampling_strategy_nodes = [
                    node.children[action] for node in sampling_strategy_nodes
                ]
            elif isinstance(node, TerminalNode):
                players_folded = [
                    state.get_player_folded(p) for p in range(num_players)
                ]
                add_terminals_to_utilities(
                    node.pot_commitment, players_folded,
                    sampling_strategy_reach_probabilities,
                    evaluated_strategies_reach_probabilities)
                break

        return utilities
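A hypothetical call site for either variant of get_utility_estimations; evaluator, state, sampling_tree, and candidate_tree are illustrative names for objects constructed elsewhere:

estimates = evaluator.get_utility_estimations(
    state,
    player=0,
    sampling_strategy=sampling_tree,
    evaluated_strategies=[sampling_tree, candidate_tree])
# estimates[i] is the importance-weighted utility of evaluated_strategies[i]
# on this single hand; averaging over many hands estimates each strategy's
# expected utility without replaying them.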
    def test_is_unique_false(self):
        self.assertEqual(is_unique([1, 2], [3], [3, 4, 5]), False)

    def test_is_unique_true(self):
        self.assertEqual(is_unique((1, 2), [3], [4, 5]), True)