def add_terminals_to_utilities(
        pot_commitment, players_folded,
        sampling_strategy_reach_probabilities,
        evaluated_strategies_reach_probabilities):
    nonlocal utilities
    nonlocal player

    sampling_strategy_reach_probability_sum = np.sum(
        sampling_strategy_reach_probabilities)
    if sampling_strategy_reach_probability_sum == 0:
        return

    for i in range(num_nodes):
        if players_folded[player]:
            # A folded player simply loses their pot commitment.
            utility = -pot_commitment[player]
        else:
            # Showdown: evaluate this hole-card combination against the
            # opponent's known hole cards.
            hole_cards = [
                possible_player_hole_cards[i] if p == player
                else opponent_hole_cards
                for p in range(num_players)
            ]
            utility = get_utility(
                hole_cards, all_board_cards, players_folded,
                pot_commitment)[player]
        # Weight each hole-card combination's utility by the evaluated
        # strategies' reach probabilities, normalised by the summed
        # sampling-strategy reach probability.
        utilities += utility * (
            evaluated_strategies_reach_probabilities[:, i]
            / sampling_strategy_reach_probability_sum)
def get_utility_estimations(self, state, player, sampling_strategy,
                            evaluated_strategies=None):
    if evaluated_strategies is None:
        evaluated_strategies = [sampling_strategy]
    num_players = self.game.get_num_players()
    opponent_player = (player + 1) % 2
    num_evaluated_strategies = len(evaluated_strategies)
    player_hole_cards = tuple(sorted(
        [state.get_hole_card(player, c)
         for c in range(self.game.get_num_hole_cards())]))
    any_player_folded = False
    for p in range(num_players):
        any_player_folded = any_player_folded or state.get_player_folded(p)
    all_board_cards = get_all_board_cards(self.game, state)

    # Walk the strategy trees along the path actually taken in this state.
    evaluated_strategies_nodes = [
        node.children[player_hole_cards] for node in evaluated_strategies]
    sampling_strategy_node = sampling_strategy.children[player_hole_cards]
    evaluated_strategies_reach_probabilities = np.ones(num_evaluated_strategies)
    sampling_strategy_reach_probability = 1
    round_index = 0
    action_index = 0
    while True:
        node = evaluated_strategies_nodes[0]
        if isinstance(node, BoardCardsNode):
            new_board_cards = get_board_cards(self.game, state, round_index)
            evaluated_strategies_nodes = [
                node.children[new_board_cards]
                for node in evaluated_strategies_nodes]
            sampling_strategy_node = \
                sampling_strategy_node.children[new_board_cards]
        elif isinstance(node, ActionNode):
            action = convert_action_to_int(
                state.get_action_type(round_index, action_index))
            if node.player == player:
                # Only the player's own action probabilities enter the
                # reach-probability ratio; opponent and chance terms cancel.
                sampling_strategy_reach_probability *= \
                    sampling_strategy_node.strategy[action]
                for i in range(num_evaluated_strategies):
                    evaluated_strategies_reach_probabilities[i] *= \
                        evaluated_strategies_nodes[i].strategy[action]
            action_index += 1
            if action_index == state.get_num_actions(round_index):
                round_index += 1
                action_index = 0
            evaluated_strategies_nodes = [
                node.children[action] for node in evaluated_strategies_nodes]
            sampling_strategy_node = sampling_strategy_node.children[action]
        elif isinstance(node, TerminalNode):
            players_folded = [
                state.get_player_folded(p) for p in range(num_players)]
            if players_folded[player]:
                utility = -node.pot_commitment[player]
            else:
                opponent_hole_cards = [
                    state.get_hole_card(opponent_player, c)
                    for c in range(self.game.get_num_hole_cards())]
                hole_cards = [
                    player_hole_cards if p == player else opponent_hole_cards
                    for p in range(num_players)]
                utility = get_utility(
                    hole_cards, all_board_cards, players_folded,
                    node.pot_commitment)[player]
            # Importance-sampling estimate of each evaluated strategy's utility.
            return utility * (evaluated_strategies_reach_probabilities
                              / sampling_strategy_reach_probability)
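# The return value above is an importance-sampling estimate: the terminal
# utility is reweighted by the ratio of each evaluated strategy's reach
# probability to the sampling strategy's reach probability, taken over the
# acting player's own decisions only. A minimal illustrative sketch follows;
# the reach probabilities and payoff below are made up, not taken from any
# real game.
import numpy as np

sampling_reach = 0.25                     # pi_sampling over the player's actions
evaluated_reach = np.array([0.25, 0.10])  # pi_e for two evaluated strategies
terminal_utility = 4.0                    # player's payoff at the reached terminal

estimates = terminal_utility * (evaluated_reach / sampling_reach)
print(estimates)  # [4.  1.6] -- the sampling strategy itself gets weight 1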
def _internal_get_player_utilities(self, nodes, hole_cards, board_cards,
                                   players_folded, callback):
    node = nodes[0]
    if isinstance(node, TerminalNode):
        return get_utility(
            hole_cards, board_cards, players_folded, node.pot_commitment)
    elif isinstance(node, HoleCardsNode):
        values = np.zeros([0, self.game.get_num_players()])
        possible_hole_cards = [node.children for node in nodes]
        for hole_cards_combination in itertools.product(*possible_hole_cards):
            if is_unique(*hole_cards_combination):
                new_nodes = [
                    node.children[hole_cards_combination[i]]
                    for i, node in enumerate(nodes)
                ]
                player_values = self.get_player_utilities(
                    new_nodes, hole_cards_combination, board_cards,
                    players_folded, callback)
                values = np.append(values, [player_values], 0)
        return np.mean(values, 0)
    elif isinstance(node, BoardCardsNode):
        possible_board_cards = intersection(
            *map(lambda node: node.children, nodes))
        values = np.zeros(
            [len(possible_board_cards), self.game.get_num_players()])
        for i, next_board_cards in enumerate(possible_board_cards):
            new_nodes = [node.children[next_board_cards] for node in nodes]
            new_board_cards = flatten(board_cards, next_board_cards)
            values[i, :] = self.get_player_utilities(
                new_nodes, hole_cards, new_board_cards, players_folded,
                callback)
        return np.mean(values, 0)
    else:
        # Action node: take the expectation over the acting player's strategy.
        current_player_node = nodes[node.player]
        utilities = np.zeros(self.game.get_num_players())
        for a in current_player_node.children:
            new_nodes = [node.children[a] for node in nodes]
            new_players_folded = players_folded
            if a == 0:
                # Action 0 is a fold; mark the acting player as folded.
                new_players_folded = list(players_folded)
                new_players_folded[current_player_node.player] = True
            action_utilities = self.get_player_utilities(
                new_nodes, hole_cards, board_cards, new_players_folded,
                callback)
            utilities += action_utilities * current_player_node.strategy[a]
        return utilities
def _solve(self, player_position, best_response_node, player_states,
           best_response_cards, board_cards):
    if isinstance(best_response_node, TerminalNode):
        parent_action = get_parent_action(best_response_node)
        players_folded = [False] * 2
        if parent_action == 0:
            # Action 0 is a fold; mark whichever player made it.
            player_folded = 0 \
                if best_response_node.parent.player == player_position else 1
            players_folded[player_folded] = True
        player_value_sum = 0
        for state in player_states:
            hands = [state[2], best_response_cards]
            pot_commitment = best_response_node.pot_commitment
            if player_position == 1:
                pot_commitment = np.flip(pot_commitment, axis=0)
            player_utilities = get_utility(
                hands, board_cards, players_folded, pot_commitment)
            player_value_sum += player_utilities[0] * state[1]
        return player_value_sum
    elif isinstance(best_response_node, HoleCardsNode):
        player_values_sum = 0
        for cards in best_response_node.children:
            new_best_response_cards = flatten(best_response_cards, cards)
            new_player_states = np.empty([0, 3])
            for other_cards in best_response_node.children:
                if len(intersection(cards, other_cards)) == 0 \
                        and len(intersection(cards, board_cards)) == 0:
                    for state in player_states:
                        new_player_states = np.append(
                            new_player_states,
                            [[state[0].children[other_cards], state[1],
                              other_cards]],
                            axis=0)
            player_values_sum += self._solve(
                player_position, best_response_node.children[cards],
                new_player_states, new_best_response_cards, board_cards)
        return player_values_sum / len(best_response_node.children)
    elif isinstance(best_response_node, BoardCardsNode):
        player_values_sum = 0
        for cards in best_response_node.children:
            new_board_cards = flatten(board_cards, cards)
            new_player_states = np.empty([0, 3])
            for state in player_states:
                if cards in state[0].children:
                    new_player_states = np.append(
                        new_player_states,
                        [[state[0].children[cards], state[1], state[2]]],
                        axis=0)
            player_values_sum += self._solve(
                player_position, best_response_node.children[cards],
                new_player_states, best_response_cards, new_board_cards)
        return player_values_sum / len(best_response_node.children)
    elif best_response_node.player == player_position:
        # The evaluated player acts according to their fixed strategy.
        values_sum = 0
        for a in best_response_node.children:
            new_player_states = np.empty([0, 3])
            for state in player_states:
                new_player_states = np.append(
                    new_player_states,
                    [[state[0].children[a],
                      state[1] * state[0].strategy[a], state[2]]],
                    axis=0)
            values_sum += self._solve(
                player_position, best_response_node.children[a],
                new_player_states, best_response_cards, board_cards)
        return values_sum
    else:
        # The best responder acts: keep the actions that minimise the
        # evaluated player's value and split probability uniformly among ties.
        best_value = None
        best_value_actions = None
        for a in best_response_node.children:
            new_player_states = np.empty([0, 3])
            for state in player_states:
                new_player_states = np.append(
                    new_player_states,
                    [[state[0].children[a], state[1], state[2]]],
                    axis=0)
            player_value = self._solve(
                player_position, best_response_node.children[a],
                new_player_states, best_response_cards, board_cards)
            if (best_value is None) or (player_value < best_value):
                best_value = player_value
                best_value_actions = [a]
            elif player_value == best_value:
                best_value_actions.append(a)
        best_value_action_probability = 1 / len(best_value_actions)
        for a in best_value_actions:
            best_response_node.strategy[a] = best_value_action_probability
        return best_value
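# In the final branch of _solve the best responder chooses, at each of its
# decision nodes, the actions that minimise the evaluated player's value and
# splits probability uniformly among ties. A standalone sketch of that
# tie-splitting step, with illustrative (made-up) per-action values:
action_values = {0: -2.0, 1: -2.0, 2: 1.5}

best_value = min(action_values.values())
best_actions = [a for a, v in action_values.items() if v == best_value]

best_response_strategy = {
    a: (1.0 / len(best_actions) if a in best_actions else 0.0)
    for a in action_values}
print(best_response_strategy)  # {0: 0.5, 1: 0.5, 2: 0.0}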
def _cfr_terminal(self, player, nodes, hole_cards, board_cards,
                  players_folded, opponent_reach_prob):
    # Counterfactual value at a terminal node: the player's utility weighted
    # by the probability that the opponent played to reach this node.
    return get_utility(
        hole_cards, board_cards, players_folded,
        nodes[0].pot_commitment)[player] * opponent_reach_prob
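# The value returned by _cfr_terminal is the counterfactual value used by the
# CFR regret updates: the player's terminal payoff scaled by the opponent's
# reach probability. A tiny numeric illustration (all numbers made up):
terminal_utility = 3.0     # get_utility(...)[player] at this terminal node
opponent_reach_prob = 0.4  # product of opponent action probabilities on the path

counterfactual_value = terminal_utility * opponent_reach_prob
print(counterfactual_value)  # 1.2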
def test_get_utility_board_cards(self):
    self.assertEqual(
        get_utility([(51,), (47,)], [46], [False, False], [1, 1]).tolist(),
        [-1, 1])
def test_get_utility_uneven_pot(self):
    self.assertEqual(
        get_utility([(51,), (47,)], [], [False, False], [5, 1]).tolist(),
        [1, -1])
def test_get_utility_folded_player(self):
    self.assertEqual(
        get_utility([(51,), (47,)], [], [True, False], [1, 1]).tolist(),
        [-1, 1])
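# The tests above pass cards as plain integers. Assuming the ACPC-style
# encoding card = rank_index * 4 + suit_index (an assumption, not stated in
# the tests themselves), 51 decodes to an ace and 46/47 to kings, so in the
# board-cards test player 1 pairs the board king and takes the pot, while in
# the folded-player test the folding player loses their pot commitment
# regardless of hand strength. A small decoding sketch under that assumed
# encoding:
RANKS = '23456789TJQKA'
SUITS = 'cdhs'

def describe_card(card):
    # Assumed ACPC-style encoding; adjust if the game uses a different one.
    return RANKS[card // 4] + SUITS[card % 4]

print([describe_card(c) for c in (51, 47, 46)])  # ['As', 'Ks', 'Kh']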