def _internal_get_player_utilities(self, nodes, hole_cards, board_cards,
                                   players_folded, callback):
    node = nodes[0]
    if isinstance(node, TerminalNode):
        return get_utility(hole_cards, board_cards, players_folded,
                           node.pot_commitment)
    elif isinstance(node, HoleCardsNode):
        values = np.zeros([0, self.game.get_num_players()])
        possible_hole_cards = [node.children for node in nodes]
        for hole_cards_combination in itertools.product(*possible_hole_cards):
            # Skip deals where the same card would be given to two players.
            if is_unique(*hole_cards_combination):
                new_nodes = [
                    node.children[hole_cards_combination[i]]
                    for i, node in enumerate(nodes)
                ]
                player_values = self.get_player_utilities(
                    new_nodes, hole_cards_combination, board_cards,
                    players_folded, callback)
                values = np.append(values, [player_values], 0)
        return np.mean(values, 0)
    elif isinstance(node, BoardCardsNode):
        possible_board_cards = intersection(
            *map(lambda node: node.children, nodes))
        values = np.zeros(
            [len(possible_board_cards), self.game.get_num_players()])
        for i, next_board_cards in enumerate(possible_board_cards):
            new_nodes = [node.children[next_board_cards] for node in nodes]
            new_board_cards = flatten(board_cards, next_board_cards)
            values[i, :] = self.get_player_utilities(
                new_nodes, hole_cards, new_board_cards, players_folded,
                callback)
        return np.mean(values, 0)
    else:
        current_player_node = nodes[node.player]
        utilities = np.zeros(self.game.get_num_players())
        for a in current_player_node.children:
            new_nodes = [node.children[a] for node in nodes]
            new_players_folded = players_folded
            if a == 0:
                # Action 0 is fold: mark the acting player as folded.
                new_players_folded = list(players_folded)
                new_players_folded[current_player_node.player] = True
            action_utilities = self.get_player_utilities(
                new_nodes, hole_cards, board_cards, new_players_folded,
                callback)
            utilities += action_utilities * current_player_node.strategy[a]
        return utilities
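# The traversal above relies on small card helpers (is_unique, flatten,
# intersection) defined elsewhere in the repo. The sketches below show the
# semantics the code assumes; they use hypothetical _reference names so they
# do not shadow the real imports.
def _is_unique_reference(*card_collections):
    # No card may appear more than once across all collections combined.
    all_cards = [card for cards in card_collections for card in cards]
    return len(all_cards) == len(set(all_cards))


def _flatten_reference(*card_collections):
    # Concatenate card collections into a single flat tuple,
    # e.g. _flatten_reference((4, 9), (12,)) == (4, 9, 12).
    return tuple(card for cards in card_collections for card in cards)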
def _cfr_hole_cards(self, player, nodes, hole_cards, board_cards,
                    players_folded, opponent_reach_prob):
    # Every valid deal of private cards is equally likely.
    hole_card_combination_probability = 1 / get_num_hole_card_combinations(
        self.game)
    possible_hole_cards = [node.children for node in nodes]
    hole_card_combinations = filter(lambda comb: is_unique(*comb),
                                    itertools.product(*possible_hole_cards))
    value_sum = 0
    for hole_cards_combination in hole_card_combinations:
        next_nodes = [
            node.children[hole_cards_combination[i]]
            for i, node in enumerate(nodes)
        ]
        player_utility = self._cfr(player, next_nodes, hole_cards_combination,
                                   board_cards, players_folded,
                                   opponent_reach_prob)
        value_sum += player_utility * hole_card_combination_probability
    return value_sum
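# get_num_hole_card_combinations is assumed to count the equally likely
# ordered deals of private cards. A minimal sketch, under the assumption that
# the game object exposes the deck size via a hypothetical
# get_total_num_cards(); the _reference name is likewise illustrative.
from math import comb


def _get_num_hole_card_combinations_reference(game):
    # Deal each player in turn a combination of hole cards drawn from the
    # cards still remaining in the deck.
    num_combinations = 1
    cards_remaining = game.get_total_num_cards()
    for _ in range(game.get_num_players()):
        num_combinations *= comb(cards_remaining, game.get_num_hole_cards())
        cards_remaining -= game.get_num_hole_cards()
    return num_combinations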
def get_utility_estimations(self, state, player, sampling_strategy,
                            evaluated_strategies=None):
    if evaluated_strategies is None:
        evaluated_strategies = [sampling_strategy]
    num_players = self.game.get_num_players()
    opponent_player = (player + 1) % 2
    num_evaluated_strategies = len(evaluated_strategies)
    utilities = np.zeros(num_evaluated_strategies)
    any_player_folded = False
    for p in range(num_players):
        any_player_folded = any_player_folded or state.get_player_folded(p)
    all_board_cards = get_all_board_cards(self.game, state)
    # With mucking enabled, a fold hides the opponent's private cards.
    all_info_available = not (any_player_folded and self.mucking_enabled)
    opponent_hole_cards = None
    if not all_info_available:
        possible_player_hole_cards = list(
            filter(
                lambda hole_cards: is_unique(hole_cards, all_board_cards),
                sampling_strategy.children))
        # The opponent could hold any cards compatible with the player's
        # hole cards and the board.
        opponent_nodes = [
            list(
                map(
                    lambda c: self.equilibirum_strategy.children[c],
                    filter(
                        lambda c: is_unique(hole_cards, c, all_board_cards),
                        self.equilibirum_strategy.children)))
            for hole_cards in possible_player_hole_cards
        ]
    else:
        opponent_hole_cards = [
            state.get_hole_card(opponent_player, c)
            for c in range(self.game.get_num_hole_cards())
        ]
        possible_player_hole_cards = list(
            filter(
                lambda hole_cards: is_unique(
                    hole_cards, opponent_hole_cards, all_board_cards),
                sampling_strategy.children))
        opponent_nodes = [[
            self.equilibirum_strategy.children[tuple(
                sorted(opponent_hole_cards))]
        ] for _ in range(len(possible_player_hole_cards))]
    nodes = [[
        expert_node.children[hole_cards]
        for hole_cards in possible_player_hole_cards
    ] for expert_node in evaluated_strategies]
    sampling_strategy_nodes = [
        sampling_strategy.children[hole_cards]
        for hole_cards in possible_player_hole_cards
    ]
    num_nodes = len(possible_player_hole_cards)
    evaluated_strategies_reach_probabilities = np.ones(
        [num_evaluated_strategies, num_nodes])
    sampling_strategy_reach_probabilities = np.ones(num_nodes)
    opponent_nodes_reach_probabilities = np.ones(
        [len(opponent_nodes), len(opponent_nodes[0])])

    # Calculate the correction term for the hole cards chance event.
    if all_info_available:
        histories_actions_utilities = {}
        for i in range(num_nodes):
            history_actions_utilities = {}
            histories_actions_utilities[i] = history_actions_utilities
            for a in filter(lambda a: a != possible_player_hole_cards[i],
                            sampling_strategy.children):
                nodes_tmp = [None, None]
                nodes_tmp[player] = sampling_strategy_nodes[i]
                nodes_tmp[opponent_player] = sampling_strategy.children[a]
                key = ';'.join(map(lambda m: str(m), nodes_tmp))
                history_actions_utilities[a] = \
                    self.utilities_dict[key][player]
        history_sampling_strategy_reach_probabilities_sum = np.sum(
            sampling_strategy_reach_probabilities)
        current_history_expected_value = 0
        for i in range(num_nodes):
            for a in filter(lambda a: a != possible_player_hole_cards[i],
                            sampling_strategy.children):
                current_history_expected_value += \
                    histories_actions_utilities[i][a] \
                    * sampling_strategy_reach_probabilities[i] / num_nodes
        for i in range(num_nodes):
            sampling_strategy_reach_probabilities[i] /= num_nodes
            for j in range(num_evaluated_strategies):
                evaluated_strategies_reach_probabilities[j, i] /= num_nodes
        next_history_sampling_strategy_reach_probabilities_sum = np.sum(
            sampling_strategy_reach_probabilities)
        next_history_expected_value = 0
        for i in range(num_nodes):
            next_history_expected_value += \
                histories_actions_utilities[i][tuple(
                    sorted(opponent_hole_cards))] \
                * sampling_strategy_reach_probabilities[i]
        utilities += \
            (current_history_expected_value
             / history_sampling_strategy_reach_probabilities_sum) \
            - (next_history_expected_value
               / next_history_sampling_strategy_reach_probabilities_sum)

    def add_terminals_to_utilities(pot_commitment, players_folded,
                                   sampling_strategy_reach_probabilities,
                                   evaluated_strategies_reach_probabilities):
        nonlocal utilities
        sampling_strategy_reach_probability_sum = np.sum(
            sampling_strategy_reach_probabilities)
        if sampling_strategy_reach_probability_sum == 0:
            return
        for i in range(num_nodes):
            if players_folded[player]:
                utility = -pot_commitment[player]
            else:
                hole_cards = [
                    possible_player_hole_cards[i]
                    if p == player else opponent_hole_cards
                    for p in range(num_players)
                ]
                utility = get_utility(hole_cards, all_board_cards,
                                      players_folded, pot_commitment)[player]
            # Importance-weight the terminal utility by the ratio of the
            # evaluated strategies' reach to the sampling strategy's reach.
            utilities += utility * (
                evaluated_strategies_reach_probabilities[:, i]
                / sampling_strategy_reach_probability_sum)

    def update_reach_probabilities(action, sampling_strategy_nodes, nodes,
                                   sampling_strategy_reach_probabilities,
                                   evaluated_strategies_reach_probabilities):
        for i in range(num_nodes):
            sampling_strategy_reach_probabilities[i] *= \
                sampling_strategy_nodes[i].strategy[action]
            for j in range(num_evaluated_strategies):
                evaluated_strategies_reach_probabilities[j, i] *= \
                    nodes[j][i].strategy[action]

    round_index = 0
    action_index = 0
    while True:
        node = nodes[0][0]
        if isinstance(node, BoardCardsNode):
            new_board_cards = get_board_cards(self.game, state, round_index)
            # Calculate the correction term for the board cards chance event.
            # TODO: this should probably be skipped when not all information
            # is available.
            num_board_cards = len(opponent_nodes[0][0].children)
            histories_actions_utilities = {}
            for i in range(num_nodes):
                history_actions_utilities = {}
                histories_actions_utilities[i] = history_actions_utilities
                for a in filter(
                        lambda a: (a in opponent_nodes[i][0].children
                                   if len(opponent_nodes[i]) == 1 else True),
                        sampling_strategy_nodes[i].children):
                    opponent_nodes_utilities = np.zeros(len(opponent_nodes[i]))
                    for j, opponent_node in enumerate(opponent_nodes[i]):
                        if a in opponent_node.children:
                            nodes_tmp = [None, None]
                            nodes_tmp[player] = \
                                sampling_strategy_nodes[i].children[a]
                            nodes_tmp[opponent_player] = \
                                opponent_node.children[a]
                            key = ';'.join(map(lambda m: str(m), nodes_tmp))
                            opponent_nodes_utilities[j] = \
                                self.utilities_dict[key][player]
                    if np.sum(opponent_nodes_reach_probabilities[i]) != 0:
                        opponent_reach_ratios = \
                            opponent_nodes_reach_probabilities[i] \
                            / np.sum(opponent_nodes_reach_probabilities[i])
                    else:
                        # Fall back to uniform weights when the opponent has
                        # zero reach probability for every holding.
                        opponent_reach_ratios = np.ones(
                            len(opponent_nodes_reach_probabilities[i])) \
                            / len(opponent_nodes_reach_probabilities[i])
                    history_actions_utilities[a] = np.sum(
                        opponent_nodes_utilities * opponent_reach_ratios)
            history_sampling_strategy_reach_probabilities_sum = np.sum(
                sampling_strategy_reach_probabilities)
            importance_sampling_ratio = np.sum(
                evaluated_strategies_reach_probabilities,
                axis=1) / history_sampling_strategy_reach_probabilities_sum
            current_history_expected_value = 0
            for i in range(num_nodes):
                for a in filter(
                        lambda a: (a in opponent_nodes[i][0].children
                                   if len(opponent_nodes[i]) == 1 else True),
                        sampling_strategy_nodes[i].children):
                    current_history_expected_value += \
                        histories_actions_utilities[i][a] \
                        * sampling_strategy_reach_probabilities[i] \
                        / num_board_cards
            for i in range(num_nodes):
                sampling_strategy_reach_probabilities[i] /= num_board_cards
                for j in range(num_evaluated_strategies):
                    evaluated_strategies_reach_probabilities[j, i] /= \
                        num_board_cards
            next_history_sampling_strategy_reach_probabilities_sum = np.sum(
                sampling_strategy_reach_probabilities)
            next_history_expected_value = 0
            for i in range(num_nodes):
                next_history_expected_value += \
                    histories_actions_utilities[i][new_board_cards] \
                    * sampling_strategy_reach_probabilities[i]
            utilities += \
                ((current_history_expected_value
                  / history_sampling_strategy_reach_probabilities_sum)
                 - (next_history_expected_value
                    / next_history_sampling_strategy_reach_probabilities_sum)) \
                * importance_sampling_ratio
            nodes = [[
                expert_node.children[new_board_cards]
                for expert_node in expert_nodes
            ] for expert_nodes in nodes]
            sampling_strategy_nodes = [
                node.children[new_board_cards]
                for node in sampling_strategy_nodes
            ]
            opponent_nodes = [[
                opponent_nodes[i][j].children[new_board_cards]
                for j in range(len(opponent_nodes[i]))
            ] for i in range(num_nodes)]
        elif isinstance(node, ActionNode):
            action = convert_action_to_int(
                state.get_action_type(round_index, action_index))
            if node.player == player:
                # Calculate the correction term for the player's actions.
                histories_actions_utilities = {}
                for i in range(num_nodes):
                    history_actions_utilities = {}
                    histories_actions_utilities[i] = history_actions_utilities
                    for a in filter(
                            lambda a: (a in opponent_nodes[i][0].children
                                       if len(opponent_nodes[i]) == 1
                                       else True),
                            sampling_strategy_nodes[i].children):
                        opponent_nodes_utilities = np.zeros(
                            len(opponent_nodes[i]))
                        for j, opponent_node in enumerate(opponent_nodes[i]):
                            nodes_tmp = [None, None]
                            nodes_tmp[player] = \
                                sampling_strategy_nodes[i].children[a]
                            nodes_tmp[opponent_player] = \
                                opponent_node.children[a]
                            key = ';'.join(map(lambda m: str(m), nodes_tmp))
                            opponent_nodes_utilities[j] = \
                                self.utilities_dict[key][player]
                        if np.sum(
                                opponent_nodes_reach_probabilities[i]) != 0:
                            opponent_reach_ratios = \
                                opponent_nodes_reach_probabilities[i] \
                                / np.sum(
                                    opponent_nodes_reach_probabilities[i])
                        else:
                            # Fall back to uniform weights when the opponent
                            # has zero reach probability for every holding.
                            opponent_reach_ratios = np.ones(
                                len(opponent_nodes_reach_probabilities[i])) \
                                / len(opponent_nodes_reach_probabilities[i])
                        history_actions_utilities[a] = np.sum(
                            opponent_nodes_utilities * opponent_reach_ratios)
                history_sampling_strategy_reach_probabilities_sum = np.sum(
                    sampling_strategy_reach_probabilities)
                importance_sampling_ratio = np.sum(
                    evaluated_strategies_reach_probabilities, axis=1
                ) / history_sampling_strategy_reach_probabilities_sum
                current_history_expected_value = 0
                for i in range(num_nodes):
                    for a in sampling_strategy_nodes[i].children:
                        current_history_expected_value += \
                            histories_actions_utilities[i][a] \
                            * sampling_strategy_reach_probabilities[i] \
                            * sampling_strategy_nodes[i].strategy[a]
                update_reach_probabilities(
                    action, sampling_strategy_nodes, nodes,
                    sampling_strategy_reach_probabilities,
                    evaluated_strategies_reach_probabilities)
                next_history_sampling_strategy_reach_probabilities_sum = \
                    np.sum(sampling_strategy_reach_probabilities)
                next_history_expected_value = 0
                for i in range(num_nodes):
                    next_history_expected_value += \
                        histories_actions_utilities[i][action] \
                        * sampling_strategy_reach_probabilities[i]
                # Guard against a zero denominator when the sampling strategy
                # assigns zero probability to the observed action.
                if next_history_sampling_strategy_reach_probabilities_sum != 0:
                    utilities += \
                        ((current_history_expected_value
                          / history_sampling_strategy_reach_probabilities_sum)
                         - (next_history_expected_value
                            / next_history_sampling_strategy_reach_probabilities_sum)) \
                        * importance_sampling_ratio
            else:
                for i in range(num_nodes):
                    for j in range(len(opponent_nodes[0])):
                        opponent_nodes_reach_probabilities[i, j] *= \
                            opponent_nodes[i][j].strategy[action]
            action_index += 1
            if action_index == state.get_num_actions(round_index):
                round_index += 1
                action_index = 0
            nodes = [[
                expert_node.children[action]
                for expert_node in expert_nodes
            ] for expert_nodes in nodes]
            sampling_strategy_nodes = [
                node.children[action] for node in sampling_strategy_nodes
            ]
            opponent_nodes = [[
                opponent_nodes[i][j].children[action]
                for j in range(len(opponent_nodes[i]))
            ] for i in range(num_nodes)]
        elif isinstance(node, TerminalNode):
            players_folded = [
                state.get_player_folded(p) for p in range(num_players)
            ]
            add_terminals_to_utilities(
                node.pot_commitment, players_folded,
                sampling_strategy_reach_probabilities,
                evaluated_strategies_reach_probabilities)
            break
    return utilities
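# A hypothetical usage sketch (names are illustrative, not from the repo):
# after one hand has been played out, estimate each candidate strategy's
# expected utility on that hand, using the played strategy for sampling and
# the correction terms above for variance reduction.
#
#   estimates = evaluator.get_utility_estimations(
#       final_state, player=0,
#       sampling_strategy=played_strategy_tree,
#       evaluated_strategies=[played_strategy_tree, candidate_strategy_tree])
#   # estimates[j] is the estimate for evaluated_strategies[j].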
def get_utility_estimations(self, state, player, sampling_strategy,
                            evaluated_strategies=None):
    if evaluated_strategies is None:
        evaluated_strategies = [sampling_strategy]
    num_players = self.game.get_num_players()
    opponent_player = (player + 1) % 2
    num_evaluated_strategies = len(evaluated_strategies)
    utilities = np.zeros(num_evaluated_strategies)
    any_player_folded = False
    for p in range(num_players):
        any_player_folded = any_player_folded or state.get_player_folded(p)
    all_board_cards = get_all_board_cards(self.game, state)
    opponent_hole_cards = None
    if any_player_folded and self.mucking_enabled:
        # The opponent mucked, so only the board constrains the player's
        # possible holdings.
        possible_player_hole_cards = list(
            filter(
                lambda hole_cards: is_unique(hole_cards, all_board_cards),
                sampling_strategy.children))
    else:
        opponent_hole_cards = [
            state.get_hole_card(opponent_player, c)
            for c in range(self.game.get_num_hole_cards())
        ]
        possible_player_hole_cards = list(
            filter(
                lambda hole_cards: is_unique(
                    hole_cards, opponent_hole_cards, all_board_cards),
                sampling_strategy.children))
    nodes = [[
        expert_node.children[hole_cards]
        for hole_cards in possible_player_hole_cards
    ] for expert_node in evaluated_strategies]
    sampling_strategy_nodes = [
        sampling_strategy.children[hole_cards]
        for hole_cards in possible_player_hole_cards
    ]
    num_nodes = len(possible_player_hole_cards)
    evaluated_strategies_reach_probabilities = np.ones(
        [num_evaluated_strategies, num_nodes])
    sampling_strategy_reach_probabilities = np.ones(num_nodes)

    def add_terminals_to_utilities(pot_commitment, players_folded,
                                   sampling_strategy_reach_probabilities,
                                   evaluated_strategies_reach_probabilities):
        nonlocal utilities
        sampling_strategy_reach_probability_sum = np.sum(
            sampling_strategy_reach_probabilities)
        if sampling_strategy_reach_probability_sum == 0:
            return
        for i in range(num_nodes):
            if players_folded[player]:
                utility = -pot_commitment[player]
            else:
                hole_cards = [
                    possible_player_hole_cards[i]
                    if p == player else opponent_hole_cards
                    for p in range(num_players)
                ]
                utility = get_utility(hole_cards, all_board_cards,
                                      players_folded, pot_commitment)[player]
            # Importance-weight the terminal utility by the ratio of the
            # evaluated strategies' reach to the sampling strategy's reach.
            utilities += utility * (
                evaluated_strategies_reach_probabilities[:, i]
                / sampling_strategy_reach_probability_sum)

    def update_reach_probabilities(action, sampling_strategy_nodes, nodes,
                                   sampling_strategy_reach_probabilities,
                                   evaluated_strategies_reach_probabilities):
        for i in range(num_nodes):
            if sampling_strategy_reach_probabilities is not None:
                sampling_strategy_reach_probabilities[i] *= \
                    sampling_strategy_nodes[i].strategy[action]
            for j in range(num_evaluated_strategies):
                evaluated_strategies_reach_probabilities[j, i] *= \
                    nodes[j][i].strategy[action]

    round_index = 0
    action_index = 0
    while True:
        node = nodes[0][0]
        if isinstance(node, BoardCardsNode):
            new_board_cards = get_board_cards(self.game, state, round_index)
            nodes = [[
                expert_node.children[new_board_cards]
                for expert_node in expert_nodes
            ] for expert_nodes in nodes]
            sampling_strategy_nodes = [
                node.children[new_board_cards]
                for node in sampling_strategy_nodes
            ]
        elif isinstance(node, ActionNode):
            action = convert_action_to_int(
                state.get_action_type(round_index, action_index))
            if node.player == player:
                update_reach_probabilities(
                    action, sampling_strategy_nodes, nodes,
                    sampling_strategy_reach_probabilities,
                    evaluated_strategies_reach_probabilities)
            action_index += 1
            if action_index == state.get_num_actions(round_index):
                round_index += 1
                action_index = 0
            nodes = [[
                expert_node.children[action]
                for expert_node in expert_nodes
            ] for expert_nodes in nodes]
            sampling_strategy_nodes = [
                node.children[action] for node in sampling_strategy_nodes
            ]
        elif isinstance(node, TerminalNode):
            players_folded = [
                state.get_player_folded(p) for p in range(num_players)
            ]
            add_terminals_to_utilities(
                node.pot_commitment, players_folded,
                sampling_strategy_reach_probabilities,
                evaluated_strategies_reach_probabilities)
            break
    return utilities
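# The estimator above is plain importance sampling over the single sampled
# hand: each terminal utility is weighted by the evaluated strategy's reach
# probability divided by the sampling strategy's reach probability. A worked
# one-holding example with made-up numbers:
#
#   sampling reach = 0.5, evaluated reach = 0.25, terminal utility = +2
#   contribution to the estimate: 2 * (0.25 / 0.5) = +1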
def test_is_unique_false(self):
    # Card 3 appears in two of the collections, so the cards are not unique.
    self.assertFalse(is_unique([1, 2], [3], [3, 4, 5]))

def test_is_unique_true(self):
    # All cards across the collections are distinct.
    self.assertTrue(is_unique((1, 2), [3], [4, 5]))
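# An additional illustrative case (an assumption, not part of the original
# suite): a card shared between the first two collections must also be
# rejected.
def test_is_unique_false_shared_card(self):
    self.assertFalse(is_unique([1, 2], [2], [3, 4]))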