def update_average_strategy(self, node, current_strategy, _iter, actions_count):
    ''' Update a node's average strategy with the current iteration strategy.

    Nothing is done while `_iter` is below `arguments.cfr_skip_iters`.
    Afterwards the stored strategy becomes
    `old * (1 - w) + current_strategy * w`, where the per-hand weight `w` is
    the acting player's range at the node divided by the running sum of those
    ranges (kept in `node.iter_weight_sum`).

    Params:
        node: the node to update; `node.strategy` and `node.iter_weight_sum`
            are created on first use and then modified in place
        current_strategy: the CFR strategy for the current iteration
        _iter: the iteration number of the current CFR iteration
        actions_count: number of rows of the strategy tensor allocated on
            first use
    '''
    if _iter < arguments.cfr_skip_iters:
        return

    # `is None` rather than `== None`: these fields hold tensors once set, and
    # identity is the only comparison that is unambiguous for tensors.
    if node.strategy is None:
        node.strategy = arguments.Tensor(
            actions_count, game_settings.card_count).fill_(0)
    if node.iter_weight_sum is None:
        node.iter_weight_sum = arguments.Tensor(
            game_settings.card_count).fill_(0)

    iter_weight_contribution = node.ranges_absolute[node.current_player].clone()
    # non-positive weights are replaced by epsilon so the division below never
    # divides by zero
    iter_weight_contribution[torch.le(iter_weight_contribution, 0)] = self.regret_epsilon
    node.iter_weight_sum.add_(iter_weight_contribution)

    iter_weight = torch.div(iter_weight_contribution, node.iter_weight_sum)
    expanded_weight = iter_weight.view(
        1, game_settings.card_count).expand_as(node.strategy)

    old_strategy_scale = 1 - expanded_weight
    node.strategy.mul_(old_strategy_scale)
    node.strategy.add_(current_strategy.mul(expanded_weight))
def _set_call_matrix(self, board):
    ''' Sets the evaluator's call matrix, which gives the equity for terminal
    nodes where no player has folded.

    For nodes in the last betting round, creates the matrix `A` such that for
    player ranges `x` and `y`, `x'Ay` is the equity for the first player when
    no player folds. For nodes in the first betting round, gives the weighted
    average of all such possible matrices.

    Params:
        board: a possibly empty vector of board cards

    Raises:
        AssertionError: if `card_tools.board_to_street(board)` is neither 1
            nor 2
    '''
    card_count = game_settings.card_count
    street = card_tools.board_to_street(board)
    self.equity_matrix = arguments.Tensor(card_count, card_count).fill_(0)

    if street == 1:
        # iterate through all possible next round boards
        next_round_boards = card_tools.get_second_round_boards()
        boards_count = next_round_boards.size(0)
        next_round_equity_matrix = arguments.Tensor(card_count, card_count)
        # the index gets its own name so the `board` parameter is not shadowed
        for board_idx in range(boards_count):
            self.get_last_round_call_matrix(
                next_round_boards[board_idx], next_round_equity_matrix)
            self.equity_matrix.add_(next_round_equity_matrix)

        # averaging the values in the call matrix
        if game_settings.board_card_count == 1:
            weight_constant = 1 / (card_count - 2)
        else:
            weight_constant = 2 / ((card_count - 2) * (card_count - 3))
        self.equity_matrix.mul_(weight_constant)
    elif street == 2:
        # for the last round the matrix is computed directly
        self.get_last_round_call_matrix(board, self.equity_matrix)
    else:
        # raised explicitly so the check is not stripped under `python -O`
        raise AssertionError('impossible street')
def get_possible_bucket_mask(self):
    ''' Gives a vector of possible buckets on the board.

    @{set_board} must be called first, since it builds `self._range_matrix`.

    Return a `1 x bucket_count` tensor holding, for every bucket, the sum of
    that bucket's column of the range matrix (identical to multiplying a row
    of ones by the matrix). With a 0/1 range matrix in which every bucket
    holds at most one card this is 1 for a valid bucket and 0 otherwise.
    '''
    # Summing over the card axis needs no preallocated output and no separate
    # ones vector, so it always matches the matrix's own shape and type.
    return self._range_matrix.sum(dim=0, keepdim=True)
def set_board(self, board):
    ''' Sets the board cards for the bucketer and rebuilds both conversion
    matrices.

    `self._range_matrix` (cards x buckets) gets a 1 wherever a card's bucket
    id equals the column index; `self._reverse_value_matrix` is a copy of its
    transpose.

    Params:
        board: a non-empty vector of board cards
    '''
    self.bucketer = Bucketer()
    self.bucket_count = self.bucketer.get_bucket_count()

    rows = game_settings.card_count
    cols = self.bucket_count
    self._range_matrix = arguments.Tensor(rows, cols).zero_()

    buckets = self.bucketer.compute_buckets(board)
    class_ids = torch.arange(0, cols)
    if arguments.gpu:
        buckets = buckets.cuda()
        class_ids = class_ids.cuda()
    else:
        class_ids = class_ids.float()

    # broadcast bucket ids along the rows and each card's bucket along the
    # columns so both can be compared element by element
    class_ids = class_ids.view(1, cols).expand(rows, cols)
    card_buckets = buckets.view(rows, 1).expand(rows, cols)

    # matrix for transformation from card ranges to strength class ranges
    matches = torch.eq(class_ids, card_buckets)
    self._range_matrix[matches] = 1

    # matrix for transformation from class values to card values
    self._reverse_value_matrix = self._range_matrix.T.clone()
def _fill_chance(self, node): ''' Fills all chance nodes of a subtree with the probability of each outcome. Params: node: the root of the subtree ''' if (node.terminal): return if node.current_player == constants.players.chance: # chance node, we will fill uniform strategy # works only for chance node at start of second round assert (len(node.children) == self.board_count) # filling strategy # we will fill strategy with an uniform probability, but it has to be zero for hands that are not possible on # corresponding board node.strategy = arguments.Tensor(len(node.children), game_settings.card_count).fill_(0) # setting strategy for impossible hands to 0 for i in _range(len(node.children)): child_node = node.children[i] mask = card_tools.get_possible_hand_indexes( child_node.board).byte() node.strategy[i][mask] = 1.0 / (self.board_count - 2) for i in _range(len(node.children)): child_node = node.children[i] self._fill_chance(child_node)
def _build_tree_dfs(self, current_node):
    ''' Recursively build the (sub)tree rooted at the current node.

    Each child gets its `parent` set and is built before the next one. The
    node's `actions` tensor holds `constants.actions.fold` for the first
    child, `constants.actions.ccall` for the second and the child's largest
    bet for every further child. `depth` is one more than the deepest child
    (1 for a node without children).

    Params:
        current_node: the root to build the (sub)tree from

    Return `current_node` after the (sub)tree has been built
    '''
    self._fill_additional_attributes(current_node)
    children = self._get_children_nodes(current_node)
    current_node.children = children
    current_node.actions = arguments.Tensor(len(children))

    deepest_child = 0
    for index, child in enumerate(children):
        child.parent = current_node
        self._build_tree_dfs(child)
        deepest_child = max(deepest_child, child.depth)

        if index == 0:
            action = constants.actions.fold
        elif index == 1:
            action = constants.actions.ccall
        else:
            action = child.bets.max()
        current_node.actions[index] = action

    current_node.depth = deepest_child + 1
    return current_node
def compute_values(self, root, starting_ranges=None):
    ''' Compute the self play and best response values of a strategy profile
    on the given game tree.

    The cfvs for each player in the given strategy profile when playing
    against each other is stored in the `cf_values` field for each node. The
    cfvs for a best response against each player in the profile are stored in
    the `cf_values_br` field for each node.

    Params:
        root: The root of the game tree. Each node of the tree is assumed to
            have a strategy saved in the `strategy` field.
        starting_ranges [opt]: probability vectors over player private hands
            at the root node (default uniform)
    '''
    # 1.0 set the starting range
    # `is None`, not `== None`: a tensor argument must never be compared
    # against None by value.
    if starting_ranges is None:
        starting_ranges = arguments.Tensor(
            constants.players_count, game_settings.card_count).fill_(
                1.0 / game_settings.card_count)

    # 2.0 check the starting ranges
    checksum = starting_ranges.sum(dim=1)
    assert abs(checksum[0] - 1) < 0.0001, 'starting range does not sum to 1'
    assert abs(checksum[1] - 1) < 0.0001, 'starting range does not sum to 1'
    assert starting_ranges.lt(0).sum() == 0

    # 3.0 compute the values
    self._fill_ranges_dfs(root, starting_ranges)
    self._compute_values_dfs(root)
def build_tree(self, params):
    ''' Builds the tree.

    Params:
        params: table of tree parameters, with the following fields:
            * `root_node`: node describing the root; its `street`, `bets`,
              `current_player` and `board` are copied into a fresh root
              (`bets` and `board` are cloned)
            * `limit_to_street`: stored on the builder as `limit_to_street`
            * `bet_sizing` (optional): a @{bet_sizing} object which gives the
              allowed bets for each player; when falsy it is replaced, on
              `params` itself, by one built from `arguments.bet_sizing`

    Return the root node of the built tree, already filled with a uniform
    strategy
    '''
    template = params.root_node

    # copy what is needed from the given root so the input node stays untouched
    root = TreeNode()
    root.street = template.street
    root.bets = template.bets.clone()
    root.current_player = template.current_player
    root.board = template.board.clone()

    if not params.bet_sizing:
        default_sizing = BetSizing(arguments.Tensor(arguments.bet_sizing))
        params.bet_sizing = default_sizing
    assert params.bet_sizing

    self.bet_sizing = params.bet_sizing
    self.limit_to_street = params.limit_to_street

    self._build_tree_dfs(root)

    StrategyFilling().fill_uniform(root)
    return root
def get_possible_bets(self, node):
    ''' Gives the bets which are legal at a game state.

    Params:
        node: a representation of the current game state, with fields:
            * `bets`: the number of chips currently committed by each player
            * `current_player`: the currently acting player

    Return an Nx2 tensor where N is the number of new possible game states,
    containing N sets of new commitment levels for each player. An empty
    tensor is returned when no raise is possible; a single all-in row when
    the minimum raise already equals the maximum raise.
    '''
    acting = node.current_player
    assert acting == 0 or acting == 1, 'Wrong player for bet size computation'
    other = 1 - node.current_player
    opponent_bet = node.bets[other]
    assert node.bets[acting] <= opponent_bet

    # the raise is at least the amount needed to call (and at least the ante),
    # but never more than what is left in the stack
    max_raise_size = arguments.stack - opponent_bet
    min_raise_size = opponent_bet - node.bets[acting]
    min_raise_size = max(min_raise_size, arguments.ante)
    min_raise_size = min(max_raise_size, min_raise_size)

    if min_raise_size == 0:
        return arguments.Tensor()

    if min_raise_size == max_raise_size:
        allin_only = arguments.Tensor(1, 2).fill_(opponent_bet)
        allin_only[0][acting] = opponent_bet + min_raise_size
        return allin_only

    # one row per pot fraction plus one for the all-in, which is always legal
    capacity = self.pot_fractions.size(0) + 1
    bets = arguments.Tensor(capacity, 2).fill_(opponent_bet)

    # pot size after the opponent's bet is called
    pot = opponent_bet * 2

    filled = 0
    for fraction_idx in range(self.pot_fractions.size(0)):
        raise_size = pot * self.pot_fractions[fraction_idx]
        if raise_size >= min_raise_size and raise_size < max_raise_size:
            bets[filled, acting] = opponent_bet + raise_size
            filled += 1

    # adding allin
    assert filled <= capacity
    bets[filled, acting] = opponent_bet + max_raise_size
    filled += 1

    return bets[:filled, :]
def _generate_sorted_range(self, _range):
    ''' Samples a batch of ranges with hands sorted by strength on the board.

    Delegates to `_generate_recursion`, handing it a vector of ones with one
    entry per row of `_range`.

    Params:
        _range: a NxK tensor in which to store the sampled ranges, where N is
            the number of ranges to sample and K is the range size
    '''
    rows = _range.size(0)
    initial_mass = arguments.Tensor(rows).fill_(1)
    self._generate_recursion(_range, initial_mass)
def get_action_strategy(self, action):
    ''' Returns an arbitrary vector.

    Params:
        action [opt]: not used

    Return a vector of 1s with `game_settings.card_count` entries
    '''
    ones = arguments.Tensor(game_settings.card_count)
    ones.fill_(1)
    return ones
def get_chance_action_cfv(self, player_action, board):
    ''' Returns an arbitrary vector.

    Params:
        player_action [opt]: not used
        board [opt]: not used

    Return a vector of 1s with `game_settings.card_count` entries
    '''
    ones = arguments.Tensor(game_settings.card_count)
    ones.fill_(1)
    return ones
def set_board(self, board):
    ''' Sets the (possibly empty) board cards to sample ranges with.

    The sampled ranges will assign 0 probability to any private hands that
    share any cards with the board.

    Stores the number of possible hands, a `1 x K` boolean mask of possible
    hands, and `reverse_order`: the rank of every possible hand within the
    strength ordering of the possible hands.

    Params:
        board: a possibly empty vector of board cards
    '''
    hand_strengths = evaluator.batch_eval(board)
    possible_hand_indexes = card_tools.get_possible_hand_indexes(board)

    # A plain sum converted to int; accumulating in uint8 would wrap around
    # for more than 255 possible hands.
    self.possible_hands_count = int(possible_hand_indexes.sum().item())
    self.possible_hands_mask = possible_hand_indexes.view(1, -1).bool()

    # strengths of the hands that do not collide with the board
    non_coliding_strengths = torch.masked_select(
        hand_strengths, self.possible_hands_mask)

    # sorting the sort order yields, for every hand, its position in the
    # sorted sequence
    _, order = non_coliding_strengths.sort()
    _, self.reverse_order = order.sort()
    self.reverse_order = self.reverse_order.view(1, -1).long()
    self.reordered_range = arguments.Tensor()
def forward(ctx, outputs, targets, mask):
    ''' Computes the loss over a batch of neural net outputs and targets.

    `outputs` and `targets` are multiplied by `mask` IN PLACE before the loss
    is evaluated, so the caller's tensors are modified.

    Params:
        ctx: autograd context; `outputs`, `targets` and the per-row mask
            multiplier are saved on it for the backward pass
        outputs: an NxM tensor containing N vectors of values over buckets,
            output by the neural net
        targets: an NxM tensor containing N vectors of actual values over
            buckets, produced by @{data_generation_call}
        mask: an NxM tensor containing N mask vectors generated with
            @{bucket_conversion.get_possible_bucket_mask}

    Return `smoothL1LossForward(outputs, targets)` on the masked tensors,
    scaled by `(N * M) / (N * M - mask.sum())`
    '''
    batch_size = outputs.size(0)
    feature_size = outputs.size(1)

    # 1.0 zero out the outputs/target so that the error does not depend on these
    outputs.mul_(mask)
    targets.mul_(mask)
    loss = smoothL1LossForward(outputs, targets)

    # 2.0 copy the mask into the working tensor type and sum it per sample
    mask_values = arguments.Tensor(mask.size()).copy_(mask)
    mask_sum = mask_values.sum(dim=1, keepdim=True)

    # 3.0 per-sample multiplier: (feature_size - mask_sum) / feature_size
    # NOTE(review): the original comment here said "mask is 1 for impossible
    # features", while the docstring says the mask comes from
    # get_possible_bucket_mask - confirm which convention is intended.
    mask_multiplier = arguments.Tensor(batch_size, 1).fill_(feature_size)
    mask_multiplier.sub_(mask_sum)
    mask_multiplier.div_(feature_size)

    # 4.0 multiply to get a new loss
    # loss is not really computed batch-wise correctly,
    # but that does not really matter now since gradients are correct
    total_entries = batch_size * feature_size
    loss_multiplier = total_entries / (total_entries - mask_sum.sum())
    new_loss = loss_multiplier * loss

    ctx.save_for_backward(outputs, targets, mask_multiplier)
    return new_loss
def _fill_uniformly(self, node):
    ''' Fills a player node with a uniform strategy.

    The acting player must be P1 or P2 (checked even for terminal nodes).
    Terminal nodes are otherwise left untouched; for any other node every
    entry of the `children x cards` strategy is `1 / number of children`.

    Params:
        node: the player node
    '''
    assert node.current_player in (constants.players.P1, constants.players.P2)

    if node.terminal:
        return

    action_count = len(node.children)
    uniform_probability = 1.0 / action_count
    node.strategy = arguments.Tensor(
        action_count, game_settings.card_count).fill_(uniform_probability)
def resolve_first_node(self):
    ''' Solves a depth-limited lookahead from the first node of the game to
    get opponent counterfactual values.

    The cfvs are stored in the field `starting_cfvs_p1`. Because this is the
    first node of the game, exact ranges are known for both players, so
    opponent cfvs are not necessary for solving.

    The first node has an empty board, street 1, P1 to act and both players
    committed to `arguments.ante`; both ranges are uniform over that board.
    '''
    first_node = TreeNode()
    first_node.board = arguments.Tensor()
    first_node.street = 1
    first_node.current_player = constants.players.P1
    first_node.bets = arguments.Tensor([arguments.ante, arguments.ante])

    # create the starting ranges
    player_range = card_tools.get_uniform_range(first_node.board)
    opponent_range = card_tools.get_uniform_range(first_node.board)

    # create re-solving and re-solve the first node
    # (constructed once; the original built a second instance up front and
    # immediately discarded it)
    self.first_node_resolving = Resolving()
    self.first_node_resolving.resolve_first_node(
        first_node, player_range, opponent_range)

    # store the initial CFVs
    self.starting_cfvs_p1 = self.first_node_resolving.get_root_cfv()
def _set_fold_matrix(self, board):
    ''' Sets the evaluator's fold matrix, which gives the equity for terminal
    nodes where one player has folded.

    Creates the matrix `B` such that for player ranges `x` and `y`, `x'By` is
    the equity for the player who doesn't fold. The matrix starts as all ones
    with a zero diagonal and is then passed to `_handle_blocking_cards`
    together with the board.

    Params:
        board: a possibly empty vector of board cards
    '''
    size = game_settings.card_count
    self.fold_matrix = arguments.Tensor(size, size)
    self.fold_matrix.fill_(1)

    # setting cards that block each other to zero - exactly elements on
    # diagonal in leduc variants
    diagonal = torch.eye(size).type_as(self.fold_matrix)
    self.fold_matrix.sub_(diagonal)

    self._handle_blocking_cards(self.fold_matrix, board)
def _sample_bet(self, node, state):
    ''' Samples an action to take from the strategy at the given game state.

    Also updates `self.current_opponent_cfvs_bound` and
    `self.current_player_range` according to the sampled action.

    Params:
        node: the game node where the re-solving player is to act (a table of
            the type returned by @{protocol_to_node.parsed_state_to_node});
            only its `board` is read here
        state: the game state where the re-solving player is to act (a table
            of the type returned by @{protocol_to_node.parse_state}); not
            used in this method

    Return the sampled entry of `self.resolving.get_possible_actions()` as a
    Python number
    '''
    # 1.0 get the possible bets in the node
    possible_bets = self.resolving.get_possible_actions()
    actions_count = possible_bets.size(0)

    # 2.0 get the strategy for the current hand since the strategy is
    # computed for all hands
    hand_strategy = arguments.Tensor(actions_count)
    for i in range(actions_count):
        action_bet = possible_bets[i]
        action_strategy = self.resolving.get_action_strategy(action_bet)
        hand_strategy[i] = action_strategy[self.hand_id]
    assert abs(1 - hand_strategy.sum()) < 0.001

    print("strategy:")
    print(hand_strategy)

    # 3.0 sample the action by doing cumsum and uniform sample
    hand_strategy_cumsum = torch.cumsum(hand_strategy, dim=0)
    # a plain float compares against tensors on any device
    r = torch.rand(1).item()

    # First action whose cumulative probability exceeds r. The strategy only
    # sums to 1 within a tolerance, so r may exceed every entry; the last
    # action is the fallback in that case.
    sampled_index = actions_count - 1
    for i in range(actions_count):
        if hand_strategy_cumsum[i].item() > r:
            sampled_index = i
            break

    sampled_bet = possible_bets[sampled_index].item()
    # str() is required: concatenating a float to a str raises TypeError
    print("playing action that has prob: " +
          str(hand_strategy[sampled_index].item()))

    # 4.0 update the invariants based on our action
    self.current_opponent_cfvs_bound = self.resolving.get_action_cfv(sampled_bet)
    strategy = self.resolving.get_action_strategy(sampled_bet)
    self.current_player_range.mul_(strategy)
    self.current_player_range = card_tools.normalize_range(
        node.board, self.current_player_range)

    return sampled_bet
def get_possible_hand_indexes(self, board):
    ''' Gives the private hands which are valid with a given board.

    Params:
        board: a possibly empty vector of board cards

    Return a vector with an entry for every possible hand (private card),
    which is `1` if `hand_is_possible` accepts the hand together with the
    board and `0` otherwise; all `1` for an empty board
    '''
    out = arguments.Tensor(game_settings.card_count).fill_(0)

    # An empty board built with `arguments.Tensor()` is a 1-D tensor with
    # zero elements, so checking `dim() == 0` alone does not recognise it.
    if board.dim() == 0 or board.numel() == 0:
        out.fill_(1)
        return out

    # board cards followed by one slot for the candidate private card
    whole_hand = arguments.IntTensor(board.size(0) + 1)
    whole_hand[:-1].copy_(board)
    for card in range(game_settings.card_count):
        whole_hand[-1] = card
        if self.hand_is_possible(whole_hand):
            out[card] = 1
    return out
def _init_bucketing(self):
    ''' Initializes the tensor that translates hand ranges to bucket ranges.

    `self._range_matrix` has one row per card and one block of
    `bucket_count` columns per second-round board; an entry is 1 where the
    card's bucket on that board equals the column's bucket id.
    `self._reverse_value_matrix` is the transpose scaled by
    `1 / (board_count - 2)`.
    '''
    self.bucketer = Bucketer()
    self.bucket_count = self.bucketer.get_bucket_count()
    boards = card_tools.get_second_round_boards()
    self.board_count = boards.size(0)

    card_count = game_settings.card_count
    bucket_count = self.bucket_count

    self._range_matrix = arguments.Tensor(
        card_count, self.board_count * bucket_count).zero_()
    self._range_matrix_board_view = self._range_matrix.view(
        card_count, self.board_count, bucket_count)

    # The grid of bucket ids is the same for every board, so it is built once
    # instead of once per loop iteration.
    class_ids = torch.arange(0, bucket_count)
    if arguments.gpu:
        class_ids = class_ids.cuda()
    else:
        class_ids = class_ids.float()
    class_ids = class_ids.view(1, bucket_count).expand(card_count, bucket_count)

    for idx in range(self.board_count):
        board = boards[idx]
        buckets = self.bucketer.compute_buckets(board)
        if arguments.gpu:
            buckets = buckets.cuda()
        card_buckets = buckets.view(card_count, 1).expand(
            card_count, bucket_count)

        # finding all strength classes
        # matrix for transformation from card ranges to strength class ranges
        self._range_matrix_board_view[:, idx, :][torch.eq(
            class_ids, card_buckets)] = 1

    # matrix for transformation from class values to card values
    self._reverse_value_matrix = self._range_matrix.T.clone()
    # we need to div the matrix by the sum of possible boards (from point of
    # view of each hand)
    weight_constant = 1 / (self.board_count - 2)
    self._reverse_value_matrix.mul_(weight_constant)
def _fill_chance(self, node):
    ''' Fills a chance node with the probability of each outcome.

    Every child row of `node.strategy` is `1 / (card_count - 2)` for hands
    that are possible on the child's board and 0 for all other hands.

    Params:
        node: the chance node (must not be terminal)
    '''
    assert not node.terminal

    outcome_count = len(node.children)
    # uniform probability, but zero for hands that are not possible on the
    # corresponding board
    node.strategy = arguments.Tensor(
        outcome_count, game_settings.card_count).fill_(0)

    for row, child_node in enumerate(node.children):
        possible = card_tools.get_possible_hand_indexes(child_node.board).bool()
        # the row is already zero from the allocation above; the explicit
        # reset is kept from the original
        node.strategy[row].fill_(0)
        # remove 2 because each player holds one card
        node.strategy[row][possible] = 1.0 / (game_settings.card_count - 2)
def _fill_uniformly(self, node, player):
    ''' Recursively fills a subtree with a uniform random strategy for the
    given player.

    Used in sections of the game to which the player doesn't play. Nodes
    where another player acts keep their strategy, but their children are
    still visited. Recursion stops at terminal nodes.

    Params:
        node: the root of the subtree
        player: the player which is given the uniform random strategy
    '''
    if node.terminal:
        return

    children = node.children
    if node.current_player == player:
        # fill uniform strategy
        node.strategy = arguments.Tensor(
            len(children), game_settings.card_count).fill_(1.0 / len(children))

    for child_node in children:
        self._fill_uniformly(child_node, player)
def batch_eval(self, board, impossible_hand_value=-1):
    ''' Gives strength representations for all private hands on the given
    board.

    For an empty board the strength of a hand is
    `hand // game_settings.suit_count + 1`; otherwise each hand is evaluated
    together with the board through `self.evaluate`.

    Params:
        board: a possibly empty vector of board cards
        impossible_hand_value: the value to assign to hands which are invalid
            on the board

    Return a vector containing a strength value or `impossible_hand_value`
    for every private hand
    '''
    hand_values = arguments.Tensor(
        game_settings.card_count).fill_(impossible_hand_value)

    # An empty board built with `arguments.Tensor()` is 1-D with zero
    # elements; without the numel() check it would reach the branch below and
    # fail the board-size assertion.
    if board.dim() == 0 or board.numel() == 0:
        for hand in range(game_settings.card_count):
            hand_values[hand] = (hand // game_settings.suit_count) + 1
        return hand_values

    board_size = board.size(0)
    assert board_size == 1 or board_size == 2, 'Incorrect board size for Leduc'

    # board cards followed by one slot for the candidate private card
    whole_hand = arguments.IntTensor(board_size + 1)
    whole_hand[:-1].copy_(board)
    for card in range(game_settings.card_count):
        whole_hand[-1] = card
        hand_values[card] = self.evaluate(whole_hand, impossible_hand_value)
    return hand_values
def __init__(self):
    ''' Constructor. Creates an equity matrix with entries for every possible
    pair of buckets.

    For every second-round board the call matrix of `TerminalEquity` is
    copied entry by entry from card coordinates into bucket coordinates.
    '''
    self.bucketer = Bucketer()
    self.bucket_count = self.bucketer.get_bucket_count()
    self.equity_matrix = arguments.Tensor(
        self.bucket_count, self.bucket_count).zero_()

    # filling equity matrix
    boards = card_tools.get_second_round_boards()
    self.board_count = boards.size(0)
    self.terminal_equity = TerminalEquity()

    for i in range(self.board_count):
        board = boards[i]
        self.terminal_equity.set_board(board)
        call_matrix = self.terminal_equity.get_call_matrix()
        buckets = self.bucketer.compute_buckets(board)

        for c1 in range(game_settings.card_count):
            # plain ints, so the values can be used as tensor indices
            b1 = int(buckets[c1])
            # Bucket ids are compared against `torch.arange(0, bucket_count)`
            # elsewhere in this code base, i.e. they are 0-based, so bucket 0
            # is valid and only negative ids are skipped.
            # NOTE(review): assumes impossible hands are marked with a
            # negative bucket - confirm against Bucketer.compute_buckets.
            if b1 < 0:
                continue
            for c2 in range(game_settings.card_count):
                b2 = int(buckets[c2])
                if b2 < 0:
                    continue
                self.equity_matrix[b1][b2] = call_matrix[c1][c2]
def generate_range(self, _range):
    ''' Samples a batch of random range vectors.

    Each vector is sampled independently by randomly splitting the
    probability mass between the bottom half and the top half of the range,
    and then recursing on the two halves. @{set_board} must be called first.

    `_range` is zeroed and then filled, at the positions of the possible
    hands, with the sampled values reordered from strength order back to
    card order.

    Params:
        _range: a NxK tensor in which to store the sampled ranges, where N is
            the number of ranges to sample and K is the range size
    '''
    sample_count = _range.size(0)
    self.sorted_range = arguments.Tensor(
        sample_count, self.possible_hands_count)
    self._generate_sorted_range(self.sorted_range)

    # we have to reorder the range back to undo the sort by strength
    gather_index = self.reverse_order.expand_as(self.sorted_range)
    if arguments.gpu:
        gather_index = gather_index.cuda()
    self.reordered_range = self.sorted_range.gather(1, gather_index)

    _range.zero_()
    possible_positions = self.possible_hands_mask.expand_as(_range)
    _range[possible_positions] = self.reordered_range.view(-1)
def __init__(self, board, player_range, opponent_cfvs):
    ''' Constructor.

    Params:
        board: board card; must not be None
        player_range: an initial range vector; a copy is stored as
            `input_opponent_range`
        opponent_cfvs: the opponent counterfactual values vector used for
            re-solving; a copy is stored as `input_opponent_value`
    '''
    super().__init__()
    # identity check: `board != None` on a tensor is a value comparison
    assert board is not None

    card_count = game_settings.card_count

    self.input_opponent_range = player_range.clone()
    self.input_opponent_value = opponent_cfvs.clone()

    self.curent_opponent_values = arguments.Tensor(card_count)

    self.regret_epsilon = 1.0 / 100000000

    # 2 stands for 2 actions: play/terminate
    self.opponent_reconstruction_regret = arguments.Tensor(2, card_count)

    self.play_current_strategy = arguments.Tensor(card_count).fill_(0)
    self.terminate_current_strategy = arguments.Tensor(card_count).fill_(1)

    # holds achieved CFVs at each iteration so that we can compute regret
    self.total_values = arguments.Tensor(card_count)

    self.terminate_regrets = arguments.Tensor(card_count).fill_(0)
    self.play_regrets = arguments.Tensor(card_count).fill_(0)

    # init range mask for masking out impossible hands
    self.range_mask = card_tools.get_possible_hand_indexes(board)

    self.total_values_p2 = None
    self.play_current_regret = None
    self.terminate_current_regret = None
from Source.Settings.constants import constants
from Source.Settings.game_settings import game_settings
from Source.Tree.tree_builder import *
from Source.Game.card_to_string_conversion import card_to_string
from Source.Tree.tree_values import TreeValues
from Source.Tree.tree_strategy_filling import TreeStrategyFilling
from Source.Tree.tree_visualiser import TreeVisualiser

if __name__ == "__main__":
    # Build the full game tree from an empty board, with P1 to act and both
    # players committed to 100 chips.
    builder = PokerTreeBuilder()
    params = TreeParams()
    root_node = TreeNode()
    params.root_node = root_node
    root_node.board = card_to_string.string_to_board('')
    root_node.street = 1
    root_node.current_player = constants.players.P1
    root_node.bets = arguments.Tensor([100, 100])
    tree = builder.build_tree(params)

    # Fill strategies for player 0 and then player 1, using uniform ranges
    # on the root board.
    filling = TreeStrategyFilling()
    range1 = card_tools.get_uniform_range(root_node.board)
    range2 = card_tools.get_uniform_range(root_node.board)
    for player in (0, 1):
        filling.fill_strategies(tree, player, range1, range2)

    # Stack both ranges into a single players x cards tensor.
    starting_ranges = arguments.Tensor(constants.players_count,
                                       game_settings.card_count)
    starting_ranges[0].copy_(range1)
    starting_ranges[1].copy_(range2)
def cfrs_iter_dfs(self, node, _iter):
    ''' Recursively walks the tree, applying the CFR algorithm.

    Terminal nodes get their `cf_values` from the terminal equity, multiplied
    by the pot. At every other node the current strategy is derived from the
    positive regrets (or taken from `node.strategy` at chance nodes), the
    children's ranges are set and visited, and `node.cf_values` is assembled
    from the children's values. Player nodes additionally update their
    regrets and their average strategy.

    Params:
        node: the current node in the tree
        _iter: the current iteration number
    '''
    assert (node.current_player == constants.players.P1
            or node.current_player == constants.players.P2
            or node.current_player == constants.players.chance)

    opponent_index = 1 - node.current_player
    card_count = game_settings.card_count

    # axis of the actions in [actions x cards] tensors
    action_dimension = 0

    # compute values using terminal_equity in terminal nodes
    if node.terminal:
        terminal_equity = self._get_terminal_equity(node)
        values = node.ranges_absolute.clone().fill_(0)
        if node.type == constants.node_types.terminal_fold:
            terminal_equity.tree_node_fold_value(
                node.ranges_absolute, values, opponent_index)
        else:
            terminal_equity.tree_node_call_value(node.ranges_absolute, values)

        # multiply by the pot
        values = values * node.pot
        node.cf_values = values.view_as(node.ranges_absolute)
        return

    actions_count = len(node.children)
    is_chance = node.current_player == constants.players.chance

    if is_chance:
        current_strategy = node.strategy
    else:
        # we have to compute the current strategy at the beginning of each
        # iteration; regrets are initialized in the first iteration
        # (`is None`: these fields hold tensors once initialized)
        if node.regrets is None:
            node.regrets = arguments.Tensor(
                actions_count, card_count).fill_(self.regret_epsilon)
        if node.possitive_regrets is None:
            node.possitive_regrets = arguments.Tensor(
                actions_count, card_count).fill_(self.regret_epsilon)

        # compute positive regrets so that we can compute the current
        # strategy from them
        node.possitive_regrets.copy_(node.regrets)
        node.possitive_regrets[torch.le(
            node.possitive_regrets, self.regret_epsilon)] = self.regret_epsilon

        # compute the current strategy
        regrets_sum = node.possitive_regrets.sum(action_dimension)
        current_strategy = node.possitive_regrets.clone()
        current_strategy.div_(regrets_sum.expand_as(current_strategy))

    # current cfv [[actions, players, ranges]]
    cf_values_allactions = arguments.Tensor(
        actions_count, constants.players_count, card_count).fill_(0)

    children_ranges_absolute = {}
    if is_chance:
        # chance reweights both players' ranges
        for player in (0, 1):
            ranges_mul_matrix = node.ranges_absolute[player].repeat(
                actions_count, 1)
            children_ranges_absolute[player] = torch.mul(
                current_strategy, ranges_mul_matrix)
    else:
        # only the acting player's range is reweighted by the strategy; the
        # opponent's range is passed on unchanged
        ranges_mul_matrix = node.ranges_absolute[node.current_player].repeat(
            actions_count, 1)
        children_ranges_absolute[node.current_player] = torch.mul(
            current_strategy, ranges_mul_matrix)
        children_ranges_absolute[opponent_index] = node.ranges_absolute[
            opponent_index].repeat(actions_count, 1).clone()

    for i, child_node in enumerate(node.children):
        # set new absolute ranges (after the action) for the child
        child_node.ranges_absolute = node.ranges_absolute.clone()
        child_node.ranges_absolute[0].copy_(children_ranges_absolute[0][i])
        child_node.ranges_absolute[1].copy_(children_ranges_absolute[1][i])
        self.cfrs_iter_dfs(child_node, _iter)
        cf_values_allactions[i] = child_node.cf_values

    node.cf_values = arguments.Tensor(
        constants.players_count, card_count).fill_(0)

    if is_chance:
        node.cf_values[0] = cf_values_allactions[:, 0, :].sum(dim=0)
        node.cf_values[1] = cf_values_allactions[:, 1, :].sum(dim=0)
    else:
        strategy_mul_matrix = current_strategy.view(actions_count, card_count)
        node.cf_values[node.current_player] = torch.mul(
            strategy_mul_matrix,
            cf_values_allactions[:, node.current_player, :]).sum(dim=0)
        node.cf_values[opponent_index] = cf_values_allactions[
            :, opponent_index, :].sum(dim=0)

        # computing regrets
        current_regrets = cf_values_allactions[
            :, node.current_player, :].reshape(actions_count, card_count).clone()
        current_regrets.sub_(node.cf_values[node.current_player].view(
            1, card_count).expand_as(current_regrets))
        self.update_regrets(node, current_regrets)

        # accumulating average strategy
        self.update_average_strategy(
            node, current_strategy, _iter, actions_count)
def get_value(self, ranges, values):
    ''' Gives the predicted counterfactual values at each evaluated state,
    given input ranges.

    @{start_computation} must be called first. Each state to be evaluated
    must be given in the same order that pot sizes were given for that
    function. Keeps track of iterations internally, so should be called
    exactly once for every iteration of continual re-solving.

    Params:
        ranges: An Nx2xK tensor, where N is the number of states evaluated
            (must match input to @{start_computation}), 2 is the number of
            players, and K is the number of private hands. Contains N sets of
            2 range vectors.
        values: an Nx2xK tensor in which to store the N sets of 2 value
            vectors which are output
    '''
    # identity checks: `!= None` on tensors is a value comparison
    assert ranges is not None and values is not None
    assert ranges.size(0) == self.batch_size

    players_count = constants.players_count
    self.iter = self.iter + 1

    if self.iter == 1:
        # initializing data structures
        self.next_round_inputs = arguments.Tensor(
            self.batch_size, self.board_count,
            (self.bucket_count * players_count + 1)).zero_()
        self.next_round_values = arguments.Tensor(
            self.batch_size, self.board_count, players_count,
            self.bucket_count).zero_()
        self.transposed_next_round_values = arguments.Tensor(
            self.batch_size, players_count, self.board_count,
            self.bucket_count)
        self.next_round_extended_range = arguments.Tensor(
            self.batch_size, players_count,
            self.board_count * self.bucket_count).zero_()
        # shares storage with next_round_extended_range
        self.next_round_serialized_range = self.next_round_extended_range.view(
            -1, self.bucket_count)
        self.range_normalization = arguments.Tensor()
        self.value_normalization = arguments.Tensor(
            self.batch_size, players_count, self.board_count)

        # handling pot feature for the nn
        nn_bet_input = self.pot_sizes.clone().mul(1 / arguments.stack)
        nn_bet_input = nn_bet_input.view(-1, 1).expand(
            self.batch_size, self.board_count)
        self.next_round_inputs[:, :, -1].copy_(nn_bet_input)

    # we need to find if we need remember something in this iteration
    use_memory = self.iter > arguments.cfr_skip_iters
    if use_memory and self.iter == arguments.cfr_skip_iters + 1:
        # first iter that we need to remember something - we need to init
        # data structures
        self.range_normalization_memory = arguments.Tensor(
            self.batch_size * self.board_count * players_count, 1).zero_()
        self.counterfactual_value_memory = arguments.Tensor(
            self.batch_size, players_count, self.board_count,
            self.bucket_count).zero_()

    # computing bucket range in next street for both players at once
    self._card_range_to_bucket_range(
        ranges.view(self.batch_size * players_count, -1),
        self.next_round_extended_range.view(
            self.batch_size * players_count, -1))

    self.range_normalization = self.next_round_serialized_range.sum(
        dim=1, keepdim=True)
    rn_view = self.range_normalization.view(
        self.batch_size, players_count, self.board_count)
    # each player's values are normalized by the OTHER player's range sum
    for player in range(players_count):
        self.value_normalization[:, player, :].copy_(
            rn_view[:, 1 - player, :])

    if use_memory:
        self.range_normalization_memory.add_(
            self.value_normalization.view(
                self.range_normalization_memory.shape))

    # eliminating division by zero
    self.range_normalization[torch.eq(self.range_normalization, 0)] = 1
    self.next_round_serialized_range.div_(
        self.range_normalization.expand_as(self.next_round_serialized_range))

    # copy each player's normalized bucket range into its block of nn inputs
    for player in range(players_count):
        first = player * self.bucket_count
        last = (player + 1) * self.bucket_count
        target = self.next_round_inputs[:, :, first:last]
        target.copy_(
            self.next_round_extended_range[:, player, :].view(target.shape))

    # using nn to compute values
    serialized_inputs_view = self.next_round_inputs.view(
        self.batch_size * self.board_count, -1)
    serialized_values_view = self.next_round_values.view(
        self.batch_size * self.board_count, -1)

    # computing value in the next round
    self.nn.get_value(serialized_inputs_view, serialized_values_view)

    # normalizing values back according to the original range sum
    normalization_view = self.value_normalization.view(
        self.batch_size, players_count, self.board_count, 1).transpose(1, 2)
    self.next_round_values.mul_(
        normalization_view.expand_as(self.next_round_values))

    self.transposed_next_round_values.copy_(
        self.next_round_values.transpose(2, 1))

    # remembering the values for the next round
    if use_memory:
        self.counterfactual_value_memory.add_(
            self.transposed_next_round_values)

    # translating bucket values back to the card values
    self._bucket_value_to_card_value(
        self.transposed_next_round_values.view(
            self.batch_size * players_count, -1),
        values.view(self.batch_size * players_count, -1))
def generate_data_file(data_count, file_name):
    ''' Generates data files containing examples of random poker situations
    with associated terminal equity.

    Each poker situation is randomly generated using @{range_generator} and
    @{random_card_generator}. For description of neural net input and target
    type, see @{net_builder}.

    Params:
        data_count: the number of examples to generate; has to be divisible
            by `arguments.gen_batch_size`
        file_name: the prefix of the files where the data is saved (appended
            with `.inputs`, `.targets`, and `.mask`).
    Raises:
        ValueError: if `data_count` is not divisible by the batch size.'''
    range_generator = RangeGenerator()
    batch_size = arguments.gen_batch_size
    # An explicit check instead of `assert(cond, msg)`: asserting a tuple is
    # always true, so the original validation could never fail.
    if data_count % batch_size != 0:
        raise ValueError('data count has to be divisible by the batch size')
    # Floor division keeps the count an int so it can be passed to range().
    batch_count = data_count // batch_size

    bucketer = Bucketer()
    bucket_count = bucketer.get_bucket_count()
    player_count = 2

    target_size = bucket_count * player_count
    targets = arguments.Tensor(data_count, target_size)
    # one extra input column for the pot feature
    input_size = bucket_count * player_count + 1
    inputs = arguments.Tensor(data_count, input_size)
    mask = arguments.Tensor(data_count, bucket_count).zero_()

    bucket_conversion = BucketConversion()
    equity = TerminalEquity()
    # the pot feature is stored in the last input column
    pot_feature_index = -1

    for batch in range(batch_count):
        # every example of one batch shares the same randomly drawn board
        board = card_generator.generate_cards(game_settings.board_card_count)
        range_generator.set_board(board)
        bucket_conversion.set_board(board)
        equity.set_board(board)

        # rows of the output tensors that belong to this batch
        rows = slice(batch * batch_size, (batch + 1) * batch_size)

        # generating ranges
        ranges = arguments.Tensor(player_count, batch_size,
                                  game_settings.card_count)
        for player in range(player_count):
            range_generator.generate_range(ranges[player])

        # generating pot features; a 1-D source matches the 1-D column view
        # (copy_ needs broadcastable shapes, so a (batch_size, 1) buffer
        # would not fit here)
        inputs[rows, pot_feature_index].copy_(torch.rand(batch_size))

        # translating ranges to features
        for player in range(player_count):
            columns = slice(player * bucket_count,
                            (player + 1) * bucket_count)
            bucket_conversion.card_range_to_bucket_range(
                ranges[player], inputs[rows, columns])

        # computation of values using terminal equity
        values = arguments.Tensor(player_count, batch_size,
                                  game_settings.card_count)
        for player in range(player_count):
            opponent = 1 - player
            equity.call_value(ranges[opponent], values[player])

        # translating values to nn targets
        for player in range(player_count):
            columns = slice(player * bucket_count,
                            (player + 1) * bucket_count)
            bucket_conversion.card_range_to_bucket_range(
                values[player], targets[rows, columns])

        # computing a mask of possible buckets
        bucket_mask = bucket_conversion.get_possible_bucket_mask()
        mask[rows, :].copy_(bucket_mask.expand(batch_size, bucket_count))

    torch.save(inputs, file_name + '.inputs')
    torch.save(targets, file_name + '.targets')
    torch.save(mask, file_name + '.mask')