def string_to_board(self, card_string):
    ''' Parses a board string holding at most one card into a card tensor.

    Params:
        card_string: `''` when there is no board card, otherwise the string
            form of a single card
    Return an empty `arguments.IntTensor` for the empty string, otherwise a
        one-element `arguments.IntTensor` holding the value produced by
        `self.string_to_card`'''
    if card_string != '':
        card = self.string_to_card(card_string)
        return arguments.IntTensor([card])

    # no board card: hand back an empty tensor
    return arguments.IntTensor()
def generate_cards(self, count):
    ''' Samples a random set of distinct cards.

    Cards are drawn uniformly from the deck one at a time and a draw is
    discarded if that card was already picked, so each subset of the deck
    of the correct size is sampled with uniform probability.

    Params:
        count: the number of cards to sample
    Return a vector of `count` distinct cards, represented numerically
    Raises:
        ValueError: if `count` is larger than `game_settings.card_count`
            (the rejection loop below could otherwise never terminate)'''
    deck_size = game_settings.card_count
    if count > deck_size:
        raise ValueError(
            'cannot sample %d distinct cards from a deck of %d cards' %
            (count, deck_size))

    # used_cards[c] == 1 once card c has been dealt
    used_cards = arguments.IntTensor(deck_size).zero_()
    out = arguments.IntTensor(count)

    generated_cards_count = 0
    while generated_cards_count < count:
        # randint's upper bound is exclusive, so card is in [0, deck_size)
        card = np.random.randint(0, deck_size)
        if used_cards[card] == 0:
            out[generated_cards_count] = card
            used_cards[card] = 1
            generated_cards_count += 1
    return out
def get_second_round_boards(self):
    ''' Enumerates every possible set of board cards for the game.

    For one-card boards row `c` holds card `c`; for two-card boards the
    rows list every pair `(low, high)` with `low < high`, ordered by `low`
    and then by `high`.

    Return an NxK `arguments.IntTensor`, where N is `self.get_boards_count()`
        and K is `game_settings.board_card_count` (1 or 2)'''
    boards_count = self.get_boards_count()
    board_size = game_settings.board_card_count

    if board_size == 1:
        boards = arguments.IntTensor(boards_count, 1)
        for card in range(game_settings.card_count):
            boards[card, 0] = card
        return boards

    if board_size == 2:
        deck_size = game_settings.card_count
        boards = arguments.IntTensor(boards_count, 2)
        row = 0
        for low in range(deck_size):
            for high in range(low + 1, deck_size):
                boards[row, 0] = low
                boards[row, 1] = high
                row += 1
        # every row of the preallocated tensor must have been written
        assert row == boards_count, 'wrong boards count!'
        return boards

    assert False, 'unsupported board size'
def _init_board_index_table(self):
    ''' Builds `self._board_index_table`, the lookup from board cards to a
    board index.

    One-card boards: the table is `torch.arange(card_count)`, i.e. card `c`
    maps to index `c`.
    Two-card boards: the table is a symmetric card_count x card_count
    tensor where entry `[a][b]` (a != b) is the index of the unordered pair
    `{a, b}`; pairs are numbered in the order (0,1), (0,2), ..., (1,2), ...
    and the diagonal keeps the filler value -1.'''
    board_size = game_settings.board_card_count

    if board_size == 1:
        self._board_index_table = torch.arange(game_settings.card_count)
        return

    if board_size == 2:
        deck_size = game_settings.card_count
        table = arguments.IntTensor(deck_size, deck_size).fill_(-1)
        self._board_index_table = table

        next_index = 0
        for low in range(deck_size):
            for high in range(low + 1, deck_size):
                # both orderings of the pair resolve to the same board
                table[low][high] = next_index
                table[high][low] = next_index
                next_index += 1
        return

    assert False, 'unsupported board size'
def get_possible_hand_indexes(self, board):
    ''' Gives the private hands which are valid with a given board.

    Params:
        board: a possibly empty vector of board cards
    Return a vector with an entry for every possible hand (private card),
        which is `1` if `self.hand_is_possible` accepts the hand together
        with the board and `0` otherwise; with an empty board every entry
        is `1`'''
    out = arguments.Tensor(game_settings.card_count).fill_(0)

    # An empty board is either a tensor with no dimensions (what an empty
    # constructor call produced on old PyTorch) or a 1-D tensor with zero
    # elements (what `arguments.IntTensor()` yields on PyTorch >= 0.4).
    # Checking only `dim() == 0` misses the second form.
    if board.dim() == 0 or board.numel() == 0:
        out.fill_(1)
        return out

    # board cards followed by one slot for the candidate private card
    whole_hand = arguments.IntTensor(board.size(0) + 1)
    whole_hand[:-1].copy_(board)
    for card in range(game_settings.card_count):
        whole_hand[-1] = card
        if self.hand_is_possible(whole_hand):
            out[card] = 1
    return out
def batch_eval(self, board, impossible_hand_value=-1):
    ''' Gives strength representations for all private hands on the given
    board.

    Params:
        board: a possibly empty vector of board cards
        impossible_hand_value: the value to assign to hands which are
            invalid on the board (forwarded to `self.evaluate`)
    Return a vector containing a strength value or `impossible_hand_value`
        for every private hand. With an empty board the strength of card
        `c` is `c // game_settings.suit_count + 1`.'''
    hand_values = arguments.Tensor(game_settings.card_count).fill_(-1)

    # An empty board is either a tensor with no dimensions (what an empty
    # constructor call produced on old PyTorch) or a 1-D tensor with zero
    # elements (what `arguments.IntTensor()` yields on PyTorch >= 0.4).
    # Checking only `dim() == 0` sends the second form into the board
    # branch below, where it trips the board-size assertion.
    if board.dim() == 0 or board.numel() == 0:
        for hand in range(game_settings.card_count):
            hand_values[hand] = (hand // game_settings.suit_count) + 1
        return hand_values

    board_size = board.size(0)
    assert board_size == 1 or board_size == 2, 'Incorrect board size for Leduc'

    # board cards followed by one slot for the candidate private card
    whole_hand = arguments.IntTensor(board_size + 1)
    whole_hand[:-1].copy_(board)
    for card in range(game_settings.card_count):
        whole_hand[-1] = card
        hand_values[card] = self.evaluate(whole_hand, impossible_hand_value)
    return hand_values
def _compute_structure(self):
    ''' Computes the number of nodes at each depth of the tree.

    Used to find the size for the tensors which store lookahead data.

    Reads `self.lookahead.depth`, `tree.street` and the per-layer
    `bets_count`, `nonallinbets_count`, `terminal_actions_count` and
    `actions_count` containers, which must already exist on
    `self.lookahead`. Writes `regret_epsilon`, `acting_player`, the `-2`
    and `-1` entries of those four containers, and the six
    `*_nodes_count` dictionaries.'''
    la = self.lookahead
    assert 1 <= la.tree.street <= 2

    la.regret_epsilon = 1.0 / 1000000000

    # which player acts at particular depth; the values are positions
    # within the lookahead (0 acts first), not player IDs
    la.acting_player = arguments.IntTensor(la.depth + 1).fill_(-1)
    la.acting_player[0] = 0
    for d in range(1, la.depth + 1):
        la.acting_player[d] = 1 - la.acting_player[d - 1]

    # entries for the two layers "above" the root, so that the [layer - 1]
    # and [layer - 2] lookups are defined for layers 0 and 1
    above_root_values = (
        (la.bets_count, 1),
        (la.nonallinbets_count, 1),
        (la.terminal_actions_count, 0),
        (la.actions_count, 1),
    )
    for per_layer_counts, value in above_root_values:
        per_layer_counts[-2] = value
        per_layer_counts[-1] = value

    # compute the node counts
    nonterminal = la.nonterminal_nodes_count = {}
    nonterminal_nonallin = la.nonterminal_nonallin_nodes_count = {}
    all_nodes = la.all_nodes_count = {}
    terminal = la.terminal_nodes_count = {}
    allin = la.allin_nodes_count = {}
    inner = la.inner_nodes_count = {}

    # layers 0 and 1 are filled in by hand
    nonterminal[0] = 1
    nonterminal[1] = la.bets_count[0]

    nonterminal_nonallin[-1] = 1
    nonterminal_nonallin[0] = 1
    nonterminal_nonallin[1] = nonterminal[1] - 1

    all_nodes[0] = 1
    all_nodes[1] = la.actions_count[0]

    terminal[0] = 0
    terminal[1] = 2

    allin[0] = 0
    allin[1] = 1

    inner[0] = 1
    inner[1] = 1

    # deeper layers: every count for layer d + 1 is derived from the
    # non-terminal, non-allin node count two layers up (d - 1)
    for d in range(1, la.depth):
        nonallin_grandparents = nonterminal_nonallin[d - 1]
        after_any_bet = nonallin_grandparents * la.bets_count[d - 1]
        after_nonallin_bet = (nonallin_grandparents *
                              la.nonallinbets_count[d - 1])

        all_nodes[d + 1] = after_any_bet * la.actions_count[d]
        allin[d + 1] = after_any_bet * 1
        nonterminal[d + 1] = after_nonallin_bet * la.bets_count[d]
        nonterminal_nonallin[d + 1] = (after_nonallin_bet *
                                       la.nonallinbets_count[d])
        terminal[d + 1] = after_any_bet * la.terminal_actions_count[d]
def set_datastructures_from_tree_dfs(self, node, layer, action_id, parent_id,
                                     gp_id):
    ''' Traverses the tree depth-first to fill in lookahead data structures
    that summarize data contained in the tree.

    For every visited node it stores the pot in `self.lookahead.pot_size`
    and records the node's coordinates in `node.lookahead_coordinates`.
    For nodes with fewer children than `actions_count[layer]` it zeroes the
    slots of the missing actions in `self.lookahead.empty_action_mask`.

    Params:
        node: the current node of the public tree
        layer: the depth of the current node
        action_id: the index of the action that led to this node
        parent_id: the index of the current node's parent
        gp_id: the index of the current node's grandparent '''
    lookahead = self.lookahead

    # fill the potsize
    assert (node.pot)
    lookahead.pot_size[layer][action_id, parent_id, gp_id, :, :] = node.pot

    node.lookahead_coordinates = arguments.IntTensor(
        [action_id, parent_id, gp_id])

    is_chance_node = node.current_player == constants.players.chance

    # transition call cannot be allin call
    # NOTE(review): ids look 0-based here, so `<` may have been intended
    # instead of `<=` -- confirm against the caller before tightening
    if is_chance_node:
        assert (parent_id <= lookahead.nonallinbets_count[layer - 2])

    # nothing below this node is stored in the lookahead
    if layer >= lookahead.depth + 1:
        return
    # terminal and chance nodes are not expanded any further
    if node.terminal or is_chance_node:
        return

    # compute next coordinates for parent and grandparent
    gp_nonallinbets_count = lookahead.nonallinbets_count[layer - 2]
    next_parent_id = action_id - lookahead.terminal_actions_count[layer - 1]
    next_gp_id = gp_id * gp_nonallinbets_count + parent_id

    # parent is not an allin raise
    assert (parent_id <= gp_nonallinbets_count)

    children = node.children
    children_count = len(children)
    layer_actions_count = lookahead.actions_count[layer]

    # node has full action count, easy to handle
    if children_count >= layer_actions_count:
        for child_id, child_node in enumerate(children):
            # go deeper
            self.set_datastructures_from_tree_dfs(
                child_node, layer + 1, child_id, next_parent_id, next_gp_id)
        return

    # The node has fewer children than the max number of children for any
    # node on this layer: we need to mask nonexisting padded bets.
    assert (layer > 0)

    terminal_actions_count = lookahead.terminal_actions_count[layer]
    assert (terminal_actions_count == 2)

    existing_bets_count = children_count - terminal_actions_count

    # allin situations
    if existing_bets_count == 0:
        assert (action_id == lookahead.actions_count[layer - 1] - 1)

    # terminal actions keep their natural indices
    for child_id in range(terminal_actions_count):
        # go deeper
        self.set_datastructures_from_tree_dfs(
            children[child_id], layer + 1, child_id, next_parent_id,
            next_gp_id)

    # Even though there are fewer actions, the existing bets are aligned
    # to the END of the action range, so the last action (allin) has the
    # same index as if the node had the full number of actions.
    for b in range(existing_bets_count):
        self.set_datastructures_from_tree_dfs(
            children[children_count - b - 1], layer + 1,
            layer_actions_count - b - 1, next_parent_id, next_gp_id)

    # mask out empty actions: the slots between the terminal actions and
    # the end-aligned existing bets
    mask = lookahead.empty_action_mask[layer + 1]
    if existing_bets_count == 0:
        mask[terminal_actions_count:, next_parent_id, next_gp_id, :] = 0
    else:
        mask[terminal_actions_count:-existing_bets_count, next_parent_id,
             next_gp_id, :] = 0