コード例 #1
0
ファイル: terminalequity.py プロジェクト: easefunSZ/SPP
    def set_board(self, board):
        """Build the showdown (call) and fold payoff matrices for a board.

        Reads ``self.hole_mask`` and stores the results in
        ``self.board_mask``, ``self.call_matrix`` and ``self.fold_matrix``.

        :param board: tensor of exactly five card indices (only the last
            betting round is supported).
        """
        assert board.size(0) == 5

        hole_count = arguments.hole_count
        showdown = arguments.tensor(hole_count, hole_count)

        # Let the native evaluator score every hole-card pair on this board.
        c_strength = (c_int * 1326)()
        c_board = (c_int * 5)()
        for slot, card in enumerate(board):
            c_board[slot] = int(card)
        dll.eval5Board(c_board, 5, c_strength)
        strength = arguments.tensor(list(c_strength))

        # 1 for every hand with a non-negative strength, 0 otherwise.
        board_mask = strength.clone().fill_(1)
        board_mask[strength < 0] = 0
        self.board_mask = board_mask

        # Five board cards leave 47 unseen cards, i.e. C(47, 2) = 1081 hands.
        assert int((board_mask > 0).sum()) == 1081

        # Broadcast the strength vector along rows and along columns so the
        # comparison below covers every ordered pair of hands.
        row_strength = strength.view(hole_count, 1).expand_as(showdown)
        col_strength = strength.view(1, hole_count).expand_as(showdown)
        # Strength -1 presumably marks hands that collide with the board.
        row_blocked = row_strength == -1
        col_blocked = col_strength == -1

        # [1.0] showdown matrix
        showdown[row_strength < col_strength] = 1
        showdown[row_strength > col_strength] = -1
        # NOTE(review): ties score 0.5 while win/loss score +1/-1 -- confirm
        # this is intended.
        showdown[row_strength == col_strength] = 0.5
        # hole cards that conflict with each other
        showdown[self.hole_mask < 1] = 0
        # hole cards that conflict with the board
        showdown[row_blocked] = 0
        showdown[col_blocked] = 0

        # [2.0] fold matrix: starts as the hole-vs-hole compatibility mask,
        # then drops hands that conflict with the board.
        folded = arguments.tensor(hole_count, hole_count)
        folded.copy_(self.hole_mask)
        folded[row_blocked] = 0
        folded[col_blocked] = 0

        self.call_matrix = showdown
        self.fold_matrix = folded
コード例 #2
0
ファイル: tree_values.py プロジェクト: easefunSZ/SPP
    def _fill_ranges_dfs(self, node, ranges_absolute):
        """Store reach probabilities on ``node`` and push them down the tree.

        A copy of ``ranges_absolute`` is saved as ``node.ranges_absolute``.
        For non-terminal nodes the strategy and ranges are sanity-checked,
        then each child receives the parent's ranges combined with
        ``node.strategy``.

        :param node: tree node to annotate; non-terminal nodes must already
            carry a ``strategy``.
        :param ranges_absolute: tensor indexed as ``[player, hand]``.
        """
        node.ranges_absolute = ranges_absolute.clone()

        acting = node.current_player
        other = 1 - acting

        if node.terminal:
            return

        assert node.strategy is not None

        n_actions = len(node.children)
        strategy = node.strategy
        possible = Mask.get_board_mask(node.board)
        is_chance = node.current_player == constants.chance_player

        if not is_chance:
            column_totals = strategy.sum(0)

            # probabilities are non-negative and sum to ~1 over actions
            assert (strategy < 0).sum() == 0
            assert (column_totals < 0.999).sum() == 0
            assert (column_totals > 1.001).sum() == 0
            # NaN is the only value that differs from itself
            assert (column_totals != column_totals).sum() == 0

        reach = node.ranges_absolute
        assert (reach < 0).sum() == 0
        assert (reach > 1).sum() == 0

        # hands excluded by the board mask must carry no reach probability
        impossible = possible.clone().fill_(1) - possible
        leaked = reach.clone().mul_(
            impossible.view(1, arguments.hole_count).expand_as(reach))
        assert leaked.sum() == 0

        child_ranges = arguments.tensor(n_actions, 2, arguments.hole_count)

        if is_chance:
            # chance scales both players' ranges by the same strategy
            for seat in (0, 1):
                child_ranges[:, seat, :].copy_(
                    reach[seat].repeat(n_actions, 1))
                child_ranges[:, seat, :].mul_(node.strategy)
        else:
            # the non-acting player's range is passed through unchanged
            child_ranges[:, other, :] = \
                reach[other].clone().repeat(n_actions, 1)
            # the acting player's range is split by the strategy
            child_ranges[:, acting, :] = \
                node.strategy * reach[acting].repeat(n_actions, 1)

        # recurse into every child with its slice of the ranges
        for child, child_range in zip(node.children, child_ranges):
            self._fill_ranges_dfs(child, child_range)
コード例 #3
0
    def _fill_uniformly(cls, node):
        """Give a non-terminal player node a uniform strategy.

        Sets ``node.strategy`` to a tensor indexed as ``[action, hand]``
        where every entry equals ``1 / number_of_children``. Terminal nodes
        are left untouched.

        :param node: tree node whose ``current_player`` must be one of the
            two players (not chance).
        """
        cp = node.current_player
        assert cp in (constants.p1_player, constants.p2_player)

        if node.terminal:
            return

        children_number = len(node.children)
        # Size the hand axis from the shared configuration rather than a
        # hard-coded 1326, matching the ranges this strategy is multiplied
        # with elsewhere.
        node.strategy = arguments.tensor(
            children_number,
            arguments.hole_count).fill_(1.0 / children_number)
コード例 #4
0
 def get_board_mask(cls, board):
     """Return a 0/1 vector marking hole-card pairs that avoid the board.

     Each pair ``(low, high)`` with ``low < high`` is stored at index
     ``high * (high - 1) // 2 + low``. A pair gets 0 when either of its
     cards appears in ``board``, otherwise 1.

     :param board: iterable of card indices (each converted with ``int``).
     :return: tensor of length ``arguments.hole_count``.
     """
     mask = arguments.tensor(arguments.hole_count).fill_(1)
     board_cards = {int(card) for card in board}
     for high in range(1, arguments.card_count):
         high_on_board = high in board_cards
         for low in range(high):
             if not (high_on_board or low in board_cards):
                 continue
             mask[high * (high - 1) // 2 + low] = 0
     return mask
コード例 #5
0
    def get_possible_bets(self, node):
        """List the bet states reachable by a raise from ``node``.

        Each returned row holds both players' bets after the raise: the
        opponent's column keeps the opponent's current bet and the acting
        player's column holds the new, larger bet. Candidate raise sizes are
        ``self.pot_fractions`` times the pot (twice the opponent's bet), and
        the maximum raise ``arguments.stack - opponent_bet`` is always
        appended last.

        :param node: node providing ``current_player`` and ``bets``.
        :return: an empty ``torch.FloatTensor`` when the minimum raise size
            is zero, otherwise a tensor with one row per possible bet.
        """
        current_player = node.current_player
        player_bet = node.bets[current_player]
        opponent_bet = node.bets[1 - current_player]

        assert (player_bet <= opponent_bet)

        # A raise must be at least the amount to call and at least one big
        # blind, but can never exceed what is left of the stack.
        max_raise_size = arguments.stack - opponent_bet
        min_raise_size = min(
            max_raise_size,
            max(opponent_bet - player_bet, arguments.BB))

        # [1] no raise is possible
        if min_raise_size == 0:
            return torch.FloatTensor()

        # [2] the only raise available is the maximum one
        # NOTE(review): cases [1] and [2] build torch.FloatTensor while
        # case [3] uses arguments.tensor -- confirm the types agree.
        if min_raise_size == max_raise_size:
            all_in = torch.FloatTensor(1, 2).fill_(opponent_bet)
            all_in[0, current_player] = opponent_bet + min_raise_size
            return all_in

        # [3] keep every pot-fraction raise that lies inside the valid range
        capacity = len(self.pot_fractions) + 1
        bets = arguments.tensor(capacity, 2).fill_(opponent_bet)
        pot = 2 * opponent_bet

        used = 0
        for fraction in self.pot_fractions:
            raise_size = pot * fraction
            if min_raise_size <= raise_size < max_raise_size:
                bets[used, current_player] = opponent_bet + raise_size
                used += 1

        assert (used < capacity)

        # the maximum raise always closes the list
        bets[used, current_player] = opponent_bet + max_raise_size
        return bets[0:used + 1]
コード例 #6
0
    def _build_tree_dfs(self, current_node):
        """Expand ``current_node`` into a full subtree, depth-first.

        Sets ``pot`` (the smaller of the two bets), ``children``,
        ``actions`` and ``depth`` on the node, and ``parent`` on each child.
        The action stored for child 0 is the fold action, for child 1 the
        check/call action, and for every later child the larger of that
        child's two bets.

        :param current_node: node to expand; must provide ``bets``.
        """
        current_node.pot = current_node.bets.min()
        children = self._get_children(current_node)
        current_node.children = children
        current_node.actions = arguments.tensor(len(children))

        deepest_child = 0
        for position, child in enumerate(children):
            child.parent = current_node
            self._build_tree_dfs(child)
            deepest_child = max(deepest_child, child.depth)

            if position == 0:
                action = constants.fold_action
            elif position == 1:
                action = constants.ccall_action
            else:
                action = child.bets.max()
            current_node.actions[position] = action

        # a node without children ends up with depth 1
        current_node.depth = deepest_child + 1
コード例 #7
0
ファイル: test_tree_cfr.py プロジェクト: easefunSZ/SPP
from game.enums import Round
from game.betsizing import BetSizing
from setting import arguments, constants
from tree.tree_builder import TreeBuilder
from cfr.tree_cfr import TreeCFR
from equity.mask import Mask
from cfr.tree_values import TreeValues

# Tree description: river only, both players have 3000 in the pot,
# player 1 acts first, and a single pot-fraction bet size of 1.
params = {
    'street': Round.RIVER,
    'bets': arguments.tensor(2).fill_(3000),
    'current_player': constants.p1_player,
    'board': arguments.tensor([0, 5, 16, 29, 34]),
    # alternative board: arguments.tensor([47, 48, 49, 50, 51])
    'bet_sizing': BetSizing(pot_fractions=[1]),
    'limit_to_street': True,
}

tb = TreeBuilder()
root = tb.build_tree(params)

# Start from all-ones ranges for both players.
ranges = arguments.tensor(2, arguments.hole_count).fill_(1)

tree_cfr = TreeCFR()

# Zero out hands that collide with the board ...
valid_hole_mask = Mask.get_board_mask(root.board).view(
    1, arguments.hole_count)
expand_mask = valid_hole_mask.expand_as(ranges)
ranges.mul_(expand_mask)

# ... then renormalise each player's range so it sums to one.
ranges_sum = ranges.sum(1).view(2, 1)
ranges.div_(ranges_sum.expand(2, arguments.hole_count))

tree_cfr.run_cfr(root, ranges, 10000)
コード例 #8
0
ファイル: tree_values.py プロジェクト: easefunSZ/SPP
    def _compute_values_dfs(self, node):
        """Compute counterfactual and best-response values for a subtree.

        Walks the subtree depth-first. On every visited node it sets
        ``cf_values`` and ``cf_br_values`` (indexed ``[player, hand]``),
        ``cfv_infset`` and ``cfv_br_infset`` (one reach-weighted total per
        player), ``epsilon`` (best-response total minus current total) and
        ``exploitability`` (mean of ``epsilon``).

        Reads ``node.ranges_absolute`` on every node and ``node.strategy``
        on non-terminal nodes. Terminal payoff matrices are taken from
        ``self._cached_terminal_equity``, keyed by the board's cards.

        :param node: root of the subtree to evaluate.
        """
        player_index = node.current_player
        opponent_index = 1 - player_index

        if node.terminal:
            assert (node.node_type == constants.terminal_fold_node
                    or node.node_type == constants.terminal_call_node)

            # one TerminalEquity per distinct board, built on first use
            key = ' '.join(str(int(x)) for x in node.board)
            terminal_equity = self._cached_terminal_equity.get(key, None)
            if terminal_equity is None:
                terminal_equity = TerminalEquity()
                terminal_equity.set_board(node.board)
                self._cached_terminal_equity[key] = terminal_equity

            # each player's values come from the *other* player's range
            values = node.ranges_absolute.clone().fill_(0)
            if node.node_type == constants.terminal_call_node:
                values[0] = torch.matmul(node.ranges_absolute[1],
                                         terminal_equity.call_matrix)
                values[1] = torch.matmul(node.ranges_absolute[0],
                                         terminal_equity.call_matrix)
            else:  # terminal fold node
                values[0] = torch.matmul(node.ranges_absolute[1],
                                         terminal_equity.fold_matrix)
                values[1] = torch.matmul(node.ranges_absolute[0],
                                         terminal_equity.fold_matrix)
                # flip the sign for the player opposite node.current_player
                values[opponent_index, :].mul_(-1)

            values.mul_(node.pot)
            # at a terminal node the best-response values equal the values
            node.cf_values = values.view_as(node.ranges_absolute)
            node.cf_br_values = values.view_as(node.ranges_absolute)
        else:
            actions_count = len(node.children)
            hole_count = node.ranges_absolute.size(1)

            cf_values_allactions = arguments.tensor(actions_count, 2,
                                                    hole_count).fill_(0)
            cf_br_values_allactions = arguments.tensor(actions_count, 2,
                                                       hole_count).fill_(0)

            for i, child_node in enumerate(node.children):
                self._compute_values_dfs(child_node)
                cf_values_allactions[i] = child_node.cf_values
                cf_br_values_allactions[i] = child_node.cf_br_values

            # values of this node are derived from its children's values
            node.cf_values = arguments.tensor(2, hole_count).fill_(0)
            node.cf_br_values = arguments.tensor(2, hole_count).fill_(0)

            # strategy [actions * range]
            strategy_mul_matrix = node.strategy.view(actions_count, hole_count)

            if player_index == constants.chance_player:
                node.cf_values = cf_values_allactions.sum(0)
                node.cf_br_values = cf_br_values_allactions.sum(0)
            else:
                # acting player: strategy-weighted sum over actions
                node.cf_values[player_index] = (
                    strategy_mul_matrix *
                    cf_values_allactions[:, player_index, :]).sum(0)
                # Opponent: plain sum over actions of the *current-strategy*
                # values. The original summed the best-response values here,
                # which made cf_values[opponent] a duplicate of
                # cf_br_values[opponent].
                node.cf_values[opponent_index] = \
                    cf_values_allactions[:, opponent_index, :].sum(0)

                # best response: the opponent still sums over actions, the
                # acting player picks the best action for every hand
                node.cf_br_values[opponent_index] = \
                    cf_br_values_allactions[:, opponent_index, :].sum(0)
                node.cf_br_values[player_index] = \
                    cf_br_values_allactions[:, player_index, :].max(0)[0]

        # cf values weighted by reach probability
        node.cfv_infset = arguments.tensor(2)
        node.cfv_infset[0] = (node.cf_values[0] *
                              node.ranges_absolute[0]).sum()
        node.cfv_infset[1] = (node.cf_values[1] *
                              node.ranges_absolute[1]).sum()

        # Best-response values weighted by reach probability. These must be
        # built from cf_br_values; the original multiplied the freshly
        # allocated cfv_br_infset entries by the ranges, yielding garbage.
        node.cfv_br_infset = arguments.tensor(2)
        node.cfv_br_infset[0] = (node.cf_br_values[0] *
                                 node.ranges_absolute[0]).sum()
        node.cfv_br_infset[1] = (node.cf_br_values[1] *
                                 node.ranges_absolute[1]).sum()

        node.epsilon = node.cfv_br_infset - node.cfv_infset
        node.exploitability = node.epsilon.mean()