Code example #1
File: test_rangeManager.py Project: zxpower/PokerRL
    def test_get_possible_range_idxs_leduc(self):

        for n in range(2, 9):
            env_bldr = get_leduc_env_bldr()

            # if actually blocked
            for c in range(env_bldr.rules.N_CARDS_IN_DECK):
                board_2d = env_bldr.lut_holder.get_2d_cards(
                    np.array([c], dtype=np.int32))
                result = PokerRange.get_possible_range_idxs(
                    rules=env_bldr.rules,
                    lut_holder=env_bldr.lut_holder,
                    board_2d=board_2d)

                should_be = np.delete(
                    np.arange(env_bldr.rules.RANGE_SIZE, dtype=np.int32), c)

                assert np.array_equal(a1=result, a2=should_be)

            # if nothing blocked
            board_2d = np.array([Poker.CARD_NOT_DEALT_TOKEN_2D], dtype=np.int8)
            result = PokerRange.get_possible_range_idxs(
                rules=env_bldr.rules,
                lut_holder=env_bldr.lut_holder,
                board_2d=board_2d)

            should_be = np.arange(env_bldr.rules.RANGE_SIZE, dtype=np.int32)

            assert np.array_equal(a1=result, a2=should_be)
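
In Leduc a hand is a single card, so a board card c blocks exactly range index c, and the expected result is every index except the dealt card. A minimal standalone sketch of that computation (plain numpy; possible_leduc_range_idxs is an illustrative helper, not PokerRL API):

import numpy as np

def possible_leduc_range_idxs(n_cards_in_deck, board_1d):
    # With one hole card per hand, range index i corresponds to card i,
    # so the possible hands are simply all cards not on the board.
    return np.setdiff1d(np.arange(n_cards_in_deck, dtype=np.int32), board_1d)

# For a 6-card Leduc deck with card 2 on the board:
assert np.array_equal(possible_leduc_range_idxs(6, np.array([2])),
                      np.array([0, 1, 3, 4, 5]))
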
Code example #2
File: test_rangeManager.py Project: zxpower/PokerRL
    def test_normalize(self):
        env_bldr = get_leduc_env_bldr()
        range_ = PokerRange(env_bldr=env_bldr)

        range_._range = np.random.random(size=env_bldr.rules.RANGE_SIZE)
        range_.normalize()
        np.testing.assert_allclose(np.sum(range_._range), 1, atol=0.0001)
Code example #3
File: test_rangeManager.py Project: zxpower/PokerRL
    def test_normalize_all_zero(self):
        env_bldr = get_leduc_env_bldr()
        range_ = PokerRange(env_bldr=env_bldr)

        range_._range = np.zeros_like(range_._range)
        range_.normalize()
        np.testing.assert_allclose(np.sum(range_._range), 1, atol=0.0001)
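
Together these two tests pin down normalize: it must rescale any range to sum to 1 and must not divide by zero when the range is all zeros. A minimal sketch of behavior consistent with both tests, assuming the all-zero case falls back to a uniform distribution (the fallback choice is an assumption; the tests only require the sum to be 1):

import numpy as np

def normalize(range_arr):
    # Rescale to a probability distribution; fall back to uniform if the
    # range is all zeros (assumed fallback -- the tests only check the sum).
    s = np.sum(range_arr)
    if s <= 0:
        return np.full_like(range_arr, 1.0 / range_arr.shape[0])
    return range_arr / s

assert np.isclose(np.sum(normalize(np.random.random(6))), 1.0)
assert np.isclose(np.sum(normalize(np.zeros(6))), 1.0)
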
Code example #4
File: test_rangeManager.py Project: zxpower/PokerRL
    def test_get_new_blockers_1d_leduc(self):
        env_bldr = get_leduc_env_bldr()
        range_ = PokerRange(env_bldr=env_bldr)

        full_board = np.array([[2, 1]], dtype=np.int8)

        should_be = {
            Poker.PREFLOP: env_bldr.lut_holder.get_1d_cards(full_board[:0]),
            Poker.FLOP: env_bldr.lut_holder.get_1d_cards(full_board),
        }

        for _round in [Poker.FLOP]:
            _n = env_bldr.lut_holder.DICT_LUT_N_CARDS_OUT[
                Poker.FLOP] - env_bldr.lut_holder.DICT_LUT_N_CARDS_OUT[_round]

            if _round == Poker.FLOP:
                board_2d = np.copy(full_board)
            else:
                board_2d = np.concatenate(
                    (full_board[:env_bldr.lut_holder.
                                DICT_LUT_N_CARDS_OUT[_round]],
                     np.array(
                         [Poker.CARD_NOT_DEALT_TOKEN_2D for _ in range(_n)],
                         dtype=np.int8)))

            result = range_._get_new_blockers_1d(game_round=_round,
                                                 board_2d=board_2d)
            assert np.array_equal(a1=result, a2=should_be[_round])
Code example #5
File: test_rangeManager.py Project: zxpower/PokerRL
    def test_get_new_blockers_1d_holdem(self):
        env_bldr = get_holdem_env_bldr()

        range_ = PokerRange(env_bldr=env_bldr)

        full_board = np.array([[1, 2], [3, 3], [12, 1], [5, 2], [6, 0]],
                              dtype=np.int8)

        should_be = {
            Poker.PREFLOP: env_bldr.lut_holder.get_1d_cards(full_board[:0]),
            Poker.FLOP: env_bldr.lut_holder.get_1d_cards(full_board[0:3]),
            Poker.TURN: env_bldr.lut_holder.get_1d_cards(full_board[3:4]),
            Poker.RIVER: env_bldr.lut_holder.get_1d_cards(full_board[4:5]),
        }

        for _round in [Poker.PREFLOP, Poker.FLOP, Poker.TURN, Poker.RIVER]:
            _n = env_bldr.lut_holder.DICT_LUT_N_CARDS_OUT[Poker.RIVER] - \
                 env_bldr.lut_holder.DICT_LUT_N_CARDS_OUT[_round]

            if _round == Poker.RIVER:
                board_2d = np.copy(full_board)
            else:
                board_2d = np.concatenate(
                    (full_board[:env_bldr.lut_holder.
                                DICT_LUT_N_BOARD_BRANCHES[_round]],
                     np.array(
                         [Poker.CARD_NOT_DEALT_TOKEN_2D for _ in range(_n)],
                         dtype=np.int8)))

            result = range_._get_new_blockers_1d(game_round=_round,
                                                 board_2d=board_2d)
            assert np.array_equal(a1=result, a2=should_be[_round])
Code example #6
File: test_rangeManager.py Project: zxpower/PokerRL
    def test_get_card_probs_holdem(self):
        env_bldr = get_holdem_env_bldr()
        range_ = PokerRange(env_bldr=env_bldr)

        cards_to_remove = np.array([0, 3, 6, 33, 21, 51], np.int8)

        # use previously tested method to make this test easier
        range_.set_cards_to_zero_prob(
            cards_2d=env_bldr.lut_holder.get_2d_cards(cards_to_remove))

        r = range_.get_card_probs()
        assert np.allclose(np.sum(r), 2, atol=0.00001)
        for c in cards_to_remove:
            assert np.allclose(r[c], 0, atol=0.00001)
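
get_card_probs marginalizes the hand distribution onto individual cards: with 2 hole cards per hand, the 52 per-card probabilities sum to 2, and cards removed from the range get probability 0. A standalone sketch of that marginalization, assuming a LUT that maps each range index to its two 1d cards (built here from itertools.combinations, which may not match PokerRL's actual hand ordering):

import itertools
import numpy as np

N_CARDS = 52
# Hypothetical LUT: range index -> the two 1d cards of that hand.
LUT_IDX_TO_CARDS = np.array(list(itertools.combinations(range(N_CARDS), 2)),
                            dtype=np.int32)  # shape [1326, 2]

def get_card_probs(range_arr):
    probs = np.zeros(N_CARDS, dtype=np.float64)
    for (c1, c2), p in zip(LUT_IDX_TO_CARDS, range_arr):
        probs[c1] += p
        probs[c2] += p
    return probs  # sums to 2 * sum(range_arr): every hand covers two cards

uniform = np.full(1326, 1.0 / 1326)
assert np.isclose(np.sum(get_card_probs(uniform)), 2.0)
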
Code example #7
File: test_rangeManager.py Project: zxpower/PokerRL
    def test_remove_cards_from_raw_range_holdem(self):
        env_bldr = get_holdem_env_bldr()
        range_ = PokerRange(env_bldr=env_bldr)

        holdem_cards = np.array([[7, 2], [6, 0]], dtype=np.int8)

        range_.set_cards_to_zero_prob(cards_2d=holdem_cards)

        _ra = range_._range.reshape(-1, env_bldr.rules.RANGE_SIZE)
        for i in range(_ra.shape[0]):
            np.testing.assert_allclose(np.sum(_ra[i]), 1, atol=0.00001)
        _assert_cards_not_in_ranges(cards_2d=holdem_cards,
                                    ranges=_ra,
                                    rules=env_bldr.rules,
                                    lut_holder=env_bldr.lut_holder)
Code example #8
File: StrategyFiller.py Project: zxpower/PokerRL
    def _fill_chance_node_strategy(self, node):
        assert node.strategy is None
        if node.is_terminal:
            return

        if node.p_id_acting_next == self._tree.CHANCE_ID:
            game_round = node.children[0].env_state[EnvDictIdxs.current_round]
            n_children = len(node.children)
            assert n_children == self._env_bldr.lut_holder.DICT_LUT_N_BOARDS[
                game_round]

            # chance nodes are uniform random
            node.strategy = np.zeros(shape=(self._env_bldr.rules.RANGE_SIZE,
                                            n_children),
                                     dtype=np.float32)

            # give each possible hand uniform weight; impossible hands stay at 0
            for c_id in range(n_children):
                mask = PokerRange.get_possible_range_idxs(
                    rules=self._env_bldr.rules,
                    lut_holder=self._env_bldr.lut_holder,
                    board_2d=node.children[c_id].env_state[
                        EnvDictIdxs.board_2d])
                node.strategy[mask, c_id] = \
                    1.0 / (self._env_bldr.rules.N_CARDS_IN_DECK - 2)

        for c in node.children:
            self._fill_chance_node_strategy(node=c)
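
The loop above fills one column of the strategy matrix per child board: every hand that does not block the child's card gets the same weight, 1.0 / (N_CARDS_IN_DECK - 2), which apparently accounts for both players' hole cards being out of the deck. A toy reconstruction for a 6-card, one-hole-card deck, using plain numpy in place of PokerRange.get_possible_range_idxs:

import numpy as np

N_DECK = 6  # toy Leduc-sized deck, one hole card per hand
strategy = np.zeros((N_DECK, N_DECK), dtype=np.float32)  # [hand, child_board]
for c_id in range(N_DECK):
    # hands still possible once card c_id is on the board
    mask = np.setdiff1d(np.arange(N_DECK), [c_id])
    strategy[mask, c_id] = 1.0 / (N_DECK - 2)

# a hand never reaches a board containing its own card
assert np.all(np.diag(strategy) == 0.0)
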
Code example #9
File: test_rangeManager.py Project: zxpower/PokerRL
    def test_create(self):
        env_bldr = get_leduc_env_bldr()
        range_ = PokerRange(env_bldr=env_bldr)

        assert np.allclose(np.sum(range_._range.reshape(-1, env_bldr.rules.RANGE_SIZE),
                                  axis=1),
                           1,
                           atol=0.0001)
Code example #10
File: test_rangeManager.py Project: zxpower/PokerRL
    def test_get_possible_range_idxs_holdem(self):
        env_bldr = get_holdem_env_bldr()
        for n in range(2, 9):
            board_2d = np.array(
                [[0, 0], [5, 2], [12, 3], Poker.CARD_NOT_DEALT_TOKEN_2D],
                dtype=np.int8)
            result = PokerRange.get_possible_range_idxs(
                rules=env_bldr.rules,
                lut_holder=env_bldr.lut_holder,
                board_2d=board_2d)

            assert result.shape[0] == 1176  # C(49, 2): the 3 board cards are blocked

            # all of these should be blocked
            for e in [0, 1, 2, 3, 4, 50, 1325]:
                assert not np.any(result == e)
Code example #11
File: look_up_table.py Project: zxpower/PokerRL
    def get_n_board_branches_LUT(self):
        _N_CARDS_DEALT_IN_TRANSITION_TO_LUT = self.get_n_cards_dealt_in_transition_to_LUT()
        _N_CARDS_OUT_AT = self.get_n_cards_out_at_LUT()
        lut = {Poker.PREFLOP: 0}
        for r in [_r for _r in self.rules.ALL_ROUNDS_LIST if _r != Poker.PREFLOP]:
            nc = self.rules.N_CARDS_IN_DECK \
                 - _N_CARDS_OUT_AT[self.rules.ROUND_BEFORE[r]] \
                 - self.rules.N_HOLE_CARDS

            # get_range_size is actually a general combinatorial function that we can also use here
            lut[r] = PokerRange.get_range_size(
                n_hole_cards=_N_CARDS_DEALT_IN_TRANSITION_TO_LUT[r],
                n_cards_in_deck=nc)
        return lut
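
For standard Hold'em this LUT reduces to binomial coefficients over the cards unseen at the start of each transition: with 2 hole cards and no board, the flop deals 3 of 50 remaining cards, the turn 1 of 47, and the river 1 of 46. A quick check of the formula (assuming 0 cards are out preflop):

from math import comb

N_DECK, N_HOLE = 52, 2
assert comb(N_DECK - 0 - N_HOLE, 3) == 19600  # preflop -> flop branches
assert comb(N_DECK - 3 - N_HOLE, 1) == 47     # flop -> turn branches
assert comb(N_DECK - 4 - N_HOLE, 1) == 46     # turn -> river branches
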
Code example #12
File: test_rangeManager.py Project: zxpower/PokerRL
    def test_get_range_size(self):
        assert PokerRange.get_range_size(n_hole_cards=2,
                                         n_cards_in_deck=52) == 1326
        assert PokerRange.get_range_size(n_hole_cards=4,
                                         n_cards_in_deck=52) == 270725
        assert PokerRange.get_range_size(n_hole_cards=1,
                                         n_cards_in_deck=52) == 52
        assert PokerRange.get_range_size(n_hole_cards=1,
                                         n_cards_in_deck=6) == 6
        assert PokerRange.get_range_size(n_hole_cards=3,
                                         n_cards_in_deck=6) == 20
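
These expected values are plain binomial coefficients: a range over k-card hands drawn from an n-card deck has C(n, k) entries. The same numbers via math.comb:

from math import comb

assert comb(52, 2) == 1326    # Hold'em
assert comb(52, 4) == 270725  # Omaha
assert comb(52, 1) == 52
assert comb(6, 1) == 6        # Leduc
assert comb(6, 3) == 20
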
Code example #13
File: LocalLBRWorker.py Project: zxpower/PokerRL
class LocalLBRWorker:
    """
    Slave to EvalLBRMaster. Does the LBR computation as described in https://arxiv.org/abs/1612.07547
    """
    def __init__(self, t_prof, chief_handle, eval_agent_cls):
        assert t_prof.n_seats == 2

        self.t_prof = t_prof
        self.lbr_args = t_prof.module_args["lbr"]
        self._eval_env_bldr = _util.get_env_builder_lbr(t_prof=t_prof)
        self.check_to_round = self.lbr_args.lbr_check_to_round

        self.chief_handle = chief_handle

        self.agent = _AgentWrapper(t_prof=t_prof,
                                   lbr_args=self.lbr_args,
                                   eval_agent_cls=eval_agent_cls)

        # The LBR env can have different raise sizes than the agent's env! This needs to be considered when updating the envs after the opponent acts.
        self._env = None
        self.agent_range = PokerRange(env_bldr=self._eval_env_bldr)

        assert self.check_to_round is None or (
            self.check_to_round in self._eval_env_bldr.rules.ALL_ROUNDS_LIST)

    def run(self, agent_seat_id, n_iterations, mode, stack_size):
        """ returns an estimate of a lower bound of the exploitablity of the agent """

        self.agent.set_mode(mode=mode)
        self.agent.to_stack_size(stack_size)
        self.agent_range.reset()

        self._env = self._eval_env_bldr.get_new_env(is_evaluating=True,
                                                    stack_size=stack_size)

        if not self.agent.can_compute_mode():
            return None

        if self._eval_env_bldr.env_cls.IS_FIXED_LIMIT_GAME:
            return self._run_limit(agent_seat_id=agent_seat_id,
                                   n_iterations=n_iterations)
        else:
            return self._run_no_limit(agent_seat_id=agent_seat_id,
                                      n_iterations=n_iterations)

    def update_weights(self, weights_for_eval_agent):
        self.agent.update_weights(weights_for_eval_agent)

    def _reset_episode(self):
        ret = self._env.reset()
        self.agent.reset(deck_state_dict=self._env.cards_state_dict())
        self.agent_range.reset()
        return ret

    def _run_limit(self, agent_seat_id, n_iterations):
        total_lbr_winnings = np.empty(shape=n_iterations, dtype=np.float32)
        lbr_seat_id = 1 - agent_seat_id

        for iteration_id in range(n_iterations):
            if iteration_id % 50 == 0:
                print("LBR hand: ", iteration_id)

            # """""""""""""""""
            # Reset
            # """""""""""""""""
            env_obs, reward, terminal, info = self._reset_episode()

            lbr_hand = self._env.get_hole_cards_of_player(p_id=lbr_seat_id)
            self.agent_range.set_cards_to_zero_prob(cards_2d=lbr_hand)

            # """""""""""""""""
            # Play Episode
            # """""""""""""""""
            while not terminal:
                p_id_acting = self._env.current_player.seat_id

                if self.t_prof.DEBUGGING:
                    assert p_id_acting == self.agent.cpu_agent._internal_env_wrapper.env.current_player.seat_id

                if p_id_acting == lbr_seat_id:
                    # optional feature: check the first N rounds 100% as LBR
                    if (self.check_to_round is not None) and (
                            self._env.current_round < self.check_to_round):
                        action_int = Poker.CHECK_CALL

                    else:
                        _rollout_mngr = _LBRRolloutManager(
                            t_prof=self.t_prof,
                            env_bldr=self._eval_env_bldr,
                            env=self._env,
                            lbr_hand_2d=lbr_hand)

                        # illegal: -1, fold: 0, all other: any float
                        _utility = np.full(shape=3,
                                           fill_value=-1.0,
                                           dtype=np.float32)

                        # ev(s, lbr_a=fold)
                        _utility[Poker.FOLD] = 0.0

                        # ev(s, lbr_a=check_call)
                        # equity if the hand were checked/called down to showdown
                        _wp = _rollout_mngr.get_lbr_checkdown_equity(
                            agent_range=self.agent_range)
                        _asked = self._env.seats[
                            agent_seat_id].current_bet - self._env.seats[
                                lbr_seat_id].current_bet
                        _pot_before_action = self._env.get_all_winnable_money()
                        _utility[
                            Poker.CHECK_CALL] = _wp * _pot_before_action - (
                                1 - _wp) * _asked

                        # prepare for raise simulation
                        if Poker.BET_RAISE in self._env.get_legal_actions():
                            _saved_env_state = self._env.state_dict()
                            _saved_agent_env_state = self.agent.env_state_dict()
                            _saved_agent_range_state = self.agent_range.state_dict()

                            # compute ev for raise
                            # _________________________________ simulate LBR play r ____________________________________
                            self._env.step(action=Poker.BET_RAISE)
                            _pot_after_raise = self._env.get_all_winnable_money()

                            self.agent.notify_of_action(
                                p_id_acted=lbr_seat_id,
                                action_he_did=Poker.BET_RAISE)

                            # what agent would do after LBR raises. DOESN'T STEP INTERNAL ENV!
                            _, a_probs_each_hand = self.agent.get_action(
                                step_env=False, need_probs=True)

                            # _______________________________ simulate agent reaction __________________________________
                            # p(agent_fold)
                            _fold_prob = np.sum(
                                self.agent_range.range *
                                a_probs_each_hand[:, Poker.FOLD])

                            # p(not agent_fold | hand)
                            _p_not_fold_per_hand = (
                                1 - a_probs_each_hand[:, Poker.FOLD])

                            # agent_range after not folding
                            self.agent_range.mul_and_norm(_p_not_fold_per_hand)

                            # p(lbr_win | lbr play r -> agent play not fold)
                            _wp_now = _rollout_mngr.get_lbr_checkdown_equity(
                                agent_range=self.agent_range)

                            # ev(state, lbr_a=r)
                            _chips_lbr_puts_in_pot = _pot_after_raise - _pot_before_action
                            _ev_if_fold = _pot_before_action
                            _ev_if_not_fold = (_wp_now * _pot_after_raise) - (
                                (1 - _wp_now) * _chips_lbr_puts_in_pot)
                            _utility[
                                Poker.BET_RAISE] = _fold_prob * _ev_if_fold + (
                                    1 - _fold_prob) * _ev_if_not_fold

                            # ________________________________________ reset ___________________________________________
                            self.agent_range.load_state_dict(
                                _saved_agent_range_state)
                            self._env.load_state_dict(_saved_env_state)
                            self.agent.load_env_state_dict(
                                _saved_agent_env_state)

                        # select action with highest approximated EV
                        action_int = np.argmax(_utility)

                    # ________________________________________ notify agent ____________________________________________
                    self.agent.notify_of_action(p_id_acted=lbr_seat_id,
                                                action_he_did=action_int)

                else:  # agent has to act
                    action_int, a_probs_each_hand = self.agent.get_action(
                        step_env=True, need_probs=True)
                    self.agent_range.update_after_action(
                        action=action_int,
                        all_a_probs_for_all_hands=a_probs_each_hand)

                # _____________________________________________ step ___________________________________________________
                old_game_round = self._env.current_round

                env_obs, reward, terminal, info = self._env.step(
                    action=action_int)

                if self._env.current_round != old_game_round:
                    self.agent_range.update_after_new_round(
                        new_round=self._env.current_round,
                        board_now_2d=self._env.board)

            total_lbr_winnings[iteration_id] = reward[
                lbr_seat_id] * self._env.REWARD_SCALAR * self._env.EV_NORMALIZER

        return total_lbr_winnings

    def _run_no_limit(self, agent_seat_id, n_iterations):
        total_lbr_winnings = np.empty(shape=n_iterations, dtype=np.float32)
        lbr_seat_id = 1 - agent_seat_id
        n_lbr_bets = len(self._env.bet_sizes_list_as_frac_of_pot)

        for iteration_id in range(n_iterations):
            if iteration_id % 50 == 0:
                print("LBR hand: ", iteration_id)

            # """""""""""""""""
            # Reset
            # """""""""""""""""
            env_obs, reward, done, info = self._reset_episode()

            lbr_hand = self._env.get_hole_cards_of_player(p_id=lbr_seat_id)
            self.agent_range.set_cards_to_zero_prob(cards_2d=lbr_hand)

            # """""""""""""""""
            # Play Episode
            # """""""""""""""""
            while not done:
                p_id_acting = self._env.current_player.seat_id

                if self.t_prof.DEBUGGING:
                    assert p_id_acting == self.agent.cpu_agent._internal_env_wrapper.env.current_player.seat_id

                if p_id_acting == lbr_seat_id:

                    # optional feature: check the first N rounds 100% as LBR
                    if (self.check_to_round is not None) and (
                            self._env.current_round < self.check_to_round):
                        action_int = Poker.CHECK_CALL

                    else:
                        _rollout_mngr = _LBRRolloutManager(
                            t_prof=self.t_prof,
                            env_bldr=self._eval_env_bldr,
                            env=self._env,
                            lbr_hand_2d=lbr_hand)

                        # illegal: -1, fold: 0, all other: any float
                        _utility = np.full(shape=2 + n_lbr_bets,
                                           fill_value=-1.0,
                                           dtype=np.float32)

                        # ev(s, lbr_a=fold)
                        _utility[Poker.FOLD] = 0.0

                        # ev(s, lbr_a=check_call)
                        _wp = _rollout_mngr.get_lbr_checkdown_equity(
                            agent_range=self.agent_range)
                        _asked = self._env.seats[
                            agent_seat_id].current_bet - self._env.seats[
                                lbr_seat_id].current_bet
                        _pot_before_action = self._env.get_all_winnable_money()
                        _utility[
                            Poker.CHECK_CALL] = _wp * _pot_before_action - (
                                1 - _wp) * _asked

                        # prepare for raise simulation
                        _saved_env_state = self._env.state_dict()
                        _saved_agent_env_state = self.agent.env_state_dict()
                        _saved_agent_range_state = self.agent_range.state_dict()
                        _legal_raises = self._env.get_legal_actions()
                        for a in [Poker.FOLD, Poker.CHECK_CALL]:
                            if a in _legal_raises:
                                _legal_raises.remove(a)

                        # compute ev for all raise sizes LBR can choose from
                        for r in _legal_raises:
                            raise_frac = self._env.bet_sizes_list_as_frac_of_pot[
                                r - 2]

                            # _________________________________ simulate LBR play r ____________________________________
                            self._env.step(action=r)
                            _pot_after_raise = self._env.get_all_winnable_money()

                            self.agent.notify_of_raise_frac_action(
                                p_id_acted=lbr_seat_id, frac=raise_frac)

                            if self.t_prof.DEBUGGING:
                                assert agent_seat_id == self.agent.cpu_agent._internal_env_wrapper.env.current_player.seat_id

                            # what agent would do after LBR raises. DOESN'T STEP INTERNAL ENV!
                            a_probs_each_hand = self.agent.get_a_probs_for_each_hand()

                            # _______________________________ simulate agent reaction __________________________________
                            # p(agent_fold)
                            _fold_prob = np.sum(
                                self.agent_range.range *
                                a_probs_each_hand[:, Poker.FOLD])

                            # p(not agent_fold | hand)
                            _p_not_fold_per_hand = (
                                1 - a_probs_each_hand[:, Poker.FOLD])

                            # agent_range after not folding
                            self.agent_range.mul_and_norm(_p_not_fold_per_hand)

                            # p(lbr_win | lbr play r -> agent play not fold)
                            _wp_now = _rollout_mngr.get_lbr_checkdown_equity(
                                agent_range=self.agent_range)

                            # ev(state, lbr_a=r)
                            _chips_lbr_puts_in_pot = _pot_after_raise - _pot_before_action
                            _ev_if_fold = _pot_before_action
                            _ev_if_not_fold = (_wp_now * _pot_after_raise) - (
                                (1 - _wp_now) * _chips_lbr_puts_in_pot)
                            _utility[r] = _fold_prob * _ev_if_fold + (
                                1 - _fold_prob) * _ev_if_not_fold

                            # ________________________________________ reset ___________________________________________
                            self.agent_range.load_state_dict(
                                _saved_agent_range_state)
                            self._env.load_state_dict(_saved_env_state)
                            self.agent.load_env_state_dict(
                                _saved_agent_env_state)

                        # select action with highest approximated EV
                        action_int = np.argmax(_utility)

                    # ________________________________________ notify agent ____________________________________________
                    if action_int >= 2:
                        raise_frac = self._env.bet_sizes_list_as_frac_of_pot[
                            action_int - 2]
                        self.agent.notify_of_raise_frac_action(
                            p_id_acted=lbr_seat_id, frac=raise_frac)
                    else:
                        self.agent.notify_of_action(p_id_acted=lbr_seat_id,
                                                    action_he_did=action_int)

                else:  # agent has to act
                    if self.t_prof.DEBUGGING:
                        assert p_id_acting == self.agent.cpu_agent._internal_env_wrapper.env.current_player.seat_id

                    action_int, a_probs_each_hand = self.agent.get_action(
                        step_env=True, need_probs=True)

                    self.agent_range.update_after_action(
                        action=action_int,
                        all_a_probs_for_all_hands=a_probs_each_hand)
                    if action_int >= 2:
                        # querying what the bet size is in the agent's env_args (these might differ from LBR's!).
                        raise_frac = \
                            self.agent.cpu_agent.env_bldr.env_args.bet_sizes_list_as_frac_of_pot[action_int - 2]

                # _____________________________________________ step ___________________________________________________
                old_game_round = self._env.current_round

                if action_int >= 2:  # step with fraction because agent and LBR have different raise sizes
                    env_obs, reward, done, info = self._env.step_raise_pot_frac(
                        pot_frac=raise_frac)
                else:
                    env_obs, reward, done, info = self._env.step(
                        action=action_int)

                if self._env.current_round != old_game_round:
                    self.agent_range.update_after_new_round(
                        new_round=self._env.current_round,
                        board_now_2d=self._env.board)

            total_lbr_winnings[iteration_id] = reward[
                lbr_seat_id] * self._env.REWARD_SCALAR * self._env.EV_NORMALIZER

        return total_lbr_winnings
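
The EV bookkeeping above is the heart of LBR and can be read in isolation: check/call wins the current pot with the checkdown equity wp and otherwise loses the amount asked, while a raise splits into the agent folding (LBR takes the pot as it stood) or continuing (a checkdown at the raised pot, minus the chips LBR just invested). A self-contained sketch of those two formulas with plain floats (function names are illustrative, not PokerRL API):

def ev_check_call(wp, pot_before, asked):
    # ev(s, lbr_a=check/call): win the current pot with prob wp,
    # otherwise lose the amount needed to call.
    return wp * pot_before - (1.0 - wp) * asked

def ev_raise(fold_prob, wp_after_no_fold, pot_before, pot_after_raise):
    # ev(s, lbr_a=raise): agent folds with prob fold_prob and LBR takes the
    # pot as it stood; otherwise a checkdown at the raised pot size, minus
    # the chips LBR put in with the raise.
    chips_lbr_puts_in = pot_after_raise - pot_before
    ev_if_fold = pot_before
    ev_if_not_fold = (wp_after_no_fold * pot_after_raise
                      - (1.0 - wp_after_no_fold) * chips_lbr_puts_in)
    return fold_prob * ev_if_fold + (1.0 - fold_prob) * ev_if_not_fold

# 40% checkdown equity, 10-chip pot, 2 chips to call:
assert abs(ev_check_call(0.4, 10.0, 2.0) - 2.8) < 1e-9
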
Code example #14
File: game_rules.py Project: JarlPed/DREAM-1
class Flop3HoldemRules:
    """
    General rules of Texas Hold'em poker games
    """
    N_HOLE_CARDS = 2
    N_RANKS = 13
    N_SUITS = 4
    N_CARDS_IN_DECK = N_RANKS * N_SUITS
    RANGE_SIZE = PokerRange.get_range_size(n_hole_cards=N_HOLE_CARDS, n_cards_in_deck=N_CARDS_IN_DECK)

    BTN_IS_FIRST_POSTFLOP = False

    N_FLOP_CARDS = 3
    N_TURN_CARDS = 0
    N_RIVER_CARDS = 0
    N_TOTAL_BOARD_CARDS = N_FLOP_CARDS
    ALL_ROUNDS_LIST = [Poker.PREFLOP, Poker.FLOP]

    SUITS_MATTER = True

    ROUND_BEFORE = {
        Poker.PREFLOP: Poker.PREFLOP,
        Poker.FLOP: Poker.PREFLOP,
        Poker.TURN: None,
        Poker.RIVER: None,
    }
    ROUND_AFTER = {
        Poker.PREFLOP: Poker.FLOP,
        Poker.FLOP: None,
        Poker.TURN: None,
        Poker.RIVER: None,
    }

    RANK_DICT = {
        Poker.CARD_NOT_DEALT_TOKEN_1D: "",
        0: "2",
        1: "3",
        2: "4",
        3: "5",
        4: "6",
        5: "7",
        6: "8",
        7: "9",
        8: "T",
        9: "J",
        10: "Q",
        11: "K",
        12: "A"
    }
    SUIT_DICT = {
        Poker.CARD_NOT_DEALT_TOKEN_1D: "",
        0: "h",
        1: "d",
        2: "s",
        3: "c"
    }

    STRING = "FLOP3_HOLDEM_RULES"

    def __init__(self):
        from PokerRL.game._.cpp_wrappers.CppHandeval import CppHandeval

        self._clib = CppHandeval()

    def get_hand_rank_all_hands_on_given_boards(self, boards_1d, lut_holder):
        """
        for docs refer to PokerEnv
        """
        raise NotImplementedError(
            "Batched hand eval for multiple boards is unfortunately not implemented for FHP at the moment."
        )  # TODO

    def get_hand_rank(self, hand_2d, board_2d):
        """
        for docs refer to PokerEnv
        """
        return self._clib.get_hand_rank_fhp3(hand_2d=hand_2d, board_2d=board_2d)

    @classmethod
    def get_lut_holder(cls):
        from PokerRL.game._.look_up_table import LutHolderHoldem

        return LutHolderHoldem(cls)
Code example #15
File: game_rules.py Project: JarlPed/DREAM-1
class BigLeducRules:
    N_HOLE_CARDS = 1
    N_RANKS = 12
    N_SUITS = 2
    N_CARDS_IN_DECK = N_RANKS * N_SUITS
    RANGE_SIZE = PokerRange.get_range_size(n_hole_cards=N_HOLE_CARDS, n_cards_in_deck=N_CARDS_IN_DECK)

    BTN_IS_FIRST_POSTFLOP = True

    N_FLOP_CARDS = 1
    N_TURN_CARDS = 0
    N_RIVER_CARDS = 0
    N_TOTAL_BOARD_CARDS = N_FLOP_CARDS + N_TURN_CARDS + N_RIVER_CARDS
    ALL_ROUNDS_LIST = [Poker.PREFLOP, Poker.FLOP]

    SUITS_MATTER = False

    ROUND_BEFORE = {
        Poker.PREFLOP: Poker.PREFLOP,
        Poker.FLOP: Poker.PREFLOP
    }
    ROUND_AFTER = {
        Poker.PREFLOP: Poker.FLOP,
        Poker.FLOP: None
    }

    RANK_DICT = {i: str(i + 2) for i in range(N_RANKS)}
    SUIT_DICT = {k: ["a", "b", "c", "d", "e", "f", "g"][k] for k in range(N_SUITS)} \
        if N_SUITS < 8 \
        else {i: "_" + str(i) for i in range(N_SUITS)}

    STRING = "BIG_LEDUC_RULES"

    def __init__(self):
        pass

    def get_hand_rank_all_hands_on_given_boards(self, boards_1d, lut_holder):
        """
        for general docs refer to PokerEnv
        """
        hand_ranks = np.full(shape=(boards_1d.shape[0], self.RANGE_SIZE), fill_value=-1, dtype=np.int32)
        for board_idx in range(boards_1d.shape[0]):
            for range_idx in range(self.RANGE_SIZE):
                hand_ranks[board_idx, range_idx] = self.get_hand_rank(
                    hand_2d=lut_holder.get_2d_hole_cards_from_range_idx(range_idx=range_idx),
                    board_2d=lut_holder.get_2d_cards(cards_1d=boards_1d[board_idx]))

        return hand_ranks

    def get_hand_rank(self, hand_2d, board_2d):
        """
        for docs refer to PokerEnv
        """
        if board_2d[0, 0] == hand_2d[0, 0]:
            return 10000 + hand_2d[0, 0]
        else:
            return hand_2d[0, 0]

    @classmethod
    def get_lut_holder(cls):
        from PokerRL.game._.look_up_table import LutHolderLeduc

        return LutHolderLeduc(cls)
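
The ranking scheme is easy to sanity-check: pairing the board adds 10000 to the hole-card rank, so any pair outranks any unpaired hand, and ties break by rank within each class. A small usage sketch (assuming the class above is importable; Big Leduc ranks run 0..11):

import numpy as np

rules = BigLeducRules()
board = np.array([[3, 1]], dtype=np.int32)  # rank 3 on the board

pair = rules.get_hand_rank(hand_2d=np.array([[3, 0]], dtype=np.int32), board_2d=board)
high = rules.get_hand_rank(hand_2d=np.array([[11, 0]], dtype=np.int32), board_2d=board)

assert pair == 10003  # paired the board: 10000 + rank
assert high == 11     # unpaired: just the hole-card rank
assert pair > high    # any pair beats any high card
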
Code example #16
File: test_rangeManager.py Project: zxpower/PokerRL
    def test_save_load(self):
        env_bldr = get_leduc_env_bldr()
        range_ = PokerRange(env_bldr=env_bldr)
        range_.load_state_dict(range_.state_dict())
Code example #17
File: test_rangeManager.py Project: zxpower/PokerRL
    def test_get_card_probs_leduc(self):
        env_bldr = get_leduc_env_bldr()
        range_ = PokerRange(env_bldr=env_bldr)

        assert np.array_equal(range_.get_card_probs(), range_._range)
Code example #18
class PLORules:
    """
    General rules of Pot Limit Omaha
    """
    N_HOLE_CARDS = 4
    N_RANKS = 13
    N_SUITS = 4
    N_CARDS_IN_DECK = N_RANKS * N_SUITS
    RANGE_SIZE = PokerRange.get_range_size(n_hole_cards=N_HOLE_CARDS, n_cards_in_deck=N_CARDS_IN_DECK)

    BTN_IS_FIRST_POSTFLOP = False

    N_FLOP_CARDS = 3
    N_TURN_CARDS = 1
    N_RIVER_CARDS = 1
    N_TOTAL_BOARD_CARDS = N_FLOP_CARDS + N_TURN_CARDS + N_RIVER_CARDS
    ALL_ROUNDS_LIST = [Poker.PREFLOP, Poker.FLOP, Poker.TURN, Poker.RIVER]

    SUITS_MATTER = True

    ROUND_BEFORE = {
        Poker.PREFLOP: Poker.PREFLOP,
        Poker.FLOP: Poker.PREFLOP,
        Poker.TURN: Poker.FLOP,
        Poker.RIVER: Poker.TURN
    }
    ROUND_AFTER = {
        Poker.PREFLOP: Poker.FLOP,
        Poker.FLOP: Poker.TURN,
        Poker.TURN: Poker.RIVER,
        Poker.RIVER: None
    }

    RANK_DICT = {
        Poker.CARD_NOT_DEALT_TOKEN_1D: "",
        0: "2",
        1: "3",
        2: "4",
        3: "5",
        4: "6",
        5: "7",
        6: "8",
        7: "9",
        8: "T",
        9: "J",
        10: "Q",
        11: "K",
        12: "A"
    }
    SUIT_DICT = {
        Poker.CARD_NOT_DEALT_TOKEN_1D: "",
        0: "h",
        1: "d",
        2: "s",
        3: "c"
    }

    STRING = "PLO_RULES"

    def __init__(self):
        from PokerRL.game._.cpp_wrappers.CppHandeval import CppHandeval

        self._clib = CppHandeval()

    def get_hand_rank_all_hands_on_given_boards(self, boards_1d, lut_holder):
        """
        for docs refer to PokerEnv
        returns a numpy array [1,hole_hands_total] where best hand has biggest number
        and not possible hand has -1
        """
        r = self._clib.get_hand_rank_all_hands_on_given_boards_52_holdem(boards_1d=boards_1d, lut_holder=lut_holder)
        return r

    def get_hand_rank(self, hand_2d, board_2d):
        """
        for docs refer to PokerEnv
        """
        r = self._clib.get_hand_rank_52_plo(hand_2d=hand_2d, board_2d=board_2d)
        return r

    @classmethod
    def get_lut_holder(cls):
        from PokerRL.game._.look_up_table import LutHolderPLO

        return LutHolderPLO(cls)