Example #1
    def test_get_1d_card(self):
        lh = DiscretizedNLHoldem.get_lut_holder()
        assert lh.get_1d_card(card_2d=[0, 3]) == 3
        assert lh.get_1d_card(card_2d=[12, 3]) == 51
        assert lh.get_1d_card(card_2d=np.array([0, 0], dtype=np.int8)) == 0
        assert lh.get_1d_card(card_2d=np.array([0, 0], dtype=np.int32)) == 0
        assert lh.get_1d_card(card_2d=Poker.CARD_NOT_DEALT_TOKEN_2D) == Poker.CARD_NOT_DEALT_TOKEN_1D
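The asserts are consistent with a 2D card being a [rank, suit] pair and the 1D index being rank * 4 + suit. The helper below is a minimal sketch of that presumed encoding, inferred from the test values, not taken from PokerRL itself:

# Sketch of the 2D -> 1D encoding implied by the asserts above
# (assumption: card_2d == [rank, suit] with 13 ranks and 4 suits).
def card_2d_to_1d(card_2d):
    rank, suit = card_2d
    return rank * 4 + suit

assert card_2d_to_1d([0, 3]) == 3    # lowest rank, suit 3
assert card_2d_to_1d([12, 3]) == 51  # highest rank, suit 3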
Example #2
    def test_create(self):
        lh = DiscretizedNLHoldem.get_lut_holder()

        assert lh.LUT_1DCARD_2_2DCARD.dtype == np.dtype(np.int8)
        assert lh.LUT_2DCARD_2_1DCARD.dtype == np.dtype(np.int8)
        assert lh.LUT_IDX_2_HOLE_CARDS.dtype == np.dtype(np.int8)
        assert lh.LUT_HOLE_CARDS_2_IDX.dtype == np.dtype(np.int16)
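A plausible reason for the dtype split: single-card indices only run 0..51 and fit in int8, while hole-card range indices run 0..1325 and overflow int8, so the hole-card LUT needs int16. A quick standalone check:

import numpy as np

# 52 cards -> indices 0..51 fit into int8 (max 127).
assert 51 <= np.iinfo(np.int8).max
# 52 choose 2 = 1326 hole-card combos -> indices 0..1325 exceed int8, fit int16.
assert 1325 > np.iinfo(np.int8).max
assert 1325 <= np.iinfo(np.int16).max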
Example #3
def get_holdem_env_bldr():
    return VanillaEnvBuilder(env_cls=DiscretizedNLHoldem,
                             env_args=DiscretizedNLHoldem.ARGS_CLS(
                                 n_seats=3,
                                 starting_stack_sizes_list=[234] * 3,
                                 bet_sizes_list_as_frac_of_pot=[0.1, 0.4, 1.0],
                                 stack_randomization_range=(-123, 234)))
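For context, Example #7 below constructs the environment directly from such args. The sketch here follows the same pattern, assuming the builder exposes its env_args attribute and that hh_logger=None is valid (Example #7 guards its logger against None):

# Sketch: build the env directly from the builder's args (constructor
# signature taken from Example #7; env_args attribute is an assumption).
env_bldr = get_holdem_env_bldr()
env = DiscretizedNLHoldem(env_args=env_bldr.env_args,
                          is_evaluating=True,
                          lut_holder=DiscretizedNLHoldem.get_lut_holder(),
                          hh_logger=None)
obs, rew_for_all, done, info = env.reset()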
Example #4
    def test_hole_card_luts(self):
        """ tests reversibility """
        lh = DiscretizedNLHoldem.get_lut_holder()
        for h in range(1326):
            _c_1d = lh.get_1d_hole_cards_from_range_idx(h)  # Tests 1d conversion
            _c_2d = lh.get_2d_hole_cards_from_range_idx(h)  # Tests 2d conversion

            c_1d = lh.LUT_IDX_2_HOLE_CARDS[h]
            c_2d = np.array([lh.LUT_1DCARD_2_2DCARD[c_1d[0]],
                             lh.LUT_1DCARD_2_2DCARD[c_1d[1]]],
                            dtype=np.int8)

            assert np.array_equal(c_1d, _c_1d)
            assert np.array_equal(c_2d, _c_2d)

        # Tests the inverse mapping, implicitly validating that both the 1d and 2d mappings are unique
        for c1 in range(51):
            for c2 in range(c1 + 1, 52):
                cc1 = lh.LUT_1DCARD_2_2DCARD[c1]
                cc2 = lh.LUT_1DCARD_2_2DCARD[c2]
                hole_cards = np.array([cc1, cc2], dtype=np.int8)
                assert lh.get_range_idx_from_hole_cards(hole_cards) == \
                       lh.LUT_HOLE_CARDS_2_IDX[c1, c2]
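The loop bound of 1326 in the first part matches C(52, 2), the number of unordered hole-card pairs, which is why the inverse check over all (c1, c2) pairs covers the full index range. A one-line check:

import math

# 52 choose 2 unordered hole-card combos -> 1326 range indices.
assert math.comb(52, 2) == 1326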
Example #5
    def test_get_range_idx_from_hole_cards(self):
        lh = DiscretizedNLHoldem.get_lut_holder()
        n = 0
        for c1 in range(51):
            for c2 in range(c1 + 1, 52):
                assert lh.LUT_HOLE_CARDS_2_IDX[c1, c2] == n
                n += 1
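This test implies the LUT enumerates pairs (c1, c2) with c1 < c2 in lexicographic order, which admits a closed form. The function below is a sketch of that formula, derived from the loop above rather than taken from PokerRL, and verified against the same counter:

# Sketch: closed-form range index for lexicographic pair enumeration.
def range_idx(c1, c2):
    # Each block c1' < c1 contributes (51 - c1') pairs; then offset inside c1's block.
    return c1 * (103 - c1) // 2 + (c2 - c1 - 1)

n = 0
for c1 in range(51):
    for c2 in range(c1 + 1, 52):
        assert range_idx(c1, c2) == n
        n += 1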
Example #6
    def test_get_2d_cards(self):
        lh = DiscretizedNLHoldem.get_lut_holder()
        assert np.array_equal(lh.get_2d_cards(cards_1d=np.array([])), np.array([]))
        assert np.array_equal(lh.get_2d_cards(cards_1d=np.array([3])), np.array([[0, 3]]))
        assert np.array_equal(lh.get_2d_cards(cards_1d=np.array([3, 51])), np.array([[0, 3], [12, 3]]))
        assert np.array_equal(lh.get_2d_cards(cards_1d=np.array([0, Poker.CARD_NOT_DEALT_TOKEN_1D], dtype=np.int8)),
                              np.concatenate((np.array([[0, 0]]), Poker.CARD_NOT_DEALT_TOKEN_2D.reshape(-1, 2))))
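These asserts are consistent with the inverse of the encoding sketched under Example #1: rank = card // 4, suit = card % 4. A minimal sketch of that presumed decoding, again inferred from the test values:

# Sketch of the 1D -> 2D decoding implied by the asserts above
# (assumption: 2D card == [rank, suit], inverse of rank * 4 + suit).
def card_1d_to_2d(card_1d):
    return [card_1d // 4, card_1d % 4]

assert card_1d_to_2d(3) == [0, 3]
assert card_1d_to_2d(51) == [12, 3]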
Example #7
    def run(self, n_games_per_seat):

        REFERENCE_AGENT = 0

        _env = DiscretizedNLHoldem(env_args=self._env_args, is_evaluating=True,
                                   lut_holder=self._lut_holder, hh_logger=self._logger)
        winnings = np.empty(shape=(n_games_per_seat * _env.N_SEATS), dtype=np.float32)

        for seat_p0 in range(_env.N_SEATS):
            seat_p1 = 1 - seat_p0

            # Set player names according to seat; seats are rotated each pass
            # to imitate the movement of the blinds.

            if self._logger is not None:
                if seat_p0 == REFERENCE_AGENT:
                    self._logger.set_names(('Hero', 'Dummy'))
                else:
                    self._logger.set_names(('Dummy', 'Hero'))

            for _hand_nr in range(n_games_per_seat):
                # """""""""""""""""
                # Reset
                # """""""""""""""""

                _, r_for_all, done, info = _env.reset()

                for e in self._eval_agents:
                    e.reset(deck_state_dict=_env.cards_state_dict())

                # """""""""""""""""
                # Play Episode
                # """""""""""""""""
                while not done:
                    p_id_acting = _env.current_player.seat_id

                    if p_id_acting == seat_p0:
                        action_int, _ = self._eval_agents[REFERENCE_AGENT].get_action(step_env=True, need_probs=False)
                        self._eval_agents[1 - REFERENCE_AGENT].notify_of_action(p_id_acted=p_id_acting,
                                                                                action_he_did=action_int)

                    elif p_id_acting == seat_p1:
                        action_int, _ = self._eval_agents[1 - REFERENCE_AGENT].get_action(step_env=True,
                                                                                          need_probs=False)
                        self._eval_agents[REFERENCE_AGENT].notify_of_action(p_id_acted=p_id_acting,
                                                                            action_he_did=action_int)

                    else:
                        raise ValueError("Only HU is supported!")

                    _, r_for_all, done, info = _env.step(action_int)

                # """""""""""""""""
                # Add Rews
                # """""""""""""""""
                winnings[_hand_nr + (seat_p0 * n_games_per_seat)] = r_for_all[seat_p0] \
                                                                    * _env.REWARD_SCALAR \
                                                                    * _env.EV_NORMALIZER
                if _hand_nr % 100 == 0:
                    print(f"Hand: {_hand_nr} out of {n_games_per_seat}")

        mean = np.mean(winnings).item()
        std = np.std(winnings).item()

        _d = 1.96 * std / np.sqrt(n_games_per_seat * _env.N_SEATS)
        lower_conf95 = mean - _d
        upper_conf95 = mean + _d

        print(f"Played {n_games_per_seat * 2} hands of poker.")
        print("Player 1", self._eval_agents[REFERENCE_AGENT].get_mode() + ":", mean, "milliBB per hand +/-", _d)
        print("Player 2", self._eval_agents[1 - REFERENCE_AGENT].get_mode() + ":", (-mean), "milliBB per hand+/-", _d)

        return float(mean), float(upper_conf95), float(lower_conf95)
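The final lines compute a normal-approximation 95% confidence interval, mean +/- 1.96 * std / sqrt(n), over all hands played. A standalone sketch of the same computation:

import numpy as np

# Sketch: the 95% CI used above as a standalone helper (z = 1.96 for a
# normal approximation; `winnings` holds one reward sample per hand played).
def conf95(winnings):
    mean = float(np.mean(winnings))
    half_width = 1.96 * float(np.std(winnings)) / np.sqrt(len(winnings))
    return mean, mean + half_width, mean - half_width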