def test_get_1d_card(self):
    """2D->1D card conversion works for lists, int8/int32 arrays, and the not-dealt token."""
    lut = DiscretizedNLHoldem.get_lut_holder()
    # Each entry is ((rank, suit) input, expected flat card index).
    cases = [
        ([0, 3], 3),
        ([12, 3], 51),
        (np.array([0, 0], dtype=np.int8), 0),
        (np.array([0, 0], dtype=np.int32), 0),
        (Poker.CARD_NOT_DEALT_TOKEN_2D, Poker.CARD_NOT_DEALT_TOKEN_1D),
    ]
    for card_2d, expected in cases:
        assert lut.get_1d_card(card_2d=card_2d) == expected
def test_create(self):
    """The LUT holder materializes its lookup tables with the expected integer dtypes."""
    lut = DiscretizedNLHoldem.get_lut_holder()
    dtype_expectations = (
        (lut.LUT_1DCARD_2_2DCARD, np.int8),
        (lut.LUT_2DCARD_2_1DCARD, np.int8),
        (lut.LUT_IDX_2_HOLE_CARDS, np.int8),
        (lut.LUT_HOLE_CARDS_2_IDX, np.int16),
    )
    for table, want in dtype_expectations:
        assert table.dtype == np.dtype(want)
def get_holdem_env_bldr():
    """Return a VanillaEnvBuilder for a 3-seat Discretized NLHE game.

    Uses fixed pot-fraction bet sizes and a stack randomization window around
    the 234-chip starting stacks.
    """
    args = DiscretizedNLHoldem.ARGS_CLS(
        n_seats=3,
        starting_stack_sizes_list=[234, 234, 234],
        bet_sizes_list_as_frac_of_pot=[0.1, 0.4, 1.0],
        stack_randomization_range=(-123, 234),
    )
    return VanillaEnvBuilder(env_cls=DiscretizedNLHoldem, env_args=args)
def test_hole_card_luts(self):
    """Tests reversibility of the hole-card lookup tables.

    First checks that the range-index -> hole-cards conversion helpers (1D and
    2D) agree with the raw LUTs for all 1326 range indices. Then checks the
    inverse direction (hole cards -> range index), which implicitly validates
    that the mapping is unique in both representations.
    """
    lh = DiscretizedNLHoldem.get_lut_holder()
    for h in range(1326):
        _c_1d = lh.get_1d_hole_cards_from_range_idx(h)  # Tests 1d conversion
        _c_2d = lh.get_2d_hole_cards_from_range_idx(h)  # Tests 2d conversion
        c_1d = lh.LUT_IDX_2_HOLE_CARDS[h]
        c_2d = np.array([lh.LUT_1DCARD_2_2DCARD[c_1d[0]],
                         lh.LUT_1DCARD_2_2DCARD[c_1d[1]]],
                        dtype=np.int8)
        assert np.array_equal(c_1d, _c_1d)
        assert np.array_equal(c_2d, _c_2d)

    # Tests inverse and thus validates that the mapping is unique for both 1d
    # and 2d implicitly.
    # BUGFIX: the inner bound was previously 50, silently skipping every combo
    # that involves 1D cards 50 or 51. It now matches the enumeration used in
    # test_get_range_idx_from_hole_cards and covers all C(52, 2) = 1326 pairs.
    for c1 in range(51):
        for c2 in range(c1 + 1, 52):
            cc1 = lh.LUT_1DCARD_2_2DCARD[c1]
            cc2 = lh.LUT_1DCARD_2_2DCARD[c2]
            hole_cards = np.array([cc1, cc2], dtype=np.int8)
            assert lh.get_range_idx_from_hole_cards(hole_cards) == \
                   lh.LUT_HOLE_CARDS_2_IDX[c1, c2]
def test_get_range_idx_from_hole_cards(self):
    """LUT_HOLE_CARDS_2_IDX enumerates all (c1 < c2) pairs consecutively from 0."""
    lut = DiscretizedNLHoldem.get_lut_holder()
    expected_idx = 0
    for first in range(51):
        for second in range(first + 1, 52):
            assert lut.LUT_HOLE_CARDS_2_IDX[first, second] == expected_idx
            expected_idx += 1
def test_get_2d_cards(self):
    """get_2d_cards maps arrays of 1D indices to (rank, suit) rows.

    Covers the empty input, single and multiple cards, and the not-dealt token.
    """
    lut = DiscretizedNLHoldem.get_lut_holder()
    cases = [
        (np.array([]), np.array([])),
        (np.array([3]), np.array([[0, 3]])),
        (np.array([3, 51]), np.array([[0, 3], [12, 3]])),
        (np.array([]), np.array([])),
        (np.array([0, Poker.CARD_NOT_DEALT_TOKEN_1D], dtype=np.int8),
         np.concatenate((np.array([[0, 0]]),
                         Poker.CARD_NOT_DEALT_TOKEN_2D.reshape(-1, 2)))),
    ]
    for cards_1d, expected in cases:
        assert np.array_equal(lut.get_2d_cards(cards_1d=cards_1d), expected)
def run(self, n_games_per_seat):
    """Play heads-up matches between the two eval agents and report winnings.

    The reference agent (index 0) plays ``n_games_per_seat`` hands from each
    seat; per-hand rewards are collected and summarized as a mean with a 95%
    confidence interval.

    Args:
        n_games_per_seat (int): number of hands to play per seat position.

    Returns:
        tuple: (mean, upper_conf95, lower_conf95) of the reference agent's
        normalized per-hand winnings as floats.

    Raises:
        ValueError: if the env reports an acting seat other than the two
            expected ones (only heads-up is supported).
    """
    REFERENCE_AGENT = 0
    _env = DiscretizedNLHoldem(env_args=self._env_args,
                               is_evaluating=True,
                               lut_holder=self._lut_holder,
                               hh_logger=self._logger)

    # One reward slot per hand per seat position.
    winnings = np.empty(shape=(n_games_per_seat * _env.N_SEATS), dtype=np.float32)

    for seat_p0 in range(_env.N_SEATS):
        seat_p1 = 1 - seat_p0  # heads-up: the other seat

        # set correct player names here according to positions
        # we rotate players positions to imitate blinds movement
        if self._logger is not None:
            if seat_p0 == REFERENCE_AGENT:
                self._logger.set_names(('Hero', 'Dummy'))
            else:
                self._logger.set_names(('Dummy', 'Hero'))

        for _hand_nr in range(n_games_per_seat):
            # """""""""""""""""
            # Reset
            # """""""""""""""""
            _, r_for_all, done, info = _env.reset()
            # Sync every agent's internal deck/state with the freshly dealt env.
            for e in self._eval_agents:
                e.reset(deck_state_dict=_env.cards_state_dict())

            # """""""""""""""""
            # Play Episode
            # """""""""""""""""
            while not done:
                p_id_acting = _env.current_player.seat_id

                # Whoever occupies the acting seat chooses the action (stepping
                # its internal env copy); the opponent is told what was played.
                if p_id_acting == seat_p0:
                    action_int, _ = self._eval_agents[REFERENCE_AGENT].get_action(step_env=True,
                                                                                  need_probs=False)
                    self._eval_agents[1 - REFERENCE_AGENT].notify_of_action(p_id_acted=p_id_acting,
                                                                            action_he_did=action_int)
                elif p_id_acting == seat_p1:
                    action_int, _ = self._eval_agents[1 - REFERENCE_AGENT].get_action(step_env=True,
                                                                                      need_probs=False)
                    self._eval_agents[REFERENCE_AGENT].notify_of_action(p_id_acted=p_id_acting,
                                                                        action_he_did=action_int)
                else:
                    raise ValueError("Only HU is supported!")

                _, r_for_all, done, info = _env.step(action_int)

            # """""""""""""""""
            # Add Rews
            # """""""""""""""""
            # Store the reference agent's normalized reward for this hand.
            winnings[_hand_nr + (seat_p0 * n_games_per_seat)] = r_for_all[seat_p0] \
                * _env.REWARD_SCALAR \
                * _env.EV_NORMALIZER

            if _hand_nr % 100 == 0:
                print(f"Hand: {_hand_nr} out of {n_games_per_seat}")

    # 95% confidence interval around the mean winnings (normal approximation).
    mean = np.mean(winnings).item()
    std = np.std(winnings).item()
    _d = 1.96 * std / np.sqrt(n_games_per_seat * _env.N_SEATS)
    lower_conf95 = mean - _d
    upper_conf95 = mean + _d

    print(f"Played {n_games_per_seat * 2} hands of poker.")
    print("Player 1", self._eval_agents[REFERENCE_AGENT].get_mode() + ":", mean,
          "milliBB per hand +/-", _d)
    # NOTE(review): label below is missing a space before "+/-" — cosmetic only.
    print("Player 2", self._eval_agents[1 - REFERENCE_AGENT].get_mode() + ":", (-mean),
          "milliBB per hand+/-", _d)

    return float(mean), float(upper_conf95), float(lower_conf95)