def test_get_possible_range_idxs_leduc(self):
    for n in range(2, 9):
        env_bldr = get_leduc_env_bldr()

        # if actually blocked
        for c in range(env_bldr.rules.N_CARDS_IN_DECK):
            board_2d = env_bldr.lut_holder.get_2d_cards(np.array([c], dtype=np.int32))
            result = PokerRange.get_possible_range_idxs(rules=env_bldr.rules,
                                                        lut_holder=env_bldr.lut_holder,
                                                        board_2d=board_2d)
            should_be = np.delete(np.arange(env_bldr.rules.RANGE_SIZE, dtype=np.int32), c)
            assert np.array_equal(a1=result, a2=should_be)

        # if nothing blocked
        board_2d = np.array([Poker.CARD_NOT_DEALT_TOKEN_2D], dtype=np.int8)
        result = PokerRange.get_possible_range_idxs(rules=env_bldr.rules,
                                                    lut_holder=env_bldr.lut_holder,
                                                    board_2d=board_2d)
        should_be = np.arange(env_bldr.rules.RANGE_SIZE, dtype=np.int32)
        assert np.array_equal(a1=result, a2=should_be)
def test_normalize(self):
    env_bldr = get_leduc_env_bldr()
    range_ = PokerRange(env_bldr=env_bldr)
    range_._range = np.random.random(size=env_bldr.rules.RANGE_SIZE)
    range_.normalize()
    np.testing.assert_allclose(np.sum(range_._range), 1, atol=0.0001)
def test_normalize_all_zero(self):
    env_bldr = get_leduc_env_bldr()
    range_ = PokerRange(env_bldr=env_bldr)
    range_._range = np.zeros_like(range_._range)
    range_.normalize()
    np.testing.assert_allclose(np.sum(range_._range), 1, atol=0.0001)
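# The two tests above pin down normalize()'s contract: the range always sums to 1 afterwards,
# even when it was all zeros beforehand. A minimal standalone sketch of that behavior follows;
# the uniform fallback is an assumption for illustration, not necessarily PokerRange's exact code.
import numpy as np

def _normalize_sketch(range_1d):
    s = np.sum(range_1d)
    if s == 0:
        # nothing to scale; fall back to a uniform distribution over all hands
        return np.full_like(range_1d, 1.0 / range_1d.shape[0])
    return range_1d / s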
def test_get_new_blockers_1d_leduc(self):
    env_bldr = get_leduc_env_bldr()
    range_ = PokerRange(env_bldr=env_bldr)
    full_board = np.array([[2, 1]], dtype=np.int8)
    should_be = {
        Poker.PREFLOP: env_bldr.lut_holder.get_1d_cards(full_board[:0]),
        Poker.FLOP: env_bldr.lut_holder.get_1d_cards(full_board),
    }

    for _round in [Poker.FLOP]:
        _n = env_bldr.lut_holder.DICT_LUT_N_CARDS_OUT[Poker.FLOP] \
             - env_bldr.lut_holder.DICT_LUT_N_CARDS_OUT[_round]

        if _round == Poker.FLOP:
            board_2d = np.copy(full_board)
        else:
            board_2d = np.concatenate(
                (full_board[:env_bldr.lut_holder.DICT_LUT_N_CARDS_OUT[_round]],
                 np.array([Poker.CARD_NOT_DEALT_TOKEN_2D for _ in range(_n)], dtype=np.int8)))

        result = range_._get_new_blockers_1d(game_round=_round, board_2d=board_2d)
        assert np.array_equal(a1=result, a2=should_be[_round])
def test_get_new_blockers_1d_holdem(self):
    env_bldr = get_holdem_env_bldr()
    range_ = PokerRange(env_bldr=env_bldr)
    full_board = np.array([[1, 2], [3, 3], [12, 1], [5, 2], [6, 0]], dtype=np.int8)
    should_be = {
        Poker.PREFLOP: env_bldr.lut_holder.get_1d_cards(full_board[:0]),
        Poker.FLOP: env_bldr.lut_holder.get_1d_cards(full_board[0:3]),
        Poker.TURN: env_bldr.lut_holder.get_1d_cards(full_board[3:4]),
        Poker.RIVER: env_bldr.lut_holder.get_1d_cards(full_board[4:5]),
    }

    for _round in [Poker.PREFLOP, Poker.FLOP, Poker.TURN, Poker.RIVER]:
        _n = env_bldr.lut_holder.DICT_LUT_N_CARDS_OUT[Poker.RIVER] \
             - env_bldr.lut_holder.DICT_LUT_N_CARDS_OUT[_round]

        if _round == Poker.RIVER:
            board_2d = np.copy(full_board)
        else:
            board_2d = np.concatenate(
                (full_board[:env_bldr.lut_holder.DICT_LUT_N_BOARD_BRANCHES[_round]],
                 np.array([Poker.CARD_NOT_DEALT_TOKEN_2D for _ in range(_n)], dtype=np.int8)))

        result = range_._get_new_blockers_1d(game_round=_round, board_2d=board_2d)
        assert np.array_equal(a1=result, a2=should_be[_round])
def test_get_card_probs_holdem(self):
    env_bldr = get_holdem_env_bldr()
    range_ = PokerRange(env_bldr=env_bldr)
    cards_to_remove = np.array([0, 3, 6, 33, 21, 51], np.int8)

    # use previously tested method to make this test easier
    range_.set_cards_to_zero_prob(cards_2d=env_bldr.lut_holder.get_2d_cards(cards_to_remove))

    r = range_.get_card_probs()
    assert np.allclose(np.sum(r), 2, atol=0.00001)
    for c in cards_to_remove:
        assert np.allclose(r[c], 0, atol=0.00001)
def test_remove_cards_from_raw_range_holdem(self):
    env_bldr = get_holdem_env_bldr()
    range_ = PokerRange(env_bldr=env_bldr)
    holdem_cards = np.array([[7, 2], [6, 0]], dtype=np.int8)
    range_.set_cards_to_zero_prob(cards_2d=holdem_cards)

    _ra = range_._range.reshape(-1, env_bldr.rules.RANGE_SIZE)
    for i in range(_ra.shape[0]):
        np.testing.assert_allclose(np.sum(_ra[i]), 1, atol=0.00001)

    _assert_cards_not_in_ranges(cards_2d=holdem_cards,
                                ranges=_ra,
                                rules=env_bldr.rules,
                                lut_holder=env_bldr.lut_holder)
def _fill_chance_node_strategy(self, node):
    assert node.strategy is None

    if node.is_terminal:
        return

    if node.p_id_acting_next == self._tree.CHANCE_ID:
        game_round = node.children[0].env_state[EnvDictIdxs.current_round]
        n_children = len(node.children)
        assert n_children == self._env_bldr.lut_holder.DICT_LUT_N_BOARDS[game_round]

        # chance nodes are uniform random
        node.strategy = np.zeros(shape=(self._env_bldr.rules.RANGE_SIZE, n_children), dtype=np.float32)

        # impossible hands keep probability 0 (from the zero init); possible hands get uniform probability
        for c_id in range(n_children):
            mask = PokerRange.get_possible_range_idxs(
                rules=self._env_bldr.rules,
                lut_holder=self._env_bldr.lut_holder,
                board_2d=node.children[c_id].env_state[EnvDictIdxs.board_2d])
            node.strategy[mask, c_id] = 1.0 / (self._env_bldr.rules.N_CARDS_IN_DECK - 2)

    for c in node.children:
        self._fill_chance_node_strategy(node=c)
def test_create(self):
    env_bldr = get_leduc_env_bldr()
    range_ = PokerRange(env_bldr=env_bldr)
    assert np.allclose(np.sum(range_._range.reshape(-1, env_bldr.rules.RANGE_SIZE), axis=1),
                       1, atol=0.0001)
def test_get_possible_range_idxs_holdem(self):
    env_bldr = get_holdem_env_bldr()
    for n in range(2, 9):
        board_2d = np.array([[0, 0], [5, 2], [12, 3], Poker.CARD_NOT_DEALT_TOKEN_2D],
                            dtype=np.int8)
        result = PokerRange.get_possible_range_idxs(rules=env_bldr.rules,
                                                    lut_holder=env_bldr.lut_holder,
                                                    board_2d=board_2d)

        # 3 board cards block 3 of the 52 cards, so 49-choose-2 = 1176 hands remain possible
        assert result.shape[0] == 1176

        # all of these range indices should be blocked
        for e in [0, 1, 2, 3, 4, 50, 1325]:
            assert not np.any(result == e)
def get_n_board_branches_LUT(self):
    _N_CARDS_DEALT_IN_TRANSITION_TO_LUT = self.get_n_cards_dealt_in_transition_to_LUT()
    _N_CARDS_OUT_AT = self.get_n_cards_out_at_LUT()

    lut = {Poker.PREFLOP: 0}
    for r in [_r for _r in self.rules.ALL_ROUNDS_LIST if _r != Poker.PREFLOP]:
        nc = self.rules.N_CARDS_IN_DECK \
             - _N_CARDS_OUT_AT[self.rules.ROUND_BEFORE[r]] \
             - self.rules.N_HOLE_CARDS

        # get_range_size() is actually a general n-choose-k function, so we can reuse it here
        lut[r] = PokerRange.get_range_size(n_hole_cards=_N_CARDS_DEALT_IN_TRANSITION_TO_LUT[r],
                                           n_cards_in_deck=nc)
    return lut
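# Quick sanity check of the formula above for standard Hold'em constants (52-card deck,
# 2 hole cards, 3/1/1 board cards), using only the standard library. Purely illustrative;
# the values are computed here directly rather than read from the LUT.
from math import comb

flop_branches = comb(52 - 0 - 2, 3)   # 19600 possible flops from one player's perspective
turn_branches = comb(52 - 3 - 2, 1)   # 47 possible turn cards
river_branches = comb(52 - 4 - 2, 1)  # 46 possible river cards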
def test_get_range_size(self):
    assert PokerRange.get_range_size(n_hole_cards=2, n_cards_in_deck=52) == 1326
    assert PokerRange.get_range_size(n_hole_cards=4, n_cards_in_deck=52) == 270725
    assert PokerRange.get_range_size(n_hole_cards=1, n_cards_in_deck=52) == 52
    assert PokerRange.get_range_size(n_hole_cards=1, n_cards_in_deck=6) == 6
    assert PokerRange.get_range_size(n_hole_cards=3, n_cards_in_deck=6) == 20
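# The expected values above are plain binomial coefficients: the number of ways to draw the
# hole cards from the deck. A minimal cross-check with the standard library (not part of the
# test suite):
from math import comb

assert comb(52, 2) == 1326
assert comb(52, 4) == 270725
assert comb(6, 3) == 20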
class LocalLBRWorker:
    """
    Slave to EvalLBRMaster. Does the LBR computation as described in https://arxiv.org/abs/1612.07547
    """

    def __init__(self, t_prof, chief_handle, eval_agent_cls):
        assert t_prof.n_seats == 2

        self.t_prof = t_prof
        self.lbr_args = t_prof.module_args["lbr"]
        self._eval_env_bldr = _util.get_env_builder_lbr(t_prof=t_prof)
        self.check_to_round = self.lbr_args.lbr_check_to_round

        self.chief_handle = chief_handle

        self.agent = _AgentWrapper(t_prof=t_prof, lbr_args=self.lbr_args, eval_agent_cls=eval_agent_cls)

        # can have different raise sizes than the agent's env! This needs to be considered when
        # updating the envs after the opponent acts.
        self._env = None
        self.agent_range = PokerRange(env_bldr=self._eval_env_bldr)

        assert self.check_to_round is None or (self.check_to_round in self._eval_env_bldr.rules.ALL_ROUNDS_LIST)

    def run(self, agent_seat_id, n_iterations, mode, stack_size):
        """ returns an estimate of a lower bound of the exploitability of the agent """
        self.agent.set_mode(mode=mode)
        self.agent.to_stack_size(stack_size)
        self.agent_range.reset()

        self._env = self._eval_env_bldr.get_new_env(is_evaluating=True, stack_size=stack_size)

        if not self.agent.can_compute_mode():
            return None

        if self._eval_env_bldr.env_cls.IS_FIXED_LIMIT_GAME:
            return self._run_limit(agent_seat_id=agent_seat_id, n_iterations=n_iterations)
        else:
            return self._run_no_limit(agent_seat_id=agent_seat_id, n_iterations=n_iterations)

    def update_weights(self, weights_for_eval_agent):
        self.agent.update_weights(weights_for_eval_agent)

    def _reset_episode(self):
        ret = self._env.reset()
        self.agent.reset(deck_state_dict=self._env.cards_state_dict())
        self.agent_range.reset()
        return ret

    def _run_limit(self, agent_seat_id, n_iterations):
        total_lbr_winnings = np.empty(shape=n_iterations, dtype=np.float32)
        lbr_seat_id = 1 - agent_seat_id

        for iteration_id in range(n_iterations):
            if iteration_id % 50 == 0:
                print("LBR hand: ", iteration_id)

            # """""""""""""""""
            # Reset
            # """""""""""""""""
            env_obs, reward, terminal, info = self._reset_episode()

            lbr_hand = self._env.get_hole_cards_of_player(p_id=lbr_seat_id)
            self.agent_range.set_cards_to_zero_prob(cards_2d=lbr_hand)

            # """""""""""""""""
            # Play Episode
            # """""""""""""""""
            while not terminal:
                p_id_acting = self._env.current_player.seat_id

                if self.t_prof.DEBUGGING:
                    assert p_id_acting == self.agent.cpu_agent._internal_env_wrapper.env.current_player.seat_id

                if p_id_acting == lbr_seat_id:
                    # optional feature: check the first N rounds 100% as LBR
                    if (self.check_to_round is not None) and (self._env.current_round < self.check_to_round):
                        action_int = Poker.CHECK_CALL
                    else:
                        _rollout_mngr = _LBRRolloutManager(t_prof=self.t_prof, env_bldr=self._eval_env_bldr,
                                                           env=self._env, lbr_hand_2d=lbr_hand)

                        # illegal actions: -1, fold: 0, all others: estimated EV (any float)
                        _utility = np.full(shape=3, fill_value=-1.0, dtype=np.float32)

                        # ev(s, lbr_a=fold)
                        _utility[Poker.FOLD] = 0.0

                        # ev(s, lbr_a=check_call)
                        _wp = _rollout_mngr.get_lbr_checkdown_equity(agent_range=self.agent_range)  # if check/called down
                        _asked = self._env.seats[agent_seat_id].current_bet - self._env.seats[lbr_seat_id].current_bet
                        _pot_before_action = self._env.get_all_winnable_money()
                        _utility[Poker.CHECK_CALL] = _wp * _pot_before_action - (1 - _wp) * _asked

                        # prepare for raise simulation
                        if Poker.BET_RAISE in self._env.get_legal_actions():
                            _saved_env_state = self._env.state_dict()
                            _saved_agent_env_state = self.agent.env_state_dict()
                            _saved_agent_range_state = self.agent_range.state_dict()

                            # compute ev for raise
                            # _________________________________ simulate LBR play r ____________________________________
                            self._env.step(action=Poker.BET_RAISE)
                            _pot_after_raise = self._env.get_all_winnable_money()
                            self.agent.notify_of_action(p_id_acted=lbr_seat_id, action_he_did=Poker.BET_RAISE)

                            # what agent would do after LBR raises. DOESN'T STEP INTERNAL ENV!
                            _, a_probs_each_hand = self.agent.get_action(step_env=False, need_probs=True)

                            # _______________________________ simulate agent reaction __________________________________
                            # p(agent_fold)
                            _fold_prob = np.sum(self.agent_range.range * a_probs_each_hand[:, Poker.FOLD])

                            # p(not agent_fold | hand)
                            _p_not_fold_per_hand = (1 - a_probs_each_hand[:, Poker.FOLD])

                            # agent_range after not folding
                            self.agent_range.mul_and_norm(_p_not_fold_per_hand)

                            # p(lbr_win | lbr play r -> agent play not fold)
                            _wp_now = _rollout_mngr.get_lbr_checkdown_equity(agent_range=self.agent_range)

                            # ev(state, lbr_a=r)
                            _chips_lbr_puts_in_pot = _pot_after_raise - _pot_before_action
                            _ev_if_fold = _pot_before_action
                            _ev_if_not_fold = (_wp_now * _pot_after_raise) - ((1 - _wp_now) * _chips_lbr_puts_in_pot)
                            _utility[Poker.BET_RAISE] = _fold_prob * _ev_if_fold + (1 - _fold_prob) * _ev_if_not_fold

                            # ________________________________________ reset ___________________________________________
                            self.agent_range.load_state_dict(_saved_agent_range_state)
                            self._env.load_state_dict(_saved_env_state)
                            self.agent.load_env_state_dict(_saved_agent_env_state)

                        # select action with highest approximated EV
                        action_int = np.argmax(_utility)

                    # ________________________________________ notify agent ____________________________________________
                    self.agent.notify_of_action(p_id_acted=lbr_seat_id, action_he_did=action_int)

                else:  # agent has to act
                    action_int, a_probs_each_hand = self.agent.get_action(step_env=True, need_probs=True)
                    self.agent_range.update_after_action(action=action_int,
                                                         all_a_probs_for_all_hands=a_probs_each_hand)

                # _____________________________________________ step ___________________________________________________
                old_game_round = self._env.current_round

                env_obs, reward, terminal, info = self._env.step(action=action_int)

                if self._env.current_round != old_game_round:
                    self.agent_range.update_after_new_round(new_round=self._env.current_round,
                                                            board_now_2d=self._env.board)

            total_lbr_winnings[iteration_id] = reward[lbr_seat_id] * self._env.REWARD_SCALAR * self._env.EV_NORMALIZER

        return total_lbr_winnings

    def _run_no_limit(self, agent_seat_id, n_iterations):
        total_lbr_winnings = np.empty(shape=n_iterations, dtype=np.float32)
        lbr_seat_id = 1 - agent_seat_id
        n_lbr_bets = len(self._env.bet_sizes_list_as_frac_of_pot)

        for iteration_id in range(n_iterations):
            if iteration_id % 50 == 0:
                print("LBR hand: ", iteration_id)

            # """""""""""""""""
            # Reset
            # """""""""""""""""
            env_obs, reward, done, info = self._reset_episode()

            lbr_hand = self._env.get_hole_cards_of_player(p_id=lbr_seat_id)
            self.agent_range.set_cards_to_zero_prob(cards_2d=lbr_hand)

            # """""""""""""""""
            # Play Episode
            # """""""""""""""""
            while not done:
                p_id_acting = self._env.current_player.seat_id

                if self.t_prof.DEBUGGING:
                    assert p_id_acting == self.agent.cpu_agent._internal_env_wrapper.env.current_player.seat_id

                if p_id_acting == lbr_seat_id:
                    # optional feature: check the first N rounds 100% as LBR
                    if (self.check_to_round is not None) and (self._env.current_round < self.check_to_round):
                        action_int = Poker.CHECK_CALL
                    else:
                        _rollout_mngr = _LBRRolloutManager(t_prof=self.t_prof, env_bldr=self._eval_env_bldr,
                                                           env=self._env, lbr_hand_2d=lbr_hand)

                        # illegal actions: -1, fold: 0, all others: estimated EV (any float)
                        _utility = np.full(shape=2 + n_lbr_bets, fill_value=-1.0, dtype=np.float32)

                        # ev(s, lbr_a=fold)
                        _utility[Poker.FOLD] = 0.0

                        # ev(s, lbr_a=check_call)
                        _wp = _rollout_mngr.get_lbr_checkdown_equity(agent_range=self.agent_range)
                        _asked = self._env.seats[agent_seat_id].current_bet - self._env.seats[lbr_seat_id].current_bet
                        _pot_before_action = self._env.get_all_winnable_money()
                        _utility[Poker.CHECK_CALL] = _wp * _pot_before_action - (1 - _wp) * _asked

                        # prepare for raise simulation
                        _saved_env_state = self._env.state_dict()
                        _saved_agent_env_state = self.agent.env_state_dict()
                        _saved_agent_range_state = self.agent_range.state_dict()

                        _legal_raises = self._env.get_legal_actions()
                        for a in [Poker.FOLD, Poker.CHECK_CALL]:
                            if a in _legal_raises:
                                _legal_raises.remove(a)

                        # compute ev for all raise sizes LBR can choose from
                        for r in _legal_raises:
                            raise_frac = self._env.bet_sizes_list_as_frac_of_pot[r - 2]

                            # _________________________________ simulate LBR play r ____________________________________
                            self._env.step(action=r)
                            _pot_after_raise = self._env.get_all_winnable_money()
                            self.agent.notify_of_raise_frac_action(p_id_acted=lbr_seat_id, frac=raise_frac)

                            if self.t_prof.DEBUGGING:
                                assert agent_seat_id == self.agent.cpu_agent._internal_env_wrapper.env.current_player.seat_id

                            # what agent would do after LBR raises. DOESN'T STEP INTERNAL ENV!
                            a_probs_each_hand = self.agent.get_a_probs_for_each_hand()

                            # _______________________________ simulate agent reaction __________________________________
                            # p(agent_fold)
                            _fold_prob = np.sum(self.agent_range.range * a_probs_each_hand[:, Poker.FOLD])

                            # p(not agent_fold | hand)
                            _p_not_fold_per_hand = (1 - a_probs_each_hand[:, Poker.FOLD])

                            # agent_range after not folding
                            self.agent_range.mul_and_norm(_p_not_fold_per_hand)

                            # p(lbr_win | lbr play r -> agent play not fold)
                            _wp_now = _rollout_mngr.get_lbr_checkdown_equity(agent_range=self.agent_range)

                            # ev(state, lbr_a=r)
                            _chips_lbr_puts_in_pot = _pot_after_raise - _pot_before_action
                            _ev_if_fold = _pot_before_action
                            _ev_if_not_fold = (_wp_now * _pot_after_raise) - ((1 - _wp_now) * _chips_lbr_puts_in_pot)
                            _utility[r] = _fold_prob * _ev_if_fold + (1 - _fold_prob) * _ev_if_not_fold

                            # ________________________________________ reset ___________________________________________
                            self.agent_range.load_state_dict(_saved_agent_range_state)
                            self._env.load_state_dict(_saved_env_state)
                            self.agent.load_env_state_dict(_saved_agent_env_state)

                        # select action with highest approximated EV
                        action_int = np.argmax(_utility)

                    # ________________________________________ notify agent ____________________________________________
                    if action_int >= 2:
                        raise_frac = self._env.bet_sizes_list_as_frac_of_pot[action_int - 2]
                        self.agent.notify_of_raise_frac_action(p_id_acted=lbr_seat_id, frac=raise_frac)
                    else:
                        self.agent.notify_of_action(p_id_acted=lbr_seat_id, action_he_did=action_int)

                else:  # agent has to act
                    if self.t_prof.DEBUGGING:
                        assert p_id_acting == self.agent.cpu_agent._internal_env_wrapper.env.current_player.seat_id

                    action_int, a_probs_each_hand = self.agent.get_action(step_env=True, need_probs=True)
                    self.agent_range.update_after_action(action=action_int,
                                                         all_a_probs_for_all_hands=a_probs_each_hand)

                    if action_int >= 2:
                        # query the bet size from the agent's env_args (these might differ from LBR's!)
                        raise_frac = \
                            self.agent.cpu_agent.env_bldr.env_args.bet_sizes_list_as_frac_of_pot[action_int - 2]

                # _____________________________________________ step ___________________________________________________
                old_game_round = self._env.current_round

                if action_int >= 2:
                    # step with a pot fraction because agent and LBR can have different raise sizes
                    env_obs, reward, done, info = self._env.step_raise_pot_frac(pot_frac=raise_frac)
                else:
                    env_obs, reward, done, info = self._env.step(action=action_int)

                if self._env.current_round != old_game_round:
                    self.agent_range.update_after_new_round(new_round=self._env.current_round,
                                                            board_now_2d=self._env.board)

            total_lbr_winnings[iteration_id] = reward[lbr_seat_id] * self._env.REWARD_SCALAR * self._env.EV_NORMALIZER

        return total_lbr_winnings
class Flop3HoldemRules:
    """
    General rules of Flop Hold'em (FHP): a Texas Hold'em variant that ends after the flop betting round.
    """
    N_HOLE_CARDS = 2
    N_RANKS = 13
    N_SUITS = 4
    N_CARDS_IN_DECK = N_RANKS * N_SUITS
    RANGE_SIZE = PokerRange.get_range_size(n_hole_cards=N_HOLE_CARDS, n_cards_in_deck=N_CARDS_IN_DECK)

    BTN_IS_FIRST_POSTFLOP = False

    N_FLOP_CARDS = 3
    N_TURN_CARDS = 0
    N_RIVER_CARDS = 0
    N_TOTAL_BOARD_CARDS = N_FLOP_CARDS

    ALL_ROUNDS_LIST = [Poker.PREFLOP, Poker.FLOP]

    SUITS_MATTER = True

    ROUND_BEFORE = {
        Poker.PREFLOP: Poker.PREFLOP,
        Poker.FLOP: Poker.PREFLOP,
        Poker.TURN: None,
        Poker.RIVER: None,
    }
    ROUND_AFTER = {
        Poker.PREFLOP: Poker.FLOP,
        Poker.FLOP: None,
        Poker.TURN: None,
        Poker.RIVER: None,
    }

    RANK_DICT = {
        Poker.CARD_NOT_DEALT_TOKEN_1D: "",
        0: "2", 1: "3", 2: "4", 3: "5", 4: "6", 5: "7", 6: "8",
        7: "9", 8: "T", 9: "J", 10: "Q", 11: "K", 12: "A",
    }
    SUIT_DICT = {
        Poker.CARD_NOT_DEALT_TOKEN_1D: "",
        0: "h", 1: "d", 2: "s", 3: "c",
    }

    STRING = "FLOP3_HOLDEM_RULES"

    def __init__(self):
        from PokerRL.game._.cpp_wrappers.CppHandeval import CppHandeval
        self._clib = CppHandeval()

    def get_hand_rank_all_hands_on_given_boards(self, boards_1d, lut_holder):
        """ for docs refer to PokerEnv """
        raise NotImplementedError(
            "Batched hand eval for multiple boards is unfortunately not implemented for FHP at the moment.")  # TODO

    def get_hand_rank(self, hand_2d, board_2d):
        """ for docs refer to PokerEnv """
        return self._clib.get_hand_rank_fhp3(hand_2d=hand_2d, board_2d=board_2d)

    @classmethod
    def get_lut_holder(cls):
        from PokerRL.game._.look_up_table import LutHolderHoldem
        return LutHolderHoldem(cls)
class BigLeducRules:
    N_HOLE_CARDS = 1
    N_RANKS = 12
    N_SUITS = 2
    N_CARDS_IN_DECK = N_RANKS * N_SUITS
    RANGE_SIZE = PokerRange.get_range_size(n_hole_cards=N_HOLE_CARDS, n_cards_in_deck=N_CARDS_IN_DECK)

    BTN_IS_FIRST_POSTFLOP = True

    N_FLOP_CARDS = 1
    N_TURN_CARDS = 0
    N_RIVER_CARDS = 0
    N_TOTAL_BOARD_CARDS = N_FLOP_CARDS + N_TURN_CARDS + N_RIVER_CARDS

    ALL_ROUNDS_LIST = [Poker.PREFLOP, Poker.FLOP]

    SUITS_MATTER = False

    ROUND_BEFORE = {
        Poker.PREFLOP: Poker.PREFLOP,
        Poker.FLOP: Poker.PREFLOP,
    }
    ROUND_AFTER = {
        Poker.PREFLOP: Poker.FLOP,
        Poker.FLOP: None,
    }

    RANK_DICT = {i: str(i + 2) for i in range(N_RANKS)}
    SUIT_DICT = {k: ["a", "b", "c", "d", "e", "f", "g"][k] for k in range(N_SUITS)} \
        if N_SUITS < 8 \
        else {i: "_" + str(i) for i in range(N_SUITS)}

    STRING = "BIG_LEDUC_RULES"

    def __init__(self):
        pass

    def get_hand_rank_all_hands_on_given_boards(self, boards_1d, lut_holder):
        """ for general docs refer to PokerEnv """
        hand_ranks = np.full(shape=(boards_1d.shape[0], self.RANGE_SIZE), fill_value=-1, dtype=np.int32)
        for board_idx in range(boards_1d.shape[0]):
            for range_idx in range(self.RANGE_SIZE):
                hand_ranks[board_idx, range_idx] = self.get_hand_rank(
                    hand_2d=lut_holder.get_2d_hole_cards_from_range_idx(range_idx=range_idx),
                    board_2d=lut_holder.get_2d_cards(cards_1d=boards_1d[board_idx]))
        return hand_ranks

    def get_hand_rank(self, hand_2d, board_2d):
        """ for docs refer to PokerEnv """
        if board_2d[0, 0] == hand_2d[0, 0]:
            return 10000 + hand_2d[0, 0]
        else:
            return hand_2d[0, 0]

    @classmethod
    def get_lut_holder(cls):
        from PokerRL.game._.look_up_table import LutHolderLeduc
        return LutHolderLeduc(cls)
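# Illustration of BigLeducRules.get_hand_rank() as defined above: pairing the board always
# outranks any unpaired hand because of the +10000 offset. The concrete card values below are
# arbitrary examples, not taken from the library's tests.
import numpy as np

rules = BigLeducRules()
board = np.array([[5, 0]], dtype=np.int8)
assert rules.get_hand_rank(hand_2d=np.array([[5, 1]], dtype=np.int8), board_2d=board) == 10005  # paired with board
assert rules.get_hand_rank(hand_2d=np.array([[11, 0]], dtype=np.int8), board_2d=board) == 11    # high card only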
def test_save_load(self):
    env_bldr = get_leduc_env_bldr()
    range_ = PokerRange(env_bldr=env_bldr)
    range_.load_state_dict(range_.state_dict())
def test_get_card_probs_leduc(self):
    env_bldr = get_leduc_env_bldr()
    range_ = PokerRange(env_bldr=env_bldr)
    assert np.array_equal(range_.get_card_probs(), range_._range)
class PLORules:
    """
    General rules of Pot Limit Omaha
    """
    N_HOLE_CARDS = 4
    N_RANKS = 13
    N_SUITS = 4
    N_CARDS_IN_DECK = N_RANKS * N_SUITS
    RANGE_SIZE = PokerRange.get_range_size(n_hole_cards=N_HOLE_CARDS, n_cards_in_deck=N_CARDS_IN_DECK)

    BTN_IS_FIRST_POSTFLOP = False

    N_FLOP_CARDS = 3
    N_TURN_CARDS = 1
    N_RIVER_CARDS = 1
    N_TOTAL_BOARD_CARDS = N_FLOP_CARDS + N_TURN_CARDS + N_RIVER_CARDS

    ALL_ROUNDS_LIST = [Poker.PREFLOP, Poker.FLOP, Poker.TURN, Poker.RIVER]

    SUITS_MATTER = True

    ROUND_BEFORE = {
        Poker.PREFLOP: Poker.PREFLOP,
        Poker.FLOP: Poker.PREFLOP,
        Poker.TURN: Poker.FLOP,
        Poker.RIVER: Poker.TURN,
    }
    ROUND_AFTER = {
        Poker.PREFLOP: Poker.FLOP,
        Poker.FLOP: Poker.TURN,
        Poker.TURN: Poker.RIVER,
        Poker.RIVER: None,
    }

    RANK_DICT = {
        Poker.CARD_NOT_DEALT_TOKEN_1D: "",
        0: "2", 1: "3", 2: "4", 3: "5", 4: "6", 5: "7", 6: "8",
        7: "9", 8: "T", 9: "J", 10: "Q", 11: "K", 12: "A",
    }
    SUIT_DICT = {
        Poker.CARD_NOT_DEALT_TOKEN_1D: "",
        0: "h", 1: "d", 2: "s", 3: "c",
    }

    STRING = "PLO_RULES"

    def __init__(self):
        from PokerRL.game._.cpp_wrappers.CppHandeval import CppHandeval
        self._clib = CppHandeval()

    def get_hand_rank_all_hands_on_given_boards(self, boards_1d, lut_holder):
        """
        for docs refer to PokerEnv

        returns a numpy array of shape [n_boards, n_hole_hands] where the best hand has the biggest
        number and impossible hands have -1
        """
        return self._clib.get_hand_rank_all_hands_on_given_boards_52_holdem(boards_1d=boards_1d,
                                                                            lut_holder=lut_holder)

    def get_hand_rank(self, hand_2d, board_2d):
        """ for docs refer to PokerEnv """
        return self._clib.get_hand_rank_52_plo(hand_2d=hand_2d, board_2d=board_2d)

    @classmethod
    def get_lut_holder(cls):
        from PokerRL.game._.look_up_table import LutHolderPLO
        return LutHolderPLO(cls)